Hi all,
I have a rather large index of documents with a UID field to identify the users, the @timestamp field, and some other fields that are irrelevant to what I'm trying to do.
I'm trying to get the 25th, 50th and 75th percentiles of the daily average number of documents per user.
This is my request so far (just ignore the whole query with range and match_phrase; those parts are working fine; it's the aggregations after them that are causing me some trouble):
POST /csv_index/_search { "size": 0, "query": { "bool": { "must": [ { "match_all": {} }, { "bool": { "should": [ { "match_phrase": { "function.keyword": "Operations Manager" } } ], "minimum_should_match": 1 } }, { "range": { "@timestamp": { "gte": "2018-02-01T00:00:00", "lt": "2018-05-26T00:00:00" } } } ], "filter": [], "should": [], "must_not": [] } }, "aggs": { "message_per_user_per_day": { "date_histogram": { "field": "@timestamp", "interval": "1d" }, "aggs": { "messages_per_users": { "terms": { "field": "UID" } }, "aggs": { "messages": { "sum": { "field": "@timestamp" } } } } }, "percentiles_daily_messages": { "percentiles_bucket": { "buckets_path": "message_per_user_per_day>messages_per_users", "percents": [ 0, 25, 50, 75, 100 ] } } } }
I haven't added the final average yet, but I'm pretty stuck and I'm not even sure this is going to achieve what I'm trying to get in the end; my terms aggregation, which is meant to split my sum (or value_count) aggregation, doesn't seem right either. I get the "Unknown BaseAggregationBuilder" exception, and I think it's related to the way my sub-aggregations are nested, but it's rather unclear to me.
thanks!
UPDATE: I used Kibana to get part of what I'm looking for, and here's my "new" query so far:
POST /csv_index/_search
{
"size": 0,
"_source": {
"excludes": []
},
"aggs": {
"messages_per_day": {
"date_histogram": {
"field": "@timestamp",
"interval": "1d",
"time_zone": "America/New_York",
"min_doc_count": 1
},
"aggs": {
"messages_per_users_per_day": {
"terms": {
"field": "pein.keyword",
"order": {
"_count": "desc"
}
}
}
}
}
},
"avg_messages_per_users_per_day": {
"avg_bucket": {
"buckets_path": "messages_per_day>messages_per_users_per_day"
}
},
"stored_fields": [
"*"
],
"script_fields": {},
"docvalue_fields": [
"@timestamp"
],
"query": {
"bool": {
"must": [
{
"match_all": {}
},
{
"range": {
"@timestamp": {
"gte": "2018-02-01T00:00:00",
"lt": "2018-05-26T00:00:00"
}
}
}
],
"filter": [],
"should": [],
"must_not": []
}
}
}
I tried to add the avg_bucket aggregation, on top of which I'm going to add the percentiles afterwards, but I'm getting this error:
{
"error": {
"root_cause": [
{
"type": "parsing_exception",
"reason": "Unknown key for a START_OBJECT in [avg_messages_per_users_per_day].",
"line": 26,
"col": 37
}
],
"type": "parsing_exception",
"reason": "Unknown key for a START_OBJECT in [avg_messages_per_users_per_day].",
"line": 26,
"col": 37
},
"status": 400
}