I have a reports index and I need to count the number of reports per-user that have a specific string (keyword) field value in their last document (sorted by time) in some time range.
I tried bucketing by user -> top (1) hits -> get max value -> filter by max value -> count buckets.
It's supposed to be something like this, but this only works if the field is numeric (the "max" aggregation doesn't work on string values).
Basically I need to extract the single string value from the top hit in order to filter the buckets.
How can this be achieved?
POST /reports/_search
{
"size": 0,
"query": {
"bool": {
"must": {
"range": {
"timestamp": {
"gte": "2020-12-01T21:00:00.000Z",
"lte": "2020-12-31T22:41:35.092Z",
"format": "strict_date_optional_time"
}
}
}
}
},
"aggs": {
"distinct_users": {
"terms": {
"field": "userId",
"size": 10000
},
"aggs": {
"top_report": { // get last document by date
"top_hits": {
"size": 1,
"sort": [
{
"timestamp": "desc"
}
],
"_source": "some_report_string_field"
}
},
"max_value": { // get field value of last document
"max": {
"field": "some_report_string_field"
}
},
"value_selector": { // select only users with a specific field value in their last document
"bucket_selector": {
"buckets_path": {
"max": "max_value.value"
},
"script": "params.max == 'some_value'"
}
}
}
},
"users_count": { // count the filtered users
"stats_bucket": {
"buckets_path": "distinct_users._count"
}
}
}
}