Hello everyone,
Here is my mapping in my index "my-index-000001"
{
"mappings": {
"properties": {
"objectId": { "type": "keyword" },
"dateTime": { "type": "date" },
"status": { "type": "keyword" }
}
}
}
My data is saved as soon as we got an update on the field "status" and I do not update it, I just create another document with the updated status, the new dateTime but with the same "objectId", to be able to reconstruct the lifetime of an object.
For example, I have data like this :
❯ curl -X GET "http://localhost:9200/my-index-000001/_search?pretty" -H "Content-Type: application/json" -d ''
{
"took" : 8,
"timed_out" : false,
"_shards" : {
"total" : 1,
"successful" : 1,
"skipped" : 0,
"failed" : 0
},
"hits" : {
"total" : {
"value" : 8,
"relation" : "eq"
},
"max_score" : 1.0,
"hits" : [
{
"_index" : "my-index-000001",
"_id" : "ePhHHZQBEON9Xl1F-JWQ",
"_score" : 1.0,
"_source" : {
"objectId" : "1",
"dateTime" : "2024-12-31T00:00:00",
"status" : "SUCCESS"
}
},
{
"_index" : "my-index-000001",
"_id" : "efhIHZQBEON9Xl1F2ZU8",
"_score" : 1.0,
"_source" : {
"objectId" : "1",
"dateTime" : "2024-12-31T01:00:00",
"status" : "REVIEW"
}
},
{
"_index" : "my-index-000001",
"_id" : "evhJHZQBEON9Xl1FWpV-",
"_score" : 1.0,
"_source" : {
"objectId" : "1",
"dateTime" : "2024-12-31T02:00:00",
"status" : "MANUAL-SUCCESS"
}
},
{
"_index" : "my-index-000001",
"_id" : "e_hJHZQBEON9Xl1F3JW-",
"_score" : 1.0,
"_source" : {
"objectId" : "1",
"dateTime" : "2024-12-31T03:00:00",
"status" : "REVIEW"
}
},
{
"_index" : "my-index-000001",
"_id" : "fPhKHZQBEON9Xl1Fh5WG",
"_score" : 1.0,
"_source" : {
"objectId" : "1",
"dateTime" : "2024-12-31T04:00:00",
"status" : "MANUAL_FAIL"
}
},
{
"_index" : "my-index-000001",
"_id" : "ffhLHZQBEON9Xl1FD5Uc",
"_score" : 1.0,
"_source" : {
"objectId" : "2",
"dateTime" : "2024-12-31T00:30:00",
"status" : "SUCCESS"
}
},
{
"_index" : "my-index-000001",
"_id" : "fvhLHZQBEON9Xl1FbJV8",
"_score" : 1.0,
"_source" : {
"objectId" : "2",
"dateTime" : "2024-12-31T01:30:00",
"status" : "REVIEW"
}
},
{
"_index" : "my-index-000001",
"_id" : "f_hLHZQBEON9Xl1F45Ux",
"_score" : 1.0,
"_source" : {
"objectId" : "2",
"dateTime" : "2024-12-31T02:30:00",
"status" : "MANUAL-SUCCESS"
}
}
]
}
}
I want to know how many objects I have per status, only taking in account latest statuses for each objectId. So I have to Group by "objectId", taking the first document ordering DESC by "dateTime" and then to Group By "status".
So I used top_hits aggregations for the first part, as a sub-aggregation and then 2 term aggregations :
{
"aggregations": {
"group_by_status": {
"terms": {
"field": "status"
},
"aggregations": {
"group_by_id": {
"terms": {
"field": "objectId"
},
"aggregations": {
"top_id_hits": {
"top_hits": {
"size": 1,
"sort": [
{
"dateTime": {
"order": "desc"
}
}
]
}
}
}
}
}
}
}
}
But the result is that it makes buckets for each status (good), and in each bucket, I have sub-bucket for each objectId (good) but it takes all the documents, not only the ones form top_hits. I have the same results as if I didn't use top_hits sub-aggregations.
Here are my results :
{
"took" : 76,
"timed_out" : false,
"_shards" : {
"total" : 1,
"successful" : 1,
"skipped" : 0,
"failed" : 0
},
"hits" : {
"total" : {
"value" : 8,
"relation" : "eq"
},
"max_score" : 1.0,
"hits" : [
{
"_index" : "my-index-000001",
"_id" : "ePhHHZQBEON9Xl1F-JWQ",
"_score" : 1.0,
"_source" : {
"objectId" : "1",
"dateTime" : "2024-12-31T00:00:00",
"status" : "SUCCESS"
}
},
{
"_index" : "my-index-000001",
"_id" : "efhIHZQBEON9Xl1F2ZU8",
"_score" : 1.0,
"_source" : {
"objectId" : "1",
"dateTime" : "2024-12-31T01:00:00",
"status" : "REVIEW"
}
},
{
"_index" : "my-index-000001",
"_id" : "evhJHZQBEON9Xl1FWpV-",
"_score" : 1.0,
"_source" : {
"objectId" : "1",
"dateTime" : "2024-12-31T02:00:00",
"status" : "MANUAL-SUCCESS"
}
},
{
"_index" : "my-index-000001",
"_id" : "e_hJHZQBEON9Xl1F3JW-",
"_score" : 1.0,
"_source" : {
"objectId" : "1",
"dateTime" : "2024-12-31T03:00:00",
"status" : "REVIEW"
}
},
{
"_index" : "my-index-000001",
"_id" : "fPhKHZQBEON9Xl1Fh5WG",
"_score" : 1.0,
"_source" : {
"objectId" : "1",
"dateTime" : "2024-12-31T04:00:00",
"status" : "MANUAL_FAIL"
}
},
{
"_index" : "my-index-000001",
"_id" : "ffhLHZQBEON9Xl1FD5Uc",
"_score" : 1.0,
"_source" : {
"objectId" : "2",
"dateTime" : "2024-12-31T00:30:00",
"status" : "SUCCESS"
}
},
{
"_index" : "my-index-000001",
"_id" : "fvhLHZQBEON9Xl1FbJV8",
"_score" : 1.0,
"_source" : {
"objectId" : "2",
"dateTime" : "2024-12-31T01:30:00",
"status" : "REVIEW"
}
},
{
"_index" : "my-index-000001",
"_id" : "f_hLHZQBEON9Xl1F45Ux",
"_score" : 1.0,
"_source" : {
"objectId" : "2",
"dateTime" : "2024-12-31T02:30:00",
"status" : "MANUAL-SUCCESS"
}
}
]
},
"aggregations" : {
"group_by_status" : {
"doc_count_error_upper_bound" : 0,
"sum_other_doc_count" : 0,
"buckets" : [
{
"key" : "REVIEW",
"doc_count" : 3,
"group_by_id" : {
"doc_count_error_upper_bound" : 0,
"sum_other_doc_count" : 0,
"buckets" : [
{
"key" : "1",
"doc_count" : 2,
"top_id_hits" : {
"hits" : {
"total" : {
"value" : 2,
"relation" : "eq"
},
"max_score" : null,
"hits" : [
{
"_index" : "my-index-000001",
"_id" : "e_hJHZQBEON9Xl1F3JW-",
"_score" : null,
"_source" : {
"objectId" : "1",
"dateTime" : "2024-12-31T03:00:00",
"status" : "REVIEW"
},
"sort" : [
1735614000000
]
}
]
}
}
},
{
"key" : "2",
"doc_count" : 1,
"top_id_hits" : {
"hits" : {
"total" : {
"value" : 1,
"relation" : "eq"
},
"max_score" : null,
"hits" : [
{
"_index" : "my-index-000001",
"_id" : "fvhLHZQBEON9Xl1FbJV8",
"_score" : null,
"_source" : {
"objectId" : "2",
"dateTime" : "2024-12-31T01:30:00",
"status" : "REVIEW"
},
"sort" : [
1735608600000
]
}
]
}
}
}
]
}
},
{
"key" : "MANUAL-SUCCESS",
"doc_count" : 2,
"group_by_id" : {
"doc_count_error_upper_bound" : 0,
"sum_other_doc_count" : 0,
"buckets" : [
{
"key" : "1",
"doc_count" : 1,
"top_id_hits" : {
"hits" : {
"total" : {
"value" : 1,
"relation" : "eq"
},
"max_score" : null,
"hits" : [
{
"_index" : "my-index-000001",
"_id" : "evhJHZQBEON9Xl1FWpV-",
"_score" : null,
"_source" : {
"objectId" : "1",
"dateTime" : "2024-12-31T02:00:00",
"status" : "MANUAL-SUCCESS"
},
"sort" : [
1735610400000
]
}
]
}
}
},
{
"key" : "2",
"doc_count" : 1,
"top_id_hits" : {
"hits" : {
"total" : {
"value" : 1,
"relation" : "eq"
},
"max_score" : null,
"hits" : [
{
"_index" : "my-index-000001",
"_id" : "f_hLHZQBEON9Xl1F45Ux",
"_score" : null,
"_source" : {
"objectId" : "2",
"dateTime" : "2024-12-31T02:30:00",
"status" : "MANUAL-SUCCESS"
},
"sort" : [
1735612200000
]
}
]
}
}
}
]
}
},
{
"key" : "SUCCESS",
"doc_count" : 2,
"group_by_id" : {
"doc_count_error_upper_bound" : 0,
"sum_other_doc_count" : 0,
"buckets" : [
{
"key" : "1",
"doc_count" : 1,
"top_id_hits" : {
"hits" : {
"total" : {
"value" : 1,
"relation" : "eq"
},
"max_score" : null,
"hits" : [
{
"_index" : "my-index-000001",
"_id" : "ePhHHZQBEON9Xl1F-JWQ",
"_score" : null,
"_source" : {
"objectId" : "1",
"dateTime" : "2024-12-31T00:00:00",
"status" : "SUCCESS"
},
"sort" : [
1735603200000
]
}
]
}
}
},
{
"key" : "2",
"doc_count" : 1,
"top_id_hits" : {
"hits" : {
"total" : {
"value" : 1,
"relation" : "eq"
},
"max_score" : null,
"hits" : [
{
"_index" : "my-index-000001",
"_id" : "ffhLHZQBEON9Xl1FD5Uc",
"_score" : null,
"_source" : {
"objectId" : "2",
"dateTime" : "2024-12-31T00:30:00",
"status" : "SUCCESS"
},
"sort" : [
1735605000000
]
}
]
}
}
}
]
}
},
{
"key" : "MANUAL_FAIL",
"doc_count" : 1,
"group_by_id" : {
"doc_count_error_upper_bound" : 0,
"sum_other_doc_count" : 0,
"buckets" : [
{
"key" : "1",
"doc_count" : 1,
"top_id_hits" : {
"hits" : {
"total" : {
"value" : 1,
"relation" : "eq"
},
"max_score" : null,
"hits" : [
{
"_index" : "my-index-000001",
"_id" : "fPhKHZQBEON9Xl1Fh5WG",
"_score" : null,
"_source" : {
"objectId" : "1",
"dateTime" : "2024-12-31T04:00:00",
"status" : "MANUAL_FAIL"
},
"sort" : [
1735617600000
]
}
]
}
}
}
]
}
}
]
}
}
}
but in this example, I have only 2 different objectId, the first one has the status "MANUAL_FAIL" as latest, so it has to be counted only in this status bucket and the 2nd one in "MANUAL-SUCCESS" status bucket. Not all intermediate statuses.
Any help is welcome
Thanks !