Bucket sorting dates

I am using Elastic 7.11 and I was trying to implement bucket_sort in a field type date and I couldn't, so I kind of workarrounded it haha, but I'm sure there is a more efficient way of doing it. This is the kind of documents I'm trying to agg

...,
"_source" : {
          "status" : "Status3",
          "emission_date" : "2021-06-30T11:53:38Z",
          "procedureTime" : "",
          "_rev" : "2-9b6766ed87afbbba3deff81b26bbd223",
          "number" : 6,
          "quota" : [
            "General"
          ],
          "user_by_procedure" : 1,
          "active" : true,
          "awaiting" : 0,
          "event" : {
            "date" : "2021-06-30T11:59:38Z",
            "doc_type" : "Event",
            "data" : {
              "section" : "Caja",
              "user" : "exost"
            },
            "name" : "Status3"
          },
          "category" : "",
          "doc_type" : "Turn",
          "room" : "",
          "implementation" : "implementation_name",
          "section" : "Caja",
          "priority" : 0,
          "csrfmiddlewaretoken" : "TOKEN",
          "identifier" : "C6",
          "procedure" : "CAJA"
        }

These are my query and aggs

GET /index_name/_search
{
  "size": 0,
  "query": {
    "bool": {
      "must": [
        {
          "match": {
            "status": "status1 status2 status3 status4"
          }
        },
        {
          "match": {
            "implementation": "implementation_name"
          }
        },
        {
          "range": {
            "emission_date": {
              "gte": "start_date",
              "lte": "end_date"
            }
          }
        }
      ]
    }
  },
  "aggs": {
    "group_by_emission_date": {
      "aggs": {
        "group_by_implementation": {
          "aggs": {
            "group_by_identyfier": {
              "aggs": {
                "sort_by_occurrence": {
                  "aggs": {
                    "group_by_status": {
                      "terms": {
                        "field": "status.keyword"
                      }
                    }
                  },
                  "date_histogram": {
                    "field": "event.date",
                    "fixed_interval": "1s",
                    "min_doc_count": 1,
                    "order": {
                      "_key": "desc"
                    }
                  }
                }
              }, 
              "terms": {
                "field": "identifier.keyword"
              }
            }
          }, 
          "terms": {
            "field": "implementation.keyword"
            }
        }
      }, 
      "date_histogram": {
        "field": "emission_date",
        "fixed_interval": "1s",
        "min_doc_count": 1
      }
    }
  }
}

I can't avoid that much of grouping arguments because of the lack of an identifier from the source. But that's not the issue. As you can see I'm using two times date_histogram , but the second one on event.date ("sort_by_occurrence") is the one I'm not sure of. What I want is to get the last occurrence (aka: status1, status2, etc) of the event field, so I grouped them by 1 second interval and I will then, with some logic, get the last "Status" of the event.

And what I get back is OK:

"group_by_emission_date" : {
      "buckets" : [
        {
          "key_as_string" : "2021-06-30T11:45:22.000Z",
          "key" : 1625053522000,
          "doc_count" : 4,
          "group_by_implementation" : {
            "doc_count_error_upper_bound" : 0,
            "sum_other_doc_count" : 0,
            "buckets" : [
              {
                "key" : "implementation_name",
                "doc_count" : 4,
                "group_by_identyfier" : {
                  "doc_count_error_upper_bound" : 0,
                  "sum_other_doc_count" : 0,
                  "buckets" : [
                    {
                      "key" : "C4",
                      "doc_count" : 4,
                      "sort_by_occurrence" : {
                        "buckets" : [
                          {
                            "key_as_string" : "2021-06-30T11:56:44.000Z",
                            "key" : 1625054204000,
                            "doc_count" : 1,
                            "group_by_status" : {
                              "doc_count_error_upper_bound" : 0,
                              "sum_other_doc_count" : 0,
                              "buckets" : [
                                {
                                  "key" : "Status1",
                                  "doc_count" : 1
                                }
                              ]
                            }
                          },
                          {
                            "key_as_string" : "2021-06-30T11:55:43.000Z",
                            "key" : 1625054143000,
                            "doc_count" : 1,
                            "group_by_status" : {
                              "doc_count_error_upper_bound" : 0,
                              "sum_other_doc_count" : 0,
                              "buckets" : [
                                {
                                  "key" : "Status2",
                                  "doc_count" : 1
                                }
                              ]
                            }
                          },
                          {
                            "key_as_string" : "2021-06-30T11:51:11.000Z",
                            "key" : 1625053871000,
                            "doc_count" : 1,
                            "group_by_status" : {
                              "doc_count_error_upper_bound" : 0,
                              "sum_other_doc_count" : 0,
                              "buckets" : [
                                {
                                  "key" : "Status3",
                                  "doc_count" : 1
                                }
                              ]
                            }
                          },
                          {
                            "key_as_string" : "2021-06-30T11:47:34.000Z",
                            "key" : 1625053654000,
                            "doc_count" : 1,
                            "group_by_status" : {
                              "doc_count_error_upper_bound" : 0,
                              "sum_other_doc_count" : 0,
                              "buckets" : [
                                {
                                  "key" : "Status4",
                                  "doc_count" : 1
                                }
                              ]
                            }
                          }
                        ]
                      }
                    }
                  ]
                }
              }
            ]
          }
        }

event.date mapping:

{
  "index_name" : {
    "mappings" : {
      "event.date" : {
        "full_name" : "event.date",
        "mapping" : {
          "date" : {
            "type" : "date"
          }
        }
      }
    }
  }
}

I'm using date_histogram for something it's not supposed to, and couldn't get working bucket_sort on event.date . I do also don't care about the other statuses that happened before the last one, but i can't truncate them using date_histogram .

I hope the case is clear, and I'm deeply thankful for the help in advance.

This topic was automatically closed 28 days after the last reply. New replies are no longer allowed.