How to compare two buckets and sort by result?

Hi,

I use elasticsearch 6.5.4 on Ubuntu 17.10 with openjdk version 1.8.0_171. I have an index with these mappings:

"mappings": {
  "rawdata": {
     "properties": {
       "date": {
         "type": "date"},
       "impressions": {
          "type": "integer"},
       "url": {
         "type": "keyword"}}}}

I want to do something like this:

{
        "position_history": {
          "range": {
            "field": "date",
            "ranges": [
              {"from": "2018-12-09", "to": "2018-12-16"},
              {"from": "2018-12-16", "to": "2018-12-23"}
            ]
          },
          "aggs": {
            "total_range_impressions": {
              "sum": {
                "field": "impressions"
              }
            }
          }
        },
        "change_impressions": {
          "bucket_script": {
            "buckets_path": {
              "val1": "position_history>bucket_0",
              "val2": "position_history>bucket_1"
            },
            "script": "(params.val2 - params.val1)"
          }
        },
        "sorted_keywords": {
          "bucket_sort": {
            "sort": {
              "change_impressions": {
                "order": "desc"}}}}}}}}

I want to get difference between total_impressions from first bucket and second bucket. Next, save result in aggregation change_impressions, and sort all buckets by this value. But as I know, elasticsearch not support it syntax. I found derivative aggregation. It can calculate differece, but works only for histograms. And here I got first problem. Histograms are working very strange. For example, I want to get two buckets:

  • 2018-12-09 - 2018-12-16
  • 2018-12-16 - 2018-12-23

I request histogram with interval 7 days:

 {
      "query": {
        "bool": {
          "filter": {
            "range": {
              "date": {"gte": "2018-12-09", "lte": "2018-12-23"}
            }
          }
        }
      },
      "size": 0,
          "aggs": {
            "total_impressions": {
              "sum": {
                "field": "impressions"
              }
            },
            "position_history": {
              "date_histogram": {
                "field": "date",
                "interval": "7d"
              },
              "aggs": {
                "total_range_impressions": {
                  "sum": {
                    "field": "impressions"
                  }
                },
                "end_date": {
                  "max": {
                    "field": "date"
                  }
                },
                "start_date": {
                  "min": {
                    "field": "date"
                  }
                }
              }
            },
            "sorted_keywords": {
              "bucket_sort": {
                "sort": {
                  "total_impressions": {
                    "order": "desc"}}}}}}}}

But elasticsearch returns three buckets:

  • 2018-12-09 - 2018-12-12 (interval - 4 days)
  • 2018-12-13 - 2018-12-19 (interval - 5 days)
  • 2018-12-20 - 2018-12-21 (interval - 1 day)

Why it happens? I asked for interval 7 days, no 5, no 4!

"aggregations": {
      * "position_history": {
        * "buckets": [
          * {
            * "key_as_string": "2018-12-06",
            * "doc_count": 1888,
            * "end_date": {
              * "value_as_string": "2018-12-12"},
            * "total_range_impressions": {
              * "value": 8831},
            * "start_date": {
              * "value_as_string": "2018-12-09"}}
,          * {
            * "key_as_string": "2018-12-13",
            * "end_date": {
              * "value_as_string": "2018-12-19"},
            * "total_range_impressions": {
              * "value": 17138},
            * "start_date": {
              * "value_as_string": "2018-12-13"}}
,          * {
            * "key_as_string": "2018-12-20",
            * "end_date": {
              * "value": 1545350400000,
              * "value_as_string": "2018-12-21T00:00:00.000Z"},
            * "total_range_impressions": {
              * "value": 4127},
            * "start_date": {
              * "value_as_string": "2018-12-20T00:00:00.000Z"}}]},
      * "total_impressions": {
        * "value": 30096}}

Okay, Next I was trying to use auto_date_histogram aggragation, and asked it for two buckets when dates range was from 2018-10-01 to 2018-12-01:

{
  "query": {
    "bool": {
      "filter": {
        "range": {
          "date": {
            "gte": "2018-10-01",
            "lte": "2018-12-01"
          }
        }
      }
    }
  },
  "size": 0,
  "aggs": {
    "positions": {
      "terms": {
        "field": "url",
        "size": 10000
      },
      "aggs": {
        "position_history": {
          "auto_date_histogram": {
            "field": "date",
            "buckets": "2"
          },
          "aggs": {
            "total_range_impressions": {
              "sum": {
                "field": "impressions"
              }
            },
            "end_date": {
              "max": {
                "field": "date"
              }
            },
            "start_date": {
              "min": {
                "field": "date"
              }
            }
          }
        },
        "sorted_keywords": {
          "bucket_sort": {
            "sort": {
              "total_impressions": {
                "order": "desc"
              }
            }
          }}}}}}

I was expecting to get two buckets:

  • 2018-10-01 - 2018-11-01
  • 2018-11-01 - 2018-12-01

Result:

"aggregations": {
      * "position_history": {
        * "buckets": [
          * {
            * "key_as_string": "2018-10-01",
            * "end_date": {
              * "value_as_string": "2018-12-01"},
            * "total_range_impressions": {
              * "value": 322086},
            * "start_date": {
              * "value_as_string": "2018-10-01"}}],
        * "interval": "3M"}

But elasticsearch return one bucket with interval 3 months. What happens with histograms and how I can solve the problems?

This topic was automatically closed 28 days after the last reply. New replies are no longer allowed.