Find differenence of a fields from 2 bucket values

Hi,

I have a requirement where I want to find number total number of hits for for each 1 hours interval for last 2 hours and find diff between total hits of current hours to previous hours for each action , how can i bucket script for that

Here is my aggs:

{
     "size": 0,
          "aggs": {
            "group_by_hits":
            {
              "terms": {
                "field": "action"
              },
                      "aggs": {
                        "range": {
                          "date_range": {
                            "field": "Date",
                            "ranges": [
                              {
                                "from": "now-2h",
                                "to": "now-1h",
                                "key": "earlier"
                              },
                              {
                                "from": "now-1h",
                                "to": "now",
                                "key": "latest"
                              }
                            ],
                            "keyed": true
                          },
                          "aggs": {
                            "total_count": {
                              "sum": {
                                "field": "count"
                              }
                            }
                          }
                        }
}
            }
          }
}

Sample Results:

{
  "took": 281,
  "timed_out": false,
  "_shards": {
    "total": 91,
    "successful": 91,
    "skipped": 0,
    "failed": 0
  },
  "hits": {
    "total": 3139730,
    "max_score": 0,
    "hits": []
  },
  "aggregations": {
    "group_by_hits": {
      "doc_count_error_upper_bound": 0,
      "sum_other_doc_count": 0,
      "buckets": [
        {
          "key": "ATB",
          "doc_count": 2613996,
          "range": {
            "buckets": {
              "earlier": {
                "from": 1527163240388,
                "from_as_string": "Thu May 24 08:00:40 2018 -0400",
                "to": 1527166840388,
                "to_as_string": "Thu May 24 09:00:40 2018 -0400",
                "doc_count": 4416,
                "total_count": {
                  "value": 26178
                }
              },
              "latest": {
                "from": 1527166840388,
                "from_as_string": "Thu May 24 09:00:40 2018 -0400",
                "to": 1527170440388,
                "to_as_string": "Thu May 24 10:00:40 2018 -0400",
                "doc_count": 4511,
                "total_count": {
                  "value": 35550
                }
              }
            }
          }
        },
        {
          "key": "QuickView",
          "doc_count": 418928,
          "range": {
            "buckets": {
              "earlier": {
                "from": 1527163240388,
                "from_as_string": "Thu May 24 08:00:40 2018 -0400",
                "to": 1527166840388,
                "to_as_string": "Thu May 24 09:00:40 2018 -0400",
                "doc_count": 290,
                "total_count": {
                  "value": 312
                }
              },
              "latest": {
                "from": 1527166840388,
                "from_as_string": "Thu May 24 09:00:40 2018 -0400",
                "to": 1527170440388,
                "to_as_string": "Thu May 24 10:00:40 2018 -0400",
                "doc_count": 398,
                "total_count": {
                  "value": 438
                }
              }
            }
          }
        }
      ]
    }
  }
}

I want to do something like this "diff_count": latest.total_count-earlier.total_count

Doing it with the range aggregation is going to be tricky/impossible, since they don't work with most pipeline aggs (see #29250).

Instead, I'd use a DateHistogram with hourly interval, then a Derivative pipeline agg to find the difference between each subsequent bucket. If you only want that two hour interval, you could limit the time range with a filter in the query. Or use the BucketSort pipeline agg to limit the data histogram to two buckets before doing the derivative.

This topic was automatically closed 28 days after the last reply. New replies are no longer allowed.