Find differenence of a fields from 2 bucket values


(Leela Kumili) #1

Hi,

I have a requirement where I want to find number total number of hits for for each 1 hours interval for last 2 hours and find diff between total hits of current hours to previous hours for each action , how can i bucket script for that

Here is my aggs:

{
     "size": 0,
          "aggs": {
            "group_by_hits":
            {
              "terms": {
                "field": "action"
              },
                      "aggs": {
                        "range": {
                          "date_range": {
                            "field": "Date",
                            "ranges": [
                              {
                                "from": "now-2h",
                                "to": "now-1h",
                                "key": "earlier"
                              },
                              {
                                "from": "now-1h",
                                "to": "now",
                                "key": "latest"
                              }
                            ],
                            "keyed": true
                          },
                          "aggs": {
                            "total_count": {
                              "sum": {
                                "field": "count"
                              }
                            }
                          }
                        }
}
            }
          }
}

Sample Results:

{
  "took": 281,
  "timed_out": false,
  "_shards": {
    "total": 91,
    "successful": 91,
    "skipped": 0,
    "failed": 0
  },
  "hits": {
    "total": 3139730,
    "max_score": 0,
    "hits": []
  },
  "aggregations": {
    "group_by_hits": {
      "doc_count_error_upper_bound": 0,
      "sum_other_doc_count": 0,
      "buckets": [
        {
          "key": "ATB",
          "doc_count": 2613996,
          "range": {
            "buckets": {
              "earlier": {
                "from": 1527163240388,
                "from_as_string": "Thu May 24 08:00:40 2018 -0400",
                "to": 1527166840388,
                "to_as_string": "Thu May 24 09:00:40 2018 -0400",
                "doc_count": 4416,
                "total_count": {
                  "value": 26178
                }
              },
              "latest": {
                "from": 1527166840388,
                "from_as_string": "Thu May 24 09:00:40 2018 -0400",
                "to": 1527170440388,
                "to_as_string": "Thu May 24 10:00:40 2018 -0400",
                "doc_count": 4511,
                "total_count": {
                  "value": 35550
                }
              }
            }
          }
        },
        {
          "key": "QuickView",
          "doc_count": 418928,
          "range": {
            "buckets": {
              "earlier": {
                "from": 1527163240388,
                "from_as_string": "Thu May 24 08:00:40 2018 -0400",
                "to": 1527166840388,
                "to_as_string": "Thu May 24 09:00:40 2018 -0400",
                "doc_count": 290,
                "total_count": {
                  "value": 312
                }
              },
              "latest": {
                "from": 1527166840388,
                "from_as_string": "Thu May 24 09:00:40 2018 -0400",
                "to": 1527170440388,
                "to_as_string": "Thu May 24 10:00:40 2018 -0400",
                "doc_count": 398,
                "total_count": {
                  "value": 438
                }
              }
            }
          }
        }
      ]
    }
  }
}

I want to do something like this "diff_count": latest.total_count-earlier.total_count


(Zachary Tong) #2

Doing it with the range aggregation is going to be tricky/impossible, since they don't work with most pipeline aggs (see #29250).

Instead, I'd use a DateHistogram with hourly interval, then a Derivative pipeline agg to find the difference between each subsequent bucket. If you only want that two hour interval, you could limit the time range with a filter in the query. Or use the BucketSort pipeline agg to limit the data histogram to two buckets before doing the derivative.


(system) #3

This topic was automatically closed 28 days after the last reply. New replies are no longer allowed.