Post Filtering Date histogram aggregation bucket results not working as intended

I have an aggregation query where I am trying to calculate the max standard deviation of the number of destination ips per IP Address for a certain time range. As everyone knows the common problem with the moving function std_dev aggregation function, the first 2 days' std dev values will always be null and 0 respectively due to no data being taken into account previously.

Here is my aggregation query:

{
  "size": 0,
  "query": {
    "bool": {
      "must": [
        {
          "exists": {
            "field": "aggregations.range.buckets.by ip.buckets.by date.buckets.max_dest_ips.value"
          }
        }
      ]
    }
  },
  "aggs": {
    "range": {
      "date_range": {
        "field": "Source Time",
        "ranges": [
          {
            "from": "2018-04-25",
            "to": "2018-05-02"
          }
        ]
      },
      "aggs": {
        "by ip": {
          "terms": {
            "field": "IP Address.keyword",
            "size": 500
          },
          "aggs": {
            "datehisto": {
              "date_histogram": {
                "field": "Source Time",
                "interval": "day"
              },
              "aggs": {
                "max_dest_ips": {
                  "sum": {
                    "field": "aggregations.range.buckets.by ip.buckets.by date.buckets.max_dest_ips.value"
                  }
                },
                "max_dest_ips_std_dev": {
                  "moving_fn": {
                    "buckets_path": "max_dest_ips",
                    "window": 3,
                    "script": "MovingFunctions.stdDev(values, MovingFunctions.unweightedAvg(values))"
                  }
                }
              }
            }
          }
        }
      }
    }
  },
  "post_filter": {
    "range": {
      "Source Time": {
        "gte": "2018-05-01"
      }
    }
  }
}

Here is a snippet of the response:

{
"key": "192.168.0.1",
"doc_count": 6,
"datehisto": {
  "buckets": [
    {
      "key_as_string": "2018-04-25T00:00:00.000Z",
      "key": 1524614400000,
      "doc_count": 1,
      "max_dest_ips": {
        "value": 309
      },
      "max_dest_ips_std_dev": {
        "value": null
      }
    },
    {
      "key_as_string": "2018-04-26T00:00:00.000Z",
      "key": 1524700800000,
      "doc_count": 1,
      "max_dest_ips": {
        "value": 529
      },
      "max_dest_ips_std_dev": {
        "value": 0
      }
    },
    {
      "key_as_string": "2018-04-27T00:00:00.000Z",
      "key": 1524787200000,
      "doc_count": 1,
      "max_dest_ips": {
        "value": 408
      },
      "max_dest_ips_std_dev": {
        "value": 110
      }
    },
    {
      "key_as_string": "2018-04-28T00:00:00.000Z",
      "key": 1524873600000,
      "doc_count": 1,
      "max_dest_ips": {
        "value": 187
      },
      "max_dest_ips_std_dev": {
        "value": 89.96419040682551
      }
    }
]
}
}

What I want is for the first 2 days' bucket data (25th and 26th) to be filtered and removed from the above bucket results. I have tried the post filter above and the normal query filter below:

  "filter": {
    "range": {
      "Source Time": {
        "gte": "2018-04-27"
      }
    }
  }

The Post Filter does nothing and doesn't work. The above filter range query makes the buckets start from the 27th but also makes the standard deviation calculations start on 27th as well (resulting in 27th being null and 28th being 0) when I want it to start from the 25th instead.

Any other alternative solutions to calculate moving standard deviation? Help is greatly appreciated!

This topic was automatically closed 28 days after the last reply. New replies are no longer allowed.