Hello,
I am seeing what is (IMHO) unexpected behaviour from the date histogram aggregation when the offset and extended_bounds options are used together.
Query using both offset and extended_bounds that returns unexpected results:
{
"size": 0,
"query": {
"bool": {
"must": [
{
"range": {
"timestamp": {
"gte": "2017-04-04T02:30:00",
"lte": "2017-04-05T02:29:59",
"time_zone": "Europe/Berlin"
}
}
}
]
}
},
"aggs": {
"time_aggregation": {
"date_histogram": {
"field": "timestamp",
"interval": "day",
"min_doc_count": 0,
"extended_bounds": {
"min": "2017-04-04T02:30:00",
"max": "2017-04-05T02:29:59"
},
"offset": "+150m",
"time_zone": "Europe/Berlin"
},
"aggs": {}
}
}
}
Response:
{
"took": 39,
"timed_out": false,
"_shards": {
"total": 5,
"successful": 5,
"failed": 0
},
"hits": {
"total": 77906,
"max_score": 0,
"hits": [
]
},
"aggregations": {
"time_aggregation": {
"buckets": [
{
"key_as_string": "2017-04-04T00:00:00.000+02:00",
"key": 1491256800000,
"doc_count": 0
},
{
"key_as_string": "2017-04-04T02:30:00.000+02:00",
"key": 1491265800000,
"doc_count": 77906
},
{
"key_as_string": "2017-04-05T00:00:00.000+02:00",
"key": 1491343200000,
"doc_count": 0
}
]
}
}
}
This is unexpected because I expected only one bucket (the one with key "2017-04-04T02:30:00.000+02:00").
If I use only offset without extended_bounds:
{
"size": 0,
"query": {
"bool": {
"must": [
{
"range": {
"timestamp": {
"gte": "2017-04-04T02:30:00",
"lte": "2017-04-05T02:29:59",
"time_zone": "Europe/Berlin"
}
}
}
]
}
},
"aggs": {
"time_aggregation": {
"date_histogram": {
"field": "timestamp",
"interval": "day",
"min_doc_count": 0,
"offset": "+150m",
"time_zone": "Europe/Berlin"
},
"aggs": {}
}
}
}
I get the expected response:
{
"took": 41,
"timed_out": false,
"_shards": {
"total": 5,
"successful": 5,
"failed": 0
},
"hits": {
"total": 77906,
"max_score": 0,
"hits": [
]
},
"aggregations": {
"time_aggregation": {
"buckets": [
{
"key_as_string": "2017-04-04T02:30:00.000+02:00",
"key": 1491265800000,
"doc_count": 77906
}
]
}
}
}
And if I use only extended_bounds without offset:
{
"size": 0,
"query": {
"bool": {
"must": [
{
"range": {
"timestamp": {
"gte": "2017-04-04T00:00:00",
"lte": "2017-04-04T23:59:59",
"time_zone": "Europe/Berlin"
}
}
}
]
}
},
"aggs": {
"time_aggregation": {
"date_histogram": {
"field": "timestamp",
"interval": "day",
"min_doc_count": 0,
"extended_bounds": {
"min": "2017-04-04T00:00:00",
"max": "2017-04-04T23:59:59"
},
"time_zone": "Europe/Berlin"
},
"aggs": {}
}
}
}
I get the expected response:
{
"took": 432,
"timed_out": false,
"_shards": {
"total": 5,
"successful": 5,
"failed": 0
},
"hits": {
"total": 68107,
"max_score": 0,
"hits": [
]
},
"aggregations": {
"time_aggregation": {
"buckets": [
{
"key_as_string": "2017-04-04T00:00:00.000+02:00",
"key": 1491256800000,
"doc_count": 68107
}
]
}
}
}
I need to use both options because I want empty buckets when there are no documents in the interval.
Am I doing anything wrong?
Thanks!