This query works fine on a regular index, but when I run it against a rolled-up index, the resulting values are about 66% of what I'd expect. The response from the rolled-up index is also missing the `sum_bucket` result (`sum_bytes`)
(not to be confused with the Chum Bucket ;).
I ran some tests for these particular days, verifying that the sum of the `bytes` field matches when comparing the raw index to the rolled-up index.
Query:
{
"size": 0,
"query": {
"bool": {
"must": [
{ "range": { "@timestamp": { "gte": "2018-03-27", "lte": "2018-03-28" } } },
{"term": { "account": "27937601" } }
]
}
},
"aggs": {
"bytes_by_interval": {
"date_histogram": {
"field": "@timestamp",
"interval": "24h"
},
"aggs": { "bytes_per_period": { "sum": { "field": "bytes" } } }
},
"sum_bytes": { "sum_bucket": { "buckets_path": "bytes_by_interval>bytes_per_period" } }
}
}
Result of running it against a regular index:
{
"took": 28,
"timed_out": false,
"_shards": {
"total": 6,
"successful": 6,
"skipped": 0,
"failed": 0
},
"hits": {
"total": 1044898,
"max_score": 0,
"hits": []
},
"aggregations": {
"bytes_by_interval": {
"buckets": [
{
"key_as_string": "2018-03-27T00:00:00.000Z",
"key": 1522108800000,
"doc_count": 604052,
"bytes_per_period": {
"value": 397239457230
}
},
{
"key_as_string": "2018-03-28T00:00:00.000Z",
"key": 1522195200000,
"doc_count": 440846,
"bytes_per_period": {
"value": 284955659441
}
}
]
},
"sum_bytes": {
"value": 682195116671
}
}
}
Result of running it against a rolled-up index:
{
"took": 1,
"timed_out": false,
"terminated_early": false,
"_shards": {
"total": 5,
"successful": 5,
"skipped": 0,
"failed": 0
},
"hits": {
"total": 0,
"max_score": 0,
"hits": []
},
"aggregations": {
"bytes_by_interval": {
"meta": {},
"buckets": [
{
"key_as_string": "2018-03-27T00:00:00.000Z",
"key": 1522108800000,
"doc_count": 434303,
"bytes_per_period": {
"value": 262840318259
}
},
{
"key_as_string": "2018-03-28T00:00:00.000Z",
"key": 1522195200000,
"doc_count": 315924,
"bytes_per_period": {
"value": 190073569431
}
}
]
}
}
}
Let me know if there's anything else you need.