I have big trouble with cardinality and term aggregation. Different aggregation over my index with 2700k entries deliver me diverse count/sum. Is it possible to get the correct count from ES like MySql or is ES not the best store to handle my data.
My ES Version is 6.0.1
This is my query with some aggregation, where i use different techniques to aggregate my dataset
Unique sessions
- cardinality = 44689
- terms + script > sum = 44678
Day histogram with unique sessions
- date_histogram > terms + script > sum = 44724
- date_histogram > cardinality = 44723
I don't known where is my mistake and why ES show me different result.
GET campaign-1/_search?filter_path=hits.total,aggregations.dayPerformance.buckets.dayCount,aggregations.dayPerformance.buckets.key_as_string,aggregations.uniqueSession,aggregations.totalCount,aggregations.totalTermCount,aggregations.dayPerformance.buckets.uniqueCount,aggregations.totalCount2
{
"size": 0,
"query": {
"bool": {
"must": [
{
"match_all": {}
},
{
"term": {
"payout.term": 1
}
},
{
"range": {
"createdAt": {
"gte": "1524787200000",
"lte": "1525564740000",
"format": "epoch_millis"
}
}
},
{
"term": {
"environment": "LIVE"
}
}
],
"must_not": [
{
"term": {
"event": ""
}
},
{
"term": {
"session": ""
}
},
{
"term": {
"event": "View"
}
}
]
}
},
"aggs": {
"dayPerformance": {
"date_histogram": {
"field": "createdAt",
"interval": "day"
},
"aggs": {
"uniqueInteractions": {
"terms": {
"field": "session",
"size": 100000,
"shard_size": 1000
},
"aggs": {
"forCount": {
"bucket_script": {
"buckets_path": {
"count": "_count"
},
"script": "1"
}
}
}
},
"dayCount": {
"sum_bucket": {
"buckets_path": "uniqueInteractions>forCount"
}
},
"uniqueCount": {
"cardinality": {
"field": "session",
"precision_threshold": 40000
}
}
}
},
"uniqueSession": {
"cardinality": {
"field": "session",
"precision_threshold": 40000
}
},
"totalCount": {
"sum_bucket": {
"buckets_path": "dayPerformance>dayCount"
}
},
"totalCount2": {
"sum_bucket": {
"buckets_path": "dayPerformance>uniqueCount"
}
},
"uniqueSessionTerm": {
"terms": {
"field": "session",
"size": 10000000,
"shard_size": 15
},
"aggs": {
"forCount": {
"bucket_script": {
"buckets_path": {
"count": "_count"
},
"script": "1"
}
}
}
},
"totalTermCount": {
"sum_bucket": {
"buckets_path": "uniqueSessionTerm>forCount"
}
}
}
}
And this is my Response:
{
"hits": {
"total": 55835
},
"aggregations": {
"uniqueSession": {
"value": 44689
},
"dayPerformance": {
"buckets": [
{
"key_as_string": "2018-04-27T00:00:00.000Z",
"uniqueCount": {
"value": 4387
},
"dayCount": {
"value": 4387
}
},
{
"key_as_string": "2018-04-28T00:00:00.000Z",
"uniqueCount": {
"value": 14
},
"dayCount": {
"value": 14
}
},
{
"key_as_string": "2018-04-29T00:00:00.000Z",
"uniqueCount": {
"value": 850
},
"dayCount": {
"value": 850
}
},
{
"key_as_string": "2018-04-30T00:00:00.000Z",
"uniqueCount": {
"value": 1839
},
"dayCount": {
"value": 1840
}
},
{
"key_as_string": "2018-05-01T00:00:00.000Z",
"uniqueCount": {
"value": 8487
},
"dayCount": {
"value": 8487
}
},
{
"key_as_string": "2018-05-02T00:00:00.000Z",
"uniqueCount": {
"value": 5875
},
"dayCount": {
"value": 5874
}
},
{
"key_as_string": "2018-05-03T00:00:00.000Z",
"uniqueCount": {
"value": 10923
},
"dayCount": {
"value": 10925
}
},
{
"key_as_string": "2018-05-04T00:00:00.000Z",
"uniqueCount": {
"value": 11473
},
"dayCount": {
"value": 11472
}
},
{
"key_as_string": "2018-05-05T00:00:00.000Z",
"uniqueCount": {
"value": 875
},
"dayCount": {
"value": 875
}
}
]
},
"totalCount": {
"value": 44724
},
"totalCount2": {
"value": 44723
},
"totalTermCount": {
"value": 44678
}
}
}