SubAggregations with array fields - more results than expected


(Eliran Moyal) #1

Hey, I have an index that contains a field named "message", which can be either a string or an array of strings.

I'm trying to run the equivalent of this SQL query:
select sumsum , message, count(*) from myIndex
where message in ('a','d')
group by sumsum , message

So I ran a simple query with a terms aggregation and a terms sub-aggregation,
but got more buckets than expected (buckets for messages that are neither "a" nor "d").

Someone suggested that I try using nested objects,
so I re-indexed my data and changed the query to:
{
"from": 0,
"size": 0,
"query": {
"filtered": {
"filter": {
"bool": {
"must": {
"or": {
"filters": [
{
"nested": {
"path": "message",
"query": {
"match": {
"message.name": {
"query": "a",
"type": "phrase"
}
}
}
}
},
{
"nested": {
"path": "message",
"query": {
"match": {
"message.name": {
"query": "d",
"type": "phrase"
}
}
}
}
}
]
}
}
}
}
}
},
"aggregations": {
"sumsum": {
"terms": {
"field": "sumsum",
"size": 3
},
"aggregations": {
"message.name": {
"nested": {
"path": "message"
},
"aggregations": {
"names": {
"terms": {
"field": "message.name",
"size": 0
}
}
}
}
}
}
}
}

But I still got more buckets than expected. Example output:
{
"took": 37,
"timed_out": false,
"_shards": {
"total": 5,
"successful": 5,
"failed": 0
},
"hits": {
"total": 194,
"max_score": 0,
"hits": [

]

},
"aggregations": {
"sumsum": {
"doc_count_error_upper_bound": 0,
"sum_other_doc_count": 115,
"buckets": [
{
"key": 0,
"doc_count": 28,
"message.name": {
"doc_count": 28,
"names": {
"doc_count_error_upper_bound": 0,
"sum_other_doc_count": 0,
"buckets": [
{
"key": "a",
"doc_count": 17
},
{
"key": "d",
"doc_count": 11
},
{
"key": "c",
"doc_count": 1
},
{
"key": "f",
"doc_count": 1
},
{
"key": "h",
"doc_count": 1
},
{
"key": "i",
"doc_count": 1
}
]
}
}
},
{
"key": 1,
"doc_count": 27,
"message.name": {
"doc_count": 27,
"names": {
"doc_count_error_upper_bound": 0,
"sum_other_doc_count": 0,
"buckets": [
{
"key": "d",
"doc_count": 14
},
{
"key": "a",
"doc_count": 13
},
{
"key": "f",
"doc_count": 1
},
{
"key": "g",
"doc_count": 1
},
{
"key": "h",
"doc_count": 1
}
]
}
}
},
{
"key": 2,
"doc_count": 24,
"message.name": {
"doc_count": 24,
"names": {
"doc_count_error_upper_bound": 0,
"sum_other_doc_count": 0,
"buckets": [
{
"key": "a",
"doc_count": 12
},
{
"key": "d",
"doc_count": 12
},
{
"key": "b",
"doc_count": 1
}
]
}
}
}
]
}
}
}


(system) #2