Hi,
I'm currently looking at using reverse nested aggregations with filters and have a question regarding the filter 'scope' within the aggregations. It's probably best explained through examples:
Create an index with the following mapping:
"_doc": {
"properties": {
"firstNested": {
"type": "nested",
"properties": {
"firstNestedName": {
"type": "keyword"
},
"count": {
"type": "long"
},
"secondNested": {
"type": "nested",
"properties": {
"secondNestedName": {
"type": "keyword"
},
"count": {
"type": "long"
}
}
}
}
},
"count": {
"type": "long"
},
"rootName": {
"type": "keyword"
}
}
}
}
Index a document:
"count" : 100,
"rootName" : 10000,
"firstNested" : [
{
"firstNestedName" : "Nested1",
"count" : 20,
"secondNested" : [
{
"secondNestedName" : "Jim",
"count" : 10
},
{
"secondNestedName" : "Bill",
"count" : 10
}
]
},
{
"firstNestedName" : "Nested2",
"count" : 80,
"secondNested" : [
{
"secondNestedName" : "Jim",
"count" : 50
},
{
"secondNestedName" : "Bill",
"count" : 30
}
]
}
]
}
Now run an aggregation which should filter on the secondNestedName field for the value "Jim" and run a terms aggregation on the rootName with the count field from the secondNested document being summed:
"size": 0,
"query": {
"match_all": {}
},
"aggregations": {
"firstNested": {
"nested": {
"path": "firstNested"
},
"aggregations": {
"secondNested": {
"nested": {
"path": "firstNested.secondNested"
},
"aggregations": {
"secondNestedNameFilter": {
"filter": {
"match": {
"firstNested.secondNested.secondNestedName": "Jim"
}
},
"aggregations": {
"rootName_reverse": {
"reverse_nested": {},
"aggregations": {
"rootName_values": {
"terms": {
"field": "rootName"
},
"aggregations": {
"secondNestedSum": {
"nested": {
"path": "firstNested.secondNested"
},
"aggregations": {
"secondNestedNameFilterRepeated": {
"filter": {
"match": {
"firstNested.secondNested.secondNestedName": "Jim"
}
},
"aggregations": {
"totalCount": {
"sum": {
"field": "firstNested.secondNested.count"
}
}
}
}
}
}
}
}
}
}
}
}
}
}
}
}
}
}
The response from this (when run without the repeated "secondNestedNameFilterRepeated" filter inside "secondNestedSum") is:
"took": 5,
"timed_out": false,
"_shards": {
"total": 5,
"successful": 5,
"skipped": 0,
"failed": 0
},
"hits": {
"total": 1,
"max_score": 0,
"hits": []
},
"aggregations": {
"firstNested": {
"doc_count": 2,
"secondNested": {
"doc_count": 4,
"secondNestedNameFilter": {
"doc_count": 2,
"rootName_reverse": {
"doc_count": 1,
"rootName_values": {
"doc_count_error_upper_bound": 0,
"sum_other_doc_count": 0,
"buckets": [
{
"key": "10000",
"doc_count": 1,
"secondNestedSum": {
"doc_count": 4,
"totalCount": {
"value": 100
}
}
}
]
}
}
}
}
}
}
}
I'd expect the totalCount value under secondNestedSum to be 60, with a doc_count of 2 rather than 4; instead, the response shows a sum of 100 over all 4 secondNested documents. I can repeat the filter inside the "secondNestedSum" aggregation to get the correct result; however, this means duplicating the filter. I'm assuming that the reverse nested aggregation causes the sub-aggregations to lose the current filter context? Is there any way to resolve this other than duplicating the filter(s)?
Thanks,
Brent