Hi community!
My Elasticsearch version is 7.7.0. I'm facing a weird issue with my aggregation query.
Let's say we have a car index whose data model looks like this:
{
"vin": "08c00679-8a58-11ec-9745-1aed518f8637",
"make": "Lexus",
"model": "GS",
"country": "US",
"state": "CA",
"city": "San-Diego",
"color": "black",
// some other fields
"dates": [
{
"type": "manufacture",
"date": "2010-01-01"
},
{
"type": "lastRepair",
"date": "2022-01-02"
}
]
}
Users can group data in various ways (e.g. by state, city, make, etc.), and the result should contain any 5 values for each field. For this purpose I'm using an aggregation query with a scripted metric aggregation:
GET cars.read / _search {
"size": 0,
"query": {
...
},
"aggregations": {
"root": {
"composite": {
"size": 50,
"sources": [{
"countries": {
"terms": {
"field": "country"
}
}
}, {
"states": {
"terms": {
"field": "state"
}
}
}]
},
"aggregations": {
"cities": {
"scriptedMetric": {
"init_script": "state.values = [];",
"map_script": "if (state.values.length < 5) {state.values.add(doc['city'].value)}",
"combine_script": "return state.values;",
"reduce_script": "def result = []; for (s in states) { for (v in s) { if (!result.contains(v) && result.length < 5) { result.add(value); }}} return result;",
}
},
"citiesCount": {
"cardinality": {
"field": "city"
}
},
// other fields aggregations
"datesNested": {
"nested": {
"path": "dates"
},
"aggregations": {
"dates": {
"terms": {
"field": "dates.type",
"size": 10
},
"aggregations": {
"values": {
"scriptedMetric": {
"init_script": "state.values = [];",
"map_script": "if (state.values.length < 5) {state.values.add(params._source['date'].value)}",
"combine_script": "return state.values;",
"reduce_script": "def result = []; for (s in states) { for (v in s) { if (!result.contains(v) && result.length < 5) { result.add(value); }}} return result;",
}
},
"valueCount": {
"cardinality": {
"field": "dates.date",
"missing": 0
}
}
}
}
}
}
}
}
}
}
The scripted metric aggregation works perfectly well for non-nested fields (e.g. `city`), but for nested fields there is an issue with the `map` script: both `doc` and `params._source` seem to be empty objects. In the `reduce` script, however, these objects are not empty: I'm able to see actual values in the result of an aggregation like the one below:
"datesNested": {
"nested": {
"path": "dates"
},
"aggregations": {
"dateValues": {
"scripted_metric": {
"init_script": "state.values = [];",
"map_script": "state.values.add(params._source)",
"combine_script": "return state.values;",
"reduce_script": "return states;"
}
}
}
}
I tried the same scripted metric aggregation both at the top level (non-nested) and inside the nested aggregation:
"aggregations": {
"dateValues": {
"scripted_metric": {
"init_script": "state.values = [];",
"map_script": "state.values.add(params._source.isEmpty())",
"combine_script": "return state.values;",
"reduce_script": "return states;"
}
},
"datesNested": {
"nested": {
"path": "dates"
},
"aggregations": {
"dateValues": {
"scripted_metric": {
"init_script": "state.values = [];",
"map_script": "state.values.add(params._source.isEmpty())",
"combine_script": "return state.values;",
"reduce_script": "return states;"
}
}
}
}
}
The result was something like below:
{
"dateValues": {
"value": [
[
false
],
[
false
],
[
false
],
[
false
],
[
false,
false
]
]
},
"datesNested": {
"doc_count": 6,
"dateValues": {
"value": [
[
true
],
[
true
],
[
true
],
[
true,
true
],
[
true
]
]
}
}
}
Is it possible to use scripted metric inside nested aggregation?
Thanks in advance,
Alex