I have an index with over 30M documents. Every document has a parentId field and a createdOn field. Modified documents have a modifiedOn field. Dates are stored as epoch_millis. I need to find the top 10 most-recent documents per parentId if at least one of the "child" documents has been created/modified within a date range. I tried using a composite aggregation, but it will take way too long to complete.
{
"aggregations": {
"lists": {
"composite": {
"sources": [
{
"parentId": {
"terms": {
"field": "parentId"
}
}
}
],
"size": 100
},
"aggregations": {
"most_recent": {
"max": {
"script": {
"source": "doc['modifiedOn'].size() == 0 || doc['modifiedOn'].value.millis == null ? doc['createdOn'].value.millis : doc['modifiedOn'].value.millis"
}
}
},
"most_recent_filter": {
"bucket_selector": {
"buckets_path": {
"modified_on": "most_recent"
},
"script": "params.modified_on >= 1552003847225L"
}
},
"top_items": {
"top_hits": {
"_source": [
"parentId",
"otherFields",
"createdOn",
"modifiedOn"
],
"sort": {
"_script": {
"type": "number",
"script": {
"source": "doc['modifiedOn'].size() == 0 || doc['modifiedOn'].value.millis == null ? doc['createdOn'].value.millis : doc['modifiedOn'].value.millis"
},
"order": "desc",
"mode": "max"
}
},
"size": 10
}
}
}
}
},
"size": 0
}
Is there a better way to do this?