Trying to design a query that will return customer data bucketed into various time ranges. The total number of documents being aggregated on will always be scoped to a given client, which in practice should never exceed a few thousand. It seems that this query takes ~3-4 seconds regardless of what I do to tune it and irrespective of the size of the client.
Based on my research, the delay comes from building the global ordinals for the invoice_payer_contact_token field, which has very high cardinality. The only thing that speeds up the query is enabling eager_global_ordinals on that field, but that causes our CPU / RAM usage to spike like crazy.
# customer aging report (full)
# NOTE: date_range treats "from" as inclusive and "to" as exclusive, so each
# bucket's "to" must equal the next bucket's "from" — otherwise invoices due on
# the boundary day fall into no bucket at all.
# "execution_hint": "map" makes the terms agg hash the values of the (few
# thousand) matching docs directly instead of building global ordinals for the
# whole high-cardinality field — avoids the 3-4s ordinal build without the
# RAM/CPU cost of eager_global_ordinals.
GET /invoices/invoice/_search
{
  "size": 0,
  "query": {
    "bool": {
      "filter": [
        {
          "term": {
            "invoice_unit_token": "KM9TQK5WVWXV3"
          }
        },
        {
          "term": {
            "invoice_state": "UNPAID"
          }
        }
      ]
    }
  },
  "aggs": {
    "range": {
      "date_range": {
        "field": "invoice_due_on",
        "ranges": [
          {
            "from": "now-30d/d",
            "key": "0-30 days"
          },
          {
            "from": "now-60d/d",
            "to": "now-30d/d",
            "key": "31-60 days"
          },
          {
            "from": "now-90d/d",
            "to": "now-60d/d",
            "key": "61-90 days"
          },
          {
            "from": "now-120d/d",
            "to": "now-90d/d",
            "key": "91-120 days"
          },
          {
            "to": "now-90d/d",
            "key": "91 days or older"
          },
          {
            "to": "now/d",
            "key": "all-time"
          }
        ]
      },
      "aggs": {
        "customers": {
          "terms": {
            "field": "invoice_payer_contact_token",
            "size": 1000,
            "execution_hint": "map",
            "order": {
              "total_requested_amount": "desc"
            }
          },
          "aggs": {
            "total_requested_amount": {
              "sum": {
                "field": "invoice_requested_amount_cents"
              }
            }
          }
        }
      }
    }
  }
}