Hi there,
Hoping for some guidance on how to optimise the following query, which is the slowest-running query in our app. It runs against some very large indices, one in the region of 1.5B docs, and more often than not it exceeds 30 seconds, so the Ruby app issuing it hits its timeout.
The intended result is what we call 'Headline Metrics': a current-period vs previous-period comparison, split by vendor, for any number of ISRCs (audio track identifiers). See the image below:
The query itself is as follows. I'm pretty certain the repetition in the terms filters can be avoided (I've put a rough sketch of what I mean after the query), but I'd really appreciate some general help here.
{
"query": {
"bool": {
"must": [{
"terms": {
"vendor": ["amazon", "itunes", "spotify", "google"]
}
}, {
"terms": {
"isrc": ["GBCEL1200424", "GBCEL1200425", "GBCEL1200310", "GBCEL1200426", "GBCEL1200427"]
}
}]
}
},
"size": 0,
"aggs": {
"headline_metrics": {
"filters": {
"filters": {
"streams": {
"bool": {
"must": [{
"range": {
"date": {
"gte": "2016-06-05",
"lte": "2016-12-06"
}
}
}, {
"terms": {
"vendor": ["amazon", "itunes", "spotify", "google"]
}
}, {
"terms": {
"isrc": ["GBCEL1200424", "GBCEL1200425", "GBCEL1200310", "GBCEL1200426", "GBCEL1200427"]
}
}]
}
}
}
},
"aggs": {
"data_split": {
"terms": {
"field": "isrc"
},
"aggs": {
"total_streams": {
"sum": {
"field": "units"
}
},
"date_periods": {
"date_range": {
"field": "date",
"format": "YYYY-MM-dd",
"ranges": [{
"from": "2016-09-05",
"to": "2016-12-06"
}, {
"from": "2016-06-05",
"to": "2016-09-05"
}]
},
"aggs": {
"unit_sum": {
"sum": {
"field": "units"
}
},
"vendor_split": {
"terms": {
"field": "vendor",
"size": 5,
"order": {
"one": "desc"
}
},
"aggs": {
"one": {
"sum": {
"field": "units"
}
}
}
}
}
}
}
}
}
}
}
}
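For reference, this is the kind of de-duplicated version I had in mind. It's an untested sketch, and I'm assuming our version supports the bool filter clause: the two terms clauses move from must into the top-level filter context (so they aren't scored and can be cached), and the streams bucket keeps only the date range, since the top-level query already restricts vendor and isrc for everything the aggregations see. As far as I can tell the response shape and the numbers should stay the same:

{
"size": 0,
"query": {
"bool": {
"filter": [{
"terms": {
"vendor": ["amazon", "itunes", "spotify", "google"]
}
}, {
"terms": {
"isrc": ["GBCEL1200424", "GBCEL1200425", "GBCEL1200310", "GBCEL1200426", "GBCEL1200427"]
}
}]
}
},
"aggs": {
"headline_metrics": {
"filters": {
"filters": {
"streams": {
"range": {
"date": {
"gte": "2016-06-05",
"lte": "2016-12-06"
}
}
}
}
},
"aggs": {
"data_split": {
"terms": {
"field": "isrc"
},
"aggs": {
"total_streams": {
"sum": {
"field": "units"
}
},
"date_periods": {
"date_range": {
"field": "date",
"format": "yyyy-MM-dd",
"ranges": [{
"from": "2016-09-05",
"to": "2016-12-06"
}, {
"from": "2016-06-05",
"to": "2016-09-05"
}]
},
"aggs": {
"unit_sum": {
"sum": {
"field": "units"
}
},
"vendor_split": {
"terms": {
"field": "vendor",
"size": 5,
"order": {
"one": "desc"
}
},
"aggs": {
"one": {
"sum": {
"field": "units"
}
}
}
}
}
}
}
}
}
}
}
}

Is that roughly the right direction, or would it be better to drop the outer filters aggregation entirely and move the overall date range into the query? Also happy to hear if the nested terms/date_range/terms aggregations themselves are the real cost here rather than the duplicated filters.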
Thanks in advance!