I recently upgraded from ES 6.8 to ES 8.3. One of the queries with heavy aggregations takes significantly longer now than it did before. After some searching I found that the new version of ES sets the heap size automatically (to 1.8GB in my case), instead of using the fixed default of 1GB as in ES 6.8. It turns out that when I decrease the heap size back to 1GB, the query is fast again. I confirmed this by letting the heap size be determined automatically once more, after which it's significantly slower again.
The cluster is running on a single node with 4GB of RAM, with 5 shards for the index. The index has a doc count of 18 million. Could it be that ES takes up so much heap space that there's not enough memory left for the filesystem cache? Or is there another explanation for why decreasing the heap size makes the queries faster?
For completeness here's the query we use:
POST article_set_index/_search
{
  "size": 0,
  "query": {
    "nested": {
      "path": "versions",
      "score_mode": "max",
      "inner_hits": {
        "size": 100,
        "highlight": {
          "pre_tags": ["<mark>"],
          "post_tags": ["</mark>"],
          "fields": {
            "versions.title": {
              "type": "fvh",
              "number_of_fragments": 0
            },
            "versions.body": {
              "type": "fvh",
              "number_of_fragments": 0
            }
          }
        },
        "sort": {
          "versions.published": {
            "order": "asc"
          }
        }
      },
      "query": {
        "bool": {
          "must": [
            {
              "simple_query_string": {
                "query": "the",
                "default_operator": "and",
                "fields": [
                  "versions.title",
                  "versions.body"
                ]
              }
            }
          ],
          "filter": [
            {
              "terms": {
                "versions.language": ["nl"]
              }
            },
            {
              "range": {
                "versions.created": {
                  "gte": 1644382800629
                }
              }
            },
            {
              "range": {
                "versions.publicationDate": {
                  "gte": "2022-02-09"
                }
              }
            }
          ]
        }
      }
    }
  },
  "aggs": {
    "totalAuthorCount": {
      "cardinality": {
        "field": "authorIds",
        "precision_threshold": 100
      }
    },
    "authors": {
      "terms": {
        "field": "authorIds",
        "size": 121,
        "shard_size": 400
      },
      "aggs": {
        "articles": {
          "top_hits": {
            "size": 5
          }
        }
      }
    }
  },
  "timeout": "60000ms"
}