We are currently moving from Elasticsearch 5.6.4 to Elasticsearch 7.9.1 and are seeing some concerning differences in disk usage and CPU usage; we suspect the higher CPU usage may be a symptom of the larger indices.
The same data, with largely the same mappings and analysis settings, is in both clusters, with the same number of shards and replicas. An index with 81.6k docs in ES7 is 15.6GB, while an index in ES5 with the same docs in it is 869MB. An ~18x increase in disk usage is not something we would expect.
Looking through other disk usage threads I've checked a few things:
- That the store size, not the transaction log size, is what's eating up space.
- That the store and doc_values defaults have not changed between the two versions.
I've started re-reading the release notes for all the versions in between with an eye towards disk-usage-related changes, but thought I'd see if anyone had any ideas or information that might help in the meantime.
Here are the mapping template and analysis settings:
{
  "mappings": {
    "properties": {
      "field1": {
        "type": "keyword"
      },
      "field2": {
        "type": "integer"
      },
      "field3": {
        "type": "integer"
      },
      "field4": {
        "type": "keyword"
      },
      "field5": {
        "analyzer": "ngram_suggest",
        "fields": {
          "keyword": {
            "type": "keyword"
          }
        },
        "type": "text"
      },
      "field6": {
        "type": "keyword"
      },
      "field7": {
        "type": "keyword"
      },
      "field8": {
        "type": "boolean"
      },
      "field9": {
        "type": "boolean"
      },
      "field10": {
        "type": "text"
      },
      "field11": {
        "type": "boolean"
      },
      "field12": {
        "type": "boolean"
      },
      "field13": {
        "type": "boolean"
      },
      "field14": {
        "type": "boolean"
      },
      "field15": {
        "type": "boolean"
      },
      "field16": {
        "type": "boolean"
      },
      "field17": {
        "type": "boolean"
      },
      "field18": {
        "type": "boolean"
      },
      "field19": {
        "type": "boolean"
      },
      "field20": {
        "type": "boolean"
      },
      "field21": {
        "type": "boolean"
      },
      "field22": {
        "type": "boolean"
      },
      "field23": {
        "type": "boolean"
      },
      "field24": {
        "type": "boolean"
      },
      "field25": {
        "type": "boolean"
      },
      "field26": {
        "type": "boolean"
      },
      "field27": {
        "analyzer": "normalized",
        "fields": {
          "keyword": {
            "type": "keyword"
          }
        },
        "type": "text"
      },
      "field28": {
        "type": "keyword"
      },
      "field29": {
        "type": "integer"
      },
      "field30": {
        "type": "integer"
      },
      "field31": {
        "analyzer": "normalized",
        "fields": {
          "raw": {
            "type": "keyword"
          }
        },
        "type": "text"
      },
      "field32": {
        "analyzer": "ngram_partial",
        "type": "text"
      },
      "field33": {
        "analyzer": "ngram_suggest",
        "type": "text"
      },
      "field34": {
        "type": "boolean"
      },
      "field35": {
        "type": "boolean"
      },
      "field36": {
        "type": "keyword"
      },
      "field37": {
        "type": "keyword"
      },
      "field38": {
        "type": "integer"
      },
      "field39": {
        "type": "keyword"
      },
      "field40": {
        "type": "boolean"
      },
      "field41": {
        "type": "boolean"
      },
      "field42": {
        "scaling_factor": 100000,
        "type": "scaled_float"
      },
      "field43": {
        "type": "integer"
      },
      "field44": {
        "type": "keyword"
      },
      "field45": {
        "analyzer": "normalized",
        "fields": {
          "keyword": {
            "type": "keyword"
          }
        },
        "type": "text"
      },
      "field46": {
        "analyzer": "normalized",
        "fields": {
          "keyword": {
            "type": "keyword"
          }
        },
        "type": "text"
      },
      "field47": {
        "analyzer": "ngram_suggest",
        "fields": {
          "keyword": {
            "type": "keyword"
          }
        },
        "type": "text"
      },
      "field48": {
        "type": "boolean"
      },
      "field49": {
        "enabled": false
      },
      "field50": {
        "type": "long"
      },
      "field51": {
        "type": "boolean"
      }
    }
  },
  "settings": {
    "index": {
      "analysis": {
        "analyzer": {
          "ngram_partial": {
            "filter": [
              "asciifolding",
              "lowercase"
            ],
            "tokenizer": "ngram"
          },
          "ngram_suggest": {
            "filter": [
              "asciifolding",
              "lowercase"
            ],
            "tokenizer": "edge_ngram"
          },
          "normalized": {
            "filter": [
              "asciifolding",
              "lowercase",
              "english_stemmer"
            ],
            "tokenizer": "standard",
            "type": "custom"
          }
        },
        "filter": {
          "english_stemmer": {
            "language": "english",
            "type": "stemmer"
          }
        },
        "tokenizer": {
          "edge_ngram": {
            "max_gram": "20",
            "min_gram": "1",
            "token_chars": [
              "letter",
              "digit",
              "punctuation"
            ],
            "type": "edge_ngram"
          },
          "ngram": {
            "max_gram": "3",
            "min_gram": "3",
            "token_chars": [
              "letter",
              "digit",
              "punctuation"
            ],
            "type": "ngram"
          }
        }
      }
    }
  }
}