We are experiencing an issue with Logstash where the user_dictionary_rules, stopwords, and synonyms data are not being properly indexed based on the template file in an EC2 environment.
When these data sets — user_dictionary_rules, stopwords, and synonyms — are small, they are applied correctly. However, once they grow to the sizes listed below, indexing fails in our EC2 environment:
- user_dictionary_rules: 2,750
- stopwords: 380
- synonyms: 71
Our AWS EC2 instance specifications are as follows:
- Instance Type: t2.xlarge
- 4 vCPUs
- 16 GiB Memory
What can be done to resolve this problem?
{
"index_patterns": [ "product", "product-*"],
"template": {
"settings": {
"index": {
"analysis": {
"tokenizer": {
"nori_analyzer": {
"type": "nori_tokenizer",
"decompound_mode": "mixed",
"discard_punctuation": false,
"user_dictionary_rules": [....]
}
},
"filter": {
"nori_filter": {
"type": "nori_part_of_speech",
"stoptags": [
"E", "IC", "J", "MAG", "MAJ", "MM", "SP", "SSC", "SSO", "SC", "SE", "XPN", "XSA", "XSN", "XSV", "UNA", "NA", "VSV"
]
},
"stop_filter": {
"type": "stop",
"stopwords": [...]
},
"synonym_filter": {
"type": "synonym",
"lenient": true,
"synonyms": [...]
}
},
"analyzer": {
"korean": {
"type": "custom",
"tokenizer": "nori_analyzer",
"filter": ["lowercase", "stop", "nori_filter", "stop_filter", "synonym_filter"],
"char_filter": ["html_strip"]
}
}
}
}
},
"mappings": {
"_source": {
"enabled": true
},
"properties": {
"fullName": {
"type": "text",
"analyzer": "korean"
},
"product": {
"type": "text",
"analyzer": "korean"
},
"option": {
"type": "text",
"analyzer": "korean"
},
"salePrice": {
"type": "integer"
},
"offerPrice": {
"type": "integer"
},
"supplier": {
"type": "keyword"
},
"category1": {
"type": "keyword"
},
"category2": {
"type": "keyword"
},
"category3": {
"type": "keyword"
},
"category4": {
"type": "keyword"
},
"brand": {
"type": "keyword"
}
}
}
}
}