We are experiencing an issue with Logstash where the user_dictionary_rules, stopwords, and synonyms data are not being properly indexed based on the template file in an EC2 environment.
When these data sets — user_dictionary_rules, stopwords, and synonyms — are small, they are applied correctly. However, once they grow to the sizes listed below, indexing fails in our EC2 environment:
- user_dictionary_rules: 2,750
- stopwords: 380
- synonyms: 71
Our AWS EC2 instance specifications are as follows:
- Instance Type: t2.xlarge
- 4 vCPUs
- 16 GiB Memory
What can be done to resolve this problem?
{
"index_patterns": [ "product", "product-*"],
"template": {
"settings": {
"index": {
"analysis": {
"tokenizer": {
"nori_analyzer": {
"type": "nori_tokenizer",
"decompound_mode": "mixed",
"discard_punctuation": false,
"user_dictionary_rules": [....]
}
},
"filter": {
"nori_filter": {
"type": "nori_part_of_speech",
"stoptags": [
"E", "IC", "J", "MAG", "MAJ", "MM", "SP", "SSC", "SSO", "SC", "SE", "XPN", "XSA", "XSN", "XSV", "UNA", "NA", "VSV"
]
},
"stop_filter": {
"type": "stop",
"stopwords": [...]
},
"synonym_filter": {
"type": "synonym",
"lenient": true,
"synonyms": [...]
}
},
"analyzer": {
"korean": {
"type": "custom",
"tokenizer": "nori_analyzer",
"filter": ["lowercase", "stop", "nori_filter", "stop_filter", "synonym_filter"],
"char_filter": ["html_strip"]
}
}
}
}
},
"mappings": {
"_source": {
"enabled": true
},
"properties": {
"fullName": {
"type": "text",
"analyzer": "korean"
},
"product": {
"type": "text",
"analyzer": "korean"
},
"option": {
"type": "text",
"analyzer": "korean"
},
"salePrice": {
"type": "integer"
},
"offerPrice": {
"type": "integer"
},
"supplier": {
"type": "keyword"
},
"category1": {
"type": "keyword"
},
"category2": {
"type": "keyword"
},
"category3": {
"type": "keyword"
},
"category4": {
"type": "keyword"
},
"brand": {
"type": "keyword"
}
}
}
}
}