What are the proper settings and mappings for multiple languages?


(Emrah) #1

Hello,

I am looking for the best possible way to create an index where fields will have content in multiple languages.

Part of this is actually copied from the documentation, and I have already tested it; it meets my needs, such as ordering by name in aggregations and searching with spaces.

However, I am not sure whether this structure is too heavy.

Any opinions?

Thanks

PUT /my_index
{
  "settings": {
    "number_of_shards": 1,
    "number_of_replicas": 0,
    "analysis": {
      "filter": {
        "arabic_stop": {
          "type": "stop",
          "stopwords": "_arabic_"
        },
        "arabic_keywords": {
          "type": "keyword_marker",
          "keywords": [
            "مثال"
          ]
        },
        "arabic_stemmer": {
          "type": "stemmer",
          "language": "arabic"
        },
        "english_stop": {
          "type": "stop",
          "stopwords": "_english_"
        },
        "english_keywords": {
          "type": "keyword_marker",
          "keywords": [
            "example"
          ]
        },
        "english_stemmer": {
          "type": "stemmer",
          "language": "english"
        },
        "english_possessive_stemmer": {
          "type": "stemmer",
          "language": "possessive_english"
        }
      },
      "analyzer": {
        "rebuilt_arabic": {
          "tokenizer": "standard",
          "filter": [
            "lowercase",
            "decimal_digit",
            "arabic_stop",
            "arabic_normalization",
            "arabic_keywords",
            "arabic_stemmer"
          ]
        },
        "rebuilt_english": {
          "tokenizer": "standard",
          "filter": [
            "english_possessive_stemmer",
            "lowercase",
            "english_stop",
            "english_keywords",
            "english_stemmer"
          ]
        }
      },
      "normalizer": {
        "custom_normalizer": {
          "type": "custom",
          "filter": [
            "lowercase",
            "asciifolding"
          ]
        }
      }
    }
  },
  "mappings": {
    "blog": {
      "properties": {
        "name": {
          "type": "text",
          "analyzer": "english",
          "fields": {
            "english": {
              "type": "text",
              "analyzer": "rebuilt_english"
            },
            "raw": {
              "type": "keyword",
              "normalizer": "custom_normalizer"
            }
          }
        },
        "name_localized": {
          "properties": {
            "ar": {
              "type": "text",
              "fields": {
                "english": {
                  "type": "text",
                  "analyzer": "rebuilt_arabic"
                }
              }
            },
            "en": {
              "type": "text",
              "fields": {
                "english": {
                  "type": "text",
                  "analyzer": "rebuilt_english"
                }
              }
            }
          }
        }
      }
    }
  }
}

(system) #2

This topic was automatically closed 28 days after the last reply. New replies are no longer allowed.