Hello,
I am looking for the best possible way to create an index whose fields will hold content in multiple languages.
Part of this is copied from the documentation, and I have already tested it; it meets my needs, such as ordering by name in aggregations and searching with spaces.
However, I am not sure whether this structure is too heavy.
Any opinions?
Thanks
PUT /my_index
{
  "settings": {
    "number_of_shards": 1,
    "number_of_replicas": 0,
    "analysis": {
      "filter": {
        "arabic_stop": {
          "type": "stop",
          "stopwords": "_arabic_"
        },
        "arabic_keywords": {
          "type": "keyword_marker",
          "keywords": [
            "مثال"
          ]
        },
        "arabic_stemmer": {
          "type": "stemmer",
          "language": "arabic"
        },
        "english_stop": {
          "type": "stop",
          "stopwords": "_english_"
        },
        "english_keywords": {
          "type": "keyword_marker",
          "keywords": [
            "example"
          ]
        },
        "english_stemmer": {
          "type": "stemmer",
          "language": "english"
        },
        "english_possessive_stemmer": {
          "type": "stemmer",
          "language": "possessive_english"
        }
      },
      "normalizer": {
        "custom_normalizer": {
          "type": "custom",
          "filter": [
            "lowercase",
            "asciifolding"
          ]
        }
      },
      "analyzer": {
        "rebuilt_arabic": {
          "tokenizer": "standard",
          "filter": [
            "lowercase",
            "decimal_digit",
            "arabic_stop",
            "arabic_normalization",
            "arabic_keywords",
            "arabic_stemmer"
          ]
        },
        "rebuilt_english": {
          "tokenizer": "standard",
          "filter": [
            "english_possessive_stemmer",
            "lowercase",
            "english_stop",
            "english_keywords",
            "english_stemmer"
          ]
        }
      }
    }
  },
  "mappings": {
    "blog": {
      "properties": {
        "name": {
          "type": "text",
          "analyzer": "english",
          "fields": {
            "english": {
              "type": "text",
              "analyzer": "rebuilt_english"
            },
            "raw": {
              "type": "keyword",
              "normalizer": "custom_normalizer"
            }
          }
        },
        "name_localized": {
          "properties": {
            "ar": {
              "type": "text",
              "fields": {
                "english": {
                  "type": "text",
                  "analyzer": "rebuilt_arabic"
                }
              }
            },
            "en": {
              "type": "text",
              "fields": {
                "english": {
                  "type": "text",
                  "analyzer": "rebuilt_english"
                }
              }
            }
          }
        }
      }
    }
  }
}