Hi,
I am working on a project to perform multilingual full-text search using Elasticsearch. A single field can contain a combination of words from different languages, or transliterated text. For example, English text may contain Armenian words, or Armenian text may contain Russian words. I am now trying to configure text analysis with language analyzers.
In my case, I don't know what language the text is in before indexing.
{
  "settings": {
    "analysis": {
      "filter": {
        "armenian_stop": {
          "type": "stop",
          "stopwords": "_armenian_"
        },
        "armenian_keywords": {
          "type": "keyword_marker",
          "keywords": ["օրինակ"]
        },
        "armenian_stemmer": {
          "type": "stemmer",
          "language": "armenian"
        },
        "russian_stop": {
          "type": "stop",
          "stopwords": "_russian_"
        },
        "russian_keywords": {
          "type": "keyword_marker",
          "keywords": ["пример"]
        },
        "russian_stemmer": {
          "type": "stemmer",
          "language": "russian"
        },
        "graph_synonyms": {
          "type": "synonym",
          "synonyms_path": "analysis/synonym.txt"
        }
      },
      "analyzer": {
        "rebuilt_armenian": {
          "tokenizer": "standard",
          "filter": [
            "lowercase",
            "armenian_stop",
            "armenian_keywords",
            "armenian_stemmer",
            "graph_synonyms"
          ]
        },
        "rebuilt_russian": {
          "tokenizer": "standard",
          "filter": [
            "lowercase",
            "russian_stop",
            "russian_keywords",
            "russian_stemmer"
          ]
        }
      }
    }
  },
  "mappings": {
    "properties": {
      "age": { "type": "integer" },
      "email": { "type": "keyword" },
      "name": {
        "type": "text",
        "fields": {
          "ar": {
            "type": "text",
            "analyzer": "rebuilt_armenian"
          },
          "ru": {
            "type": "text",
            "analyzer": "rebuilt_russian"
          }
        }
      },
      "location": { "type": "geo_point" }
    }
  }
}
query:
As far as I understand, it is necessary to search the specific sub-fields ("name.ar", "name.ru"); if you search, for example, on "name" itself, then the standard analyzer will be used.
Am I doing everything right?
{
  "query": {
    "bool": {
      "must": [
        {
          "query_string": {
            "fields": ["name.ar", "name.ru"],
            "query": "ներմուծված"
          }
        }
      ],
      "filter": [
        {
          "geo_distance": {
            "distance": "25km",
            "location": {
              "lat": 40.79420000,
              "lon": 43.84528000
            }
          }
        }
      ]
    }
  }
}