We have an index in Elasticsearch 7.17. It uses synonym files; you can see its full structure below:
{
"my-index" : {
"settings" : {
"index" : {
"routing" : {
"allocation" : {
"include" : {
"_tier_preference" : "data_content"
}
}
},
"mapping" : {
"ignore_malformed" : "true"
},
"refresh_interval" : "30s",
"number_of_shards" : "3",
"provided_name" : "ecommerce-aug-14",
"max_inner_result_window" : "200",
"default_pipeline" : "ecommerce-pipeline",
"creation_date" : "1692020601428",
"analysis" : {
"filter" : {
"condition_shingle_filter" : {
"filter" : [
"shingle_filter"
],
"type" : "condition",
"script" : {
"source" : """ !token.type.contains( "SYNONYM") """
}
},
"graph_synonyms" : {
"updateable" : "false",
"type" : "synonym_graph",
"synonyms_path" : "analysis/synonym.txt",
"lenient" : "true"
},
"limit_2_token" : {
"type" : "limit",
"max_token_count" : "2"
},
"alphabet_num_sep" : {
"catenate_all" : "true",
"split_on_case_change" : "false",
"type" : "word_delimiter_graph",
"preserve_original" : "true"
},
"shingle_filter" : {
"token_separator" : "",
"type" : "shingle"
},
"limit_3_token" : {
"type" : "limit",
"max_token_count" : "3"
},
"graph_synonyms3gram" : {
"updateable" : "false",
"type" : "synonym_graph",
"synonyms_path" : "analysis/synonym3gram.txt",
"lenient" : "true"
},
"graph_synonyms2gram" : {
"updateable" : "false",
"type" : "synonym_graph",
"synonyms_path" : "analysis/synonym2gram.txt",
"lenient" : "true"
},
"limit_1_token" : {
"type" : "limit",
"max_token_count" : "1"
},
"alphabet_num_sep_with_underscore" : {
"catenate_all" : "true",
"split_on_case_change" : "false",
"type" : "word_delimiter_graph",
"type_table" : [
"_ => ALPHA"
],
"preserve_original" : "true"
},
"graph_synonyms1gram" : {
"updateable" : "false",
"type" : "synonym_graph",
"synonyms_path" : "analysis/synonym1gram.txt",
"lenient" : "true"
}
},
"analyzer" : {
"name_analyzer" : {
"filter" : [
"lowercase",
"decimal_digit",
"arabic_normalization",
"persian_normalization",
"graph_synonyms",
"alphabet_num_sep",
"flatten_graph",
"unique",
"condition_shingle_filter"
],
"char_filter" : [
"arabic_letter_normalizer",
"handle_punctuations"
],
"tokenizer" : "standard"
},
"name_search_analyzer" : {
"filter" : [
"lowercase",
"decimal_digit",
"arabic_normalization",
"persian_normalization",
"graph_synonyms",
"alphabet_num_sep",
"unique",
"condition_shingle_filter"
],
"char_filter" : [
"arabic_letter_normalizer",
"handle_punctuations"
],
"tokenizer" : "standard"
},
"first_2gram" : {
"filter" : [
"lowercase",
"decimal_digit",
"arabic_normalization",
"persian_normalization",
"limit_2_token",
"graph_synonyms2gram",
"flatten_graph",
"unique"
],
"tokenizer" : "standard"
},
"first_3gram" : {
"filter" : [
"lowercase",
"decimal_digit",
"arabic_normalization",
"persian_normalization",
"limit_3_token",
"graph_synonyms3gram",
"flatten_graph",
"unique"
],
"tokenizer" : "standard"
},
"first_1gram" : {
"filter" : [
"lowercase",
"decimal_digit",
"arabic_normalization",
"persian_normalization",
"limit_1_token",
"graph_synonyms1gram",
"flatten_graph",
"unique"
],
"tokenizer" : "standard"
},
"first_3gram_search" : {
"filter" : [
"lowercase",
"decimal_digit",
"arabic_normalization",
"persian_normalization",
"limit_3_token",
"graph_synonyms3gram"
],
"tokenizer" : "standard"
},
"refined_name_analyzer" : {
"filter" : [
"lowercase",
"decimal_digit",
"arabic_normalization",
"persian_normalization",
"graph_synonyms",
"alphabet_num_sep_with_underscore",
"flatten_graph",
"unique",
"condition_shingle_filter"
],
"char_filter" : [
"arabic_letter_normalizer",
"handle_punctuations_no_underscore"
],
"tokenizer" : "standard"
},
"first_2gram_search" : {
"filter" : [
"lowercase",
"decimal_digit",
"arabic_normalization",
"persian_normalization",
"limit_2_token",
"graph_synonyms2gram"
],
"tokenizer" : "standard"
},
"refined_name_search_analyzer" : {
"filter" : [
"lowercase",
"decimal_digit",
"arabic_normalization",
"persian_normalization",
"graph_synonyms",
"alphabet_num_sep_with_underscore",
"unique",
"condition_shingle_filter"
],
"char_filter" : [
"arabic_letter_normalizer",
"handle_punctuations_no_underscore"
],
"tokenizer" : "standard"
},
"first_1gram_search" : {
"filter" : [
"lowercase",
"decimal_digit",
"arabic_normalization",
"persian_normalization",
"limit_1_token",
"graph_synonyms1gram"
],
"tokenizer" : "standard"
}
},
"char_filter" : {
"handle_punctuations_no_underscore" : {
"type" : "mapping",
"mappings" : [
"'=>",
"=> "
]
},
"arabic_letter_normalizer" : {
"type" : "mapping",
"mappings_path" : "analysis/arabic_char_replace.txt"
},
"handle_punctuations" : {
"type" : "mapping",
"mappings_path" : "analysis/punctuations_char_replace.txt"
}
}
},
"number_of_replicas" : "1",
"uuid" : "4C3fbpKGSzqpXRG-I-c_SA",
"version" : {
"created" : "7170099"
}
}
}
}
}
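For context, the files referenced by synonyms_path (analysis/synonym.txt and the per-gram files) are plain Solr-format synonym files. The rules below are made-up placeholders just to illustrate the format; the real files contain our own terms:
# hypothetical excerpt of analysis/synonym.txt
laptop, notebook
tv => television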
We want to switch to the synonyms API (here), so I first created the synonym sets. A minimal sketch of one such call (the rule shown is a placeholder; our real sets contain our own rules):
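PUT _synonyms/synonyms
{
  "synonyms_set": [
    { "id": "rule-1", "synonyms": "laptop, notebook" }
  ]
}
I created synonyms, synonym1gram, synonym2gram and synonym3gram the same way, and then rewrote the create-index request like this: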
{
"settings": {
"index": {
"routing": {
"allocation": {
"include": {
"_tier_preference": "data_content"
}
}
},
"mapping": {
"ignore_malformed": "true"
},
"refresh_interval": "30s",
"number_of_shards": "3",
"max_inner_result_window": "200",
"default_pipeline": "ecommerce-pipeline",
"analysis": {
"filter": {
"condition_shingle_filter": {
"filter": [
"shingle_filter"
],
"type": "condition",
"script": {
"source": """ !token.type.contains( "SYNONYM") """
}
},
"graph_synonyms": {
"updateable": "false",
"type": "synonym_graph",
"synonyms_set": "synonyms",
"lenient": "true"
},
"limit_2_token": {
"type": "limit",
"max_token_count": "2"
},
"alphabet_num_sep": {
"catenate_all": "true",
"split_on_case_change": "false",
"type": "word_delimiter_graph",
"preserve_original": "true"
},
"shingle_filter": {
"token_separator": "",
"type": "shingle"
},
"limit_3_token": {
"type": "limit",
"max_token_count": "3"
},
"graph_synonyms3gram": {
"updateable": "false",
"type": "synonym_graph",
"synonyms_set": "synonym3gram",
"lenient": "true"
},
"graph_synonyms2gram": {
"updateable": "false",
"type": "synonym_graph",
"synonyms_set": "synonym2gram",
"lenient": "true"
},
"limit_1_token": {
"type": "limit",
"max_token_count": "1"
},
"alphabet_num_sep_with_underscore": {
"catenate_all": "true",
"split_on_case_change": "false",
"type": "word_delimiter_graph",
"type_table": [
"_ => ALPHA"
],
"preserve_original": "true"
},
"graph_synonyms1gram": {
"updateable": "false",
"type": "synonym_graph",
"synonyms_set": "synonym1gram",
"lenient": "true"
}
},
"analyzer": {
"name_analyzer": {
"filter": [
"lowercase",
"decimal_digit",
"arabic_normalization",
"persian_normalization",
"graph_synonyms",
"alphabet_num_sep",
"flatten_graph",
"unique",
"condition_shingle_filter"
],
"char_filter": [
"arabic_letter_normalizer",
"handle_punctuations"
],
"tokenizer": "standard"
},
"name_search_analyzer": {
"filter": [
"lowercase",
"decimal_digit",
"arabic_normalization",
"persian_normalization",
"graph_synonyms",
"alphabet_num_sep",
"unique",
"condition_shingle_filter"
],
"char_filter": [
"arabic_letter_normalizer",
"handle_punctuations"
],
"tokenizer": "standard"
},
"first_2gram": {
"filter": [
"lowercase",
"decimal_digit",
"arabic_normalization",
"persian_normalization",
"limit_2_token",
"graph_synonyms2gram",
"flatten_graph",
"unique"
],
"tokenizer": "standard"
},
"first_3gram": {
"filter": [
"lowercase",
"decimal_digit",
"arabic_normalization",
"persian_normalization",
"limit_3_token",
"graph_synonyms3gram",
"flatten_graph",
"unique"
],
"tokenizer": "standard"
},
"first_1gram": {
"filter": [
"lowercase",
"decimal_digit",
"arabic_normalization",
"persian_normalization",
"limit_1_token",
"graph_synonyms1gram",
"flatten_graph",
"unique"
],
"tokenizer": "standard"
},
"first_3gram_search": {
"filter": [
"lowercase",
"decimal_digit",
"arabic_normalization",
"persian_normalization",
"limit_3_token",
"graph_synonyms3gram"
],
"tokenizer": "standard"
},
"refined_name_analyzer": {
"filter": [
"lowercase",
"decimal_digit",
"arabic_normalization",
"persian_normalization",
"graph_synonyms",
"alphabet_num_sep_with_underscore",
"flatten_graph",
"unique",
"condition_shingle_filter"
],
"char_filter": [
"arabic_letter_normalizer",
"handle_punctuations_no_underscore"
],
"tokenizer": "standard"
},
"first_2gram_search": {
"filter": [
"lowercase",
"decimal_digit",
"arabic_normalization",
"persian_normalization",
"limit_2_token",
"graph_synonyms2gram"
],
"tokenizer": "standard"
},
"refined_name_search_analyzer": {
"filter": [
"lowercase",
"decimal_digit",
"arabic_normalization",
"persian_normalization",
"graph_synonyms",
"alphabet_num_sep_with_underscore",
"unique",
"condition_shingle_filter"
],
"char_filter": [
"arabic_letter_normalizer",
"handle_punctuations_no_underscore"
],
"tokenizer": "standard"
},
"first_1gram_search": {
"filter": [
"lowercase",
"decimal_digit",
"arabic_normalization",
"persian_normalization",
"limit_1_token",
"graph_synonyms1gram"
],
"tokenizer": "standard"
}
},
"char_filter": {
"handle_punctuations_no_underscore": {
"type": "mapping",
"mappings": [
"'=>",
"=> "
]
},
"arabic_letter_normalizer": {
"type": "mapping",
"mappings": ["ْ=>","ٌ=>","ً=>","ٍ=>","ُ=>","َ=>","ّ=>","ِ=>","ؤ=>و","ة=>ه","ك=>ک","ٓ=>","ٔ=>","ء=>","ي=>ی","ے=>ی","ى=>ی","ئ=>ی","ہ=>ه","ھ=>ه","ٰ=>","أ=>ا","إ=>ا","ٱ=>ا","آ=>ا","ۂ=>ه","ۀ=>ه"]
},
"handle_punctuations": {
"type": "mapping",
"mappings": ["_=>\u0020","ـ=>\u0020","'=>","\u200C=>\u0020"]
}
}
},
"number_of_replicas": "1"
}
}
}
But when I issue this request, I get the following error:
{
"error": {
"root_cause": [
{
"type": "illegal_argument_exception",
"reason": "Can't apply [synonyms_set]! Loading synonyms from index is supported only for search time synonyms!"
}
],
"type": "illegal_argument_exception",
"reason": "Can't apply [synonyms_set]! Loading synonyms from index is supported only for search time synonyms!"
},
"status": 400
}
What is the problem here?
If I change "updateable": "false" to "true" for the synonym graph filters, the error changes to:
{
"error": {
"root_cause": [
{
"type": "mapper_exception",
"reason": "analyzer [first_3gram] contains filters [graph_synonyms3gram] that are not allowed to run in index time mode."
}
],
"type": "mapper_parsing_exception",
"reason": "Failed to parse mapping: analyzer [first_3gram] contains filters [graph_synonyms3gram] that are not allowed to run in index time mode.",
"caused_by": {
"type": "mapper_exception",
"reason": "analyzer [first_3gram] contains filters [graph_synonyms3gram] that are not allowed to run in index time mode."
}
},
"status": 400
}
I understand why this second error happens: an updateable filter may only be used in search-time analyzers, and analyzers such as first_3gram also run at index time.
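If I understand the restriction correctly, a synonyms_set filter must be updateable and may only appear in analyzers that are referenced as search_analyzer. A minimal sketch of a shape that should pass validation (my-new-index, the analyzer names and the name field are placeholders, not our real mapping):
PUT /my-new-index
{
  "settings": {
    "analysis": {
      "filter": {
        "graph_synonyms": {
          "type": "synonym_graph",
          "synonyms_set": "synonyms",
          "updateable": true
        }
      },
      "analyzer": {
        "index_time_analyzer": {
          "tokenizer": "standard",
          "filter": [ "lowercase" ]
        },
        "search_time_analyzer": {
          "tokenizer": "standard",
          "filter": [ "lowercase", "graph_synonyms" ]
        }
      }
    }
  },
  "mappings": {
    "properties": {
      "name": {
        "type": "text",
        "analyzer": "index_time_analyzer",
        "search_analyzer": "search_time_analyzer"
      }
    }
  }
}
But that shape is exactly what we cannot use: our name_analyzer and first_*gram analyzers apply the synonym filters at index time, which this restriction rules out.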