I've created an Elasticsearch index with custom settings and analyzers to handle various text transformations. Here are the settings for my index:
"settings": {
"index": {
"analysis": {
"filter": {
"my_synonyms_2_first_name": {
"type": "synonym",
"synonyms_path": "Broadness_3_First_Name.txt"
},
"my_synonyms_first_name": {
"type": "synonym",
"synonyms_path": "Broadness_2_First_Name.txt"
},
"french_stop": {
"type": "stop",
"stopwords": "_french_"
},
"english_stemmer": {
"name": "english",
"type": "stemmer"
},
"my_english_soundex": {
"replace": "false",
"type": "phonetic",
"encoder": "soundex"
},
"my_french_soundex": {
"languageset": ["french"],
"replace": "false",
"rule_type": "approx",
"type": "phonetic",
"encoder": "beider_morse",
"name_type": "generic"
},
"my_synonyms_last_name": {
"type": "synonym",
"synonyms_path": "Broadness_2_Last_Name.txt"
},
"english_stop": {
"type": "stop",
"stopwords": "_english_"
},
"french_elision": {
"type": "elision",
"articles": [
"l", "m", "t", "qu", "n", "s", "j", "d", "c", "jusqu", "quoiqu", "lorsqu", "puisqu"
],
"articles_case": "true"
},
"my_location_synonyms": {
"type": "synonym",
"synonyms_path": "LOCATION.txt"
},
"my_synonyms_2_last_name": {
"type": "synonym",
"synonyms_path": "Broadness_3_Last_Name.txt"
},
"french_stemmer": {
"name": "french",
"type": "stemmer"
}
},
"analyzer": {
"uppercase": {
"filter": ["uppercase", "asciifolding"],
"tokenizer": "standard"
},
"location_synonyms": {
"filter": ["lowercase", "asciifolding", "my_location_synonyms"],
"tokenizer": "keyword"
},
"my_synonyms": {
"filter": ["lowercase", "asciifolding", "my_synonyms_first_name", "my_synonyms_last_name"],
"tokenizer": "standard"
},
"lowercase": {
"filter": ["lowercase", "asciifolding"],
"tokenizer": "standard"
},
"my_english_phonetic": {
"filter": ["lowercase", "my_english_soundex", "asciifolding"],
"tokenizer": "standard"
},
"my_synonyms_2": {
"filter": ["lowercase", "asciifolding", "my_synonyms_2_first_name", "my_synonyms_2_last_name"],
"tokenizer": "standard"
},
"english": {
"filter": ["lowercase", "asciifolding"],
"tokenizer": "standard"
},
"fuzzy_analyzer": {
"filter": ["lowercase"],
"tokenizer": "standard"
},
"my_french_phonetic": {
"filter": ["lowercase", "french_stop", "french_stemmer", "my_french_soundex"],
"tokenizer": "standard"
},
"french": {
"filter": ["lowercase", "french_stop", "french_stemmer", "french_elision"],
"tokenizer": "standard"
}
}
},
"similarity": {
"scripted_tfidf": {
"type": "scripted",
"script": {
"source": "double tf = Math.sqrt(doc.freq); double idf = 1.0; double norm = 1/Math.sqrt(doc.length); return query.boost * tf * idf * norm;"
}
}
}
}
}
I have also created multiple field types for each text field with these custom analyzers. Here is an example for the prenom_conjointe_precedente_du_probant field:
"prenom_conjointe_precedente_du_probant": {
"similarity": "scripted_tfidf",
"type": "text",
"fields": {
"uppercase": {
"analyzer": "uppercase",
"similarity": "scripted_tfidf",
"type": "text"
},
"english_phonetic": {
"analyzer": "my_english_phonetic",
"similarity": "scripted_tfidf",
"type": "text"
},
"exact_match": {
"similarity": "scripted_tfidf",
"type": "keyword"
},
"french_phonetic": {
"analyzer": "my_french_phonetic",
"similarity": "scripted_tfidf",
"type": "text"
},
"lowercase": {
"analyzer": "lowercase",
"similarity": "scripted_tfidf",
"type": "text"
},
"english": {
"analyzer": "english",
"similarity": "scripted_tfidf",
"type": "text"
},
"broadness_3_synonyms": {
"analyzer": "my_synonyms_2",
"similarity": "scripted_tfidf",
"type": "text"
},
"suggest": {
"max_input_length": 50,
"analyzer": "simple",
"preserve_position_increments": true,
"type": "completion",
"preserve_separators": true
},
"broadness_2_synonyms": {
"analyzer": "my_synonyms",
"similarity": "scripted_tfidf",
"type": "text"
},
"french": {
"analyzer": "french",
"similarity": "scripted_tfidf",
"type": "text"
},
"fuzzy": {
"analyzer": "fuzzy_analyzer",
"type": "text"
}
},
"copy_to": [
"full_name_conjointe_precedente_du_probant"
]
}
After indexing the data, I noticed that for the fields using the lowercase analyzer, the data was not lowercased and accented characters were not folded to their ASCII equivalents, as I expected from the `lowercase` and `asciifolding` filters.

Could someone help me understand why the lowercase analyzer is not applying the expected transformations to my indexed data (the documents returned in the `_source` of search results still show the original casing and accents)? Is there something I'm missing in the configuration, or in the way the data is being indexed? Any insights or solutions would be greatly appreciated!