I have a problem with a specific search when using a term query and case_insensitive=true.
To demonstrate the problem, I've created a shell script.
It deletes and then re-creates the index "a", which has a field "my_kw" of type "keyword".
One document is stored with a value of "Gamma--MÖma" for "my_kw".
Then a term search is executed with the value "Gamma--MÖma"; it finds the document (as expected).
Then the search is executed with "gamma--mÖma" and case_insensitive=true; it also finds the document (as expected).
But then a search is executed with "gamma--möma" and case_insensitive=true. This time it finds nothing, which is unexpected.
Any idea why this happens?
The script:
# Configuration
# Plain alternative target, kept so the endpoint can be switched by hand:
#CONNECTION="localhost:9200"
INDEX="a"
# curl options followed by the base URL, all in one string; the functions in
# this script expand it unquoted so that it splits into separate curl arguments.
# NOTE(review): this embeds credentials, and the tracing enabled below echoes
# them to the terminal.
CONNECTION="--cacert http_ca.crt -u elastic:xbsv-AdsDWOcefIGzyzJ https://localhost:19200"
# Print every line as it is read (verbose) and every command as it runs (xtrace).
set -o verbose -o xtrace
# Issue an HTTP DELETE for the index named by $INDEX.
# Reads the globals CONNECTION and INDEX.
deleteIndex() {
  local endpoint="$CONNECTION/$INDEX"
  # Left unquoted on purpose: CONNECTION carries curl options as well as the
  # base URL and relies on word splitting to become separate arguments.
  curl -XDELETE $endpoint
}
# Create the index named by $INDEX with its settings and mappings.
# Reads the globals CONNECTION and INDEX.
createIndex() {
  # Request body, sent verbatim. The settings define "automatic_analyzer"
  # (standard tokenizer + lowercase + asciifolding); the "my_kw" mapping itself
  # only sets type "keyword" and "store": true and does not reference it.
  local index_definition='{
"settings": {
"analysis": {
"analyzer" : {
"automatic_analyzer" : {
"filter" : [
"lowercase",
"asciifolding"
],
"tokenizer" : "standard"
}
}
},
"number_of_replicas": "0",
"number_of_shards": "1"
},
"mappings": {
"properties": {
"my_kw": {
"type": "keyword",
"store": true
}
}
}
}'
  # CONNECTION is expanded unquoted on purpose so that the curl options it
  # contains become separate arguments.
  curl -XPUT $CONNECTION/$INDEX -H 'Content-Type: application/json' -d "$index_definition"
}
# Index one document whose only field is "my_kw".
# Globals:   CONNECTION, INDEX (read)
# Arguments: $1 - doc id
#            $2 - keyword value (placed into the JSON verbatim; it is not
#                 JSON-escaped, so it must not contain '"' or '\')
insert() {
  local doc_id=$1
  local keyword=$2
  # printf with a quoted argument passes the value through byte-for-byte; an
  # unquoted expansion would collapse whitespace runs and expand glob characters.
  # CONNECTION stays unquoted on purpose: it holds curl options plus the base
  # URL and must split into separate arguments.
  printf '{"my_kw": "%s"}' "$keyword" \
    | curl -XPOST $CONNECTION/$INDEX/_doc/"$doc_id" -H 'Content-Type: application/json' -d "@-"
}
# Run a term query on "my_kw" and print the response with every comma turned
# into a newline, so each JSON member lands on its own line.
# Globals:   CONNECTION, INDEX (read)
# Arguments: $1 - search term (placed into the JSON verbatim, not JSON-escaped)
#            $2 - case_insensitive false/true
search() {
  local term=$1
  local case_insensitive=$2
  echo
  echo "===== Search ======"
  # Both arguments are spliced in with double quotes so that the request body
  # stays a single curl argument even when the term contains whitespace.
  # CONNECTION stays unquoted on purpose: it holds curl options plus the base
  # URL and must split into separate arguments.
  curl -XPOST $CONNECTION/$INDEX/_search -H 'Content-Type: application/json' -d '
{
"query": {
"term" : {
"my_kw": {
"value": "'"$term"'",
"case_insensitive": '"$case_insensitive"',
"boost": 1
}
}
}
}
' | tr , '\n'
}
# Reproduction: rebuild the index, store one document, then run three term
# queries against it.
deleteIndex
createIndex
insert 1 "Gamma--MÖma"
# Force a refresh so the new document is searchable right away; a fixed sleep
# only hopes that a periodic refresh has already happened.
# CONNECTION is expanded unquoted on purpose so that the curl options it
# contains become separate arguments.
curl -XPOST $CONNECTION/$INDEX/_refresh
# Searching with correct case and case_insensitive false => expect and get one hit
search "Gamma--MÖma" false
# Searching with wrong case, but umlaut in correct case and case_insensitive true => expect and get one hit
search "gamma--mÖma" true
# Searching with wrong case (umlaut also) and case_insensitive true => expect one hit, but get no hit
# NOTE(review): the Elasticsearch term query reference describes
# case_insensitive as ASCII case-insensitive matching, which would leave Ö/ö
# unfolded; confirm against the documentation for the version in use.
search "gamma--möma" true