Hi all,
I'm having a hard time configuring ES correctly so that accented and
non-accented words are matched the same way.
Here is a sequence anyone can execute:
delete index
curl -XDELETE 'http://localhost:9200/my_index'
create index
curl -XPOST 'http://localhost:9200/my_index'
check create ok
curl -XGET 'http://localhost:9200/my_index/_settings'
close index
curl -XPOST 'http://localhost:9200/my_index/_close'
update index settings
curl -XPUT 'http://localhost:9200/my_index/_settings' -d '{
"index.analysis.analyzer.default.type":"snowball",
"index.analysis.analyzer.default.tokenizer":"standard",
"index.analysis.analyzer.default.filter.0":"standard",
"index.analysis.analyzer.default.filter.1":"lowercase",
"index.analysis.analyzer.default.filter.2":"asciifolding",
"index.analysis.analyzer.default.filter.3":"french_stemmer",
"index.analysis.filter.french_stemmer.type":"stemmer",
"index.analysis.filter.french_stemmer.name":"light_french"
}'
open index
curl -XPOST 'http://localhost:9200/my_index/_open'
check update index settings
curl -XGET 'http://localhost:9200/my_index/_settings'
create type
curl -XPUT 'http://localhost:9200/my_index/my_type/_mapping' -d
'{"my_type":{"properties":{"title":{"type":"string"},"reference":{"type":"string",
"index":"not_analyzed"}}}}'
check create type
curl -XGET 'http://localhost:9200/my_index/my_type/_mapping'
add data
curl -XPUT 'http://localhost:9200/my_index/my_type/1' -d
'{"reference":"ADV-REF-00000001", "title":"Ingénieur Java"}'
curl -XPUT 'http://localhost:9200/my_index/my_type/2' -d
'{"reference":"ADV-REF-00000002", "title":"Conservateur documentaliste"}'
curl -XPUT 'http://localhost:9200/my_index/my_type/3' -d
'{"reference":"ADV-REF-00000003", "title":"Technicien qualité validation
H/F"}'
curl -XPUT 'http://localhost:9200/my_index/my_type/4' -d
'{"reference":"ADV-REF-00000004", "title":"Valet de chambre"}'
curl -XPUT 'http://localhost:9200/my_index/my_type/5' -d
'{"reference":"ADV-REF-00000005", "title":"Ingénieur PHP"}'
check add data
curl -XGET 'http://localhost:9200/my_index/my_type/1'
search data
curl -XGET 'http://localhost:9200/my_index/my_type/_search' -d
'{"query":{"query_string":{"analyze_wildcard":"true",
"query":"title:ingenieur"}}}'
curl -XGET 'http://localhost:9200/my_index/my_type/_search' -d
'{"query":{"query_string":{"analyze_wildcard":"true",
"query":"title:ingénieur"}}}'
curl -XGET 'http://localhost:9200/my_index/my_type/_search' -d
'{"query":{"query_string":{"analyze_wildcard":"true",
"query":"title:inge"}}}'
curl -XGET 'http://localhost:9200/my_index/my_type/_search' -d
'{"query":{"query_string":{"analyze_wildcard":"true",
"query":"title:ingé"}}}'
curl -XGET 'http://localhost:9200/my_index/my_type/_search' -d
'{"query":{"query_string":{"analyze_wildcard":"true",
"query":"title:Ingenieur"}}}'
curl -XGET 'http://localhost:9200/my_index/my_type/_search' -d
'{"query":{"query_string":{"analyze_wildcard":"true",
"query":"title:Ingénieur"}}}'
curl -XGET 'http://localhost:9200/my_index/my_type/_search' -d
'{"query":{"query_string":{"analyze_wildcard":"true",
"query":"title:Inge"}}}'
curl -XGET 'http://localhost:9200/my_index/my_type/_search' -d
'{"query":{"query_string":{"analyze_wildcard":"true",
"query":"title:Ingé"}}}'
I configured snowball, asciifolding, french_stemmer on my index.
Each query should return 2 results, corresponding to _id=1 and _id=5.
However, the first and the fifth queries return 0 results.
Any hints on what I am doing wrong?
Thanks for your help,
--
Cordialement/Regards,
Louis GUEYE
linkedin http://fr.linkedin.com/in/louisgueye |
blog http://deepintojee.wordpress.com/ |
twitter http://twitter.com/#!/lgueye