My task is:
- Make `procter&gamble` and `procter & gamble` produce the same results, including the score
- Make it universal, not via synonyms, as it can be any other phrase, e.g. `Somehow&Somewhat`
- Highlight `procter&gamble` or `procter & gamble` as a whole, not as separate tokens, if the phrase matches
- I want to use `simple_query_string`, as I allow search operators
- Make `AT&T` searchable as well
Here is my snippet. The problem is that `procter&gamble` or `procter & gamble` searches produce different scores and thus different documents as the result.
But the user expects the same result for `procter&gamble` or `procter & gamble`.
# Reset step: drop the index if it exists. `ignore_unavailable=true` keeps the
# request from failing with index_not_found_exception on the very first run.
DELETE /english_example?ignore_unavailable=true
# Create the index and define its `default` analyzer.
# Pipeline: the `ampersand_filter` char filter rewrites "&" (together with any
# spaces around it) to "_and_" before tokenizing, the whitespace tokenizer
# splits the text, then the token filters run in the order listed.
# The pattern needs no lookarounds: " *& *" already matches an ampersand plus
# any spaces on either side of it.
# `acronymns_` and `ampersand_filter2` are defined but not referenced by the
# `default` analyzer in this request.
PUT /english_example
{
  "settings": {
    "analysis": {
      "char_filter": {
        "ampersand_filter": {
          "type": "pattern_replace",
          "pattern": " *& *",
          "replacement": "_and_"
        },
        "ampersand_filter2": {
          "type": "mapping",
          "mappings": [
            "& => _and_"
          ]
        }
      },
      "filter": {
        "english_stop": {
          "type": "stop",
          "stopwords": "_english_"
        },
        "english_keywords": {
          "type": "keyword_marker",
          "keywords": ["example"]
        },
        "english_stemmer": {
          "type": "stemmer",
          "language": "english"
        },
        "english_possessive_stemmer": {
          "type": "stemmer",
          "language": "possessive_english"
        },
        "acronymns": {
          "type": "word_delimiter_graph",
          "catenate_all": true,
          "preserve_original": true
        },
        "acronymns_": {
          "type": "word_delimiter_graph",
          "catenate_all": true,
          "preserve_original": true
        },
        "custom_stop_words_filter": {
          "type": "stop",
          "ignore_case": true,
          "stopwords": ["t"]
        }
      },
      "analyzer": {
        "default": {
          "tokenizer": "whitespace",
          "char_filter": [
            "ampersand_filter"
          ],
          "filter": [
            "english_possessive_stemmer",
            "lowercase",
            "acronymns",
            "flatten_graph",
            "english_stop",
            "custom_stop_words_filter",
            "english_keywords",
            "english_stemmer"
          ]
        }
      }
    }
  }
}
# Index the five sample documents. `refresh=true` makes them visible to search
# as soon as the request returns, so the searches that follow see all of them
# even when the whole script is run in one go.
PUT /english_example/_bulk?refresh=true
{ "index" : { "_id" : "1" } }
{ "description" : "wi-fi AT&T BB&T Procter & Gamble, some\nOther $500 games with Peter's", "contents" : "Much text with somewhere I meet Procter or Gamble" }
{ "index" : { "_id" : "2" } }
{ "description" : "Procter & Gamble", "contents" : "Much text with somewhere I meet Procter and Gamble" }
{ "index" : { "_id" : "3" } }
{ "description" : "Procter&Gamble", "contents" : "Much text with somewhere I meet Procter & Gamble" }
{ "index" : { "_id" : "4" } }
{ "description" : "Come Procter&Gamble", "contents" : "Much text with somewhere I meet Procter&Gamble" }
{ "index" : { "_id" : "5" } }
{ "description" : "Tome Procter & Gamble", "contents" : "Much text with somewhere I don't meet AT&T" }
# "query": "procter & gamble",
# Spaced form of the phrase; OR-combined terms over both boosted fields, with highlighting on each.
GET english_example/_search
{
  "highlight": {
    "fields": {
      "description": {},
      "contents": {}
    }
  },
  "query": {
    "simple_query_string": {
      "fields": ["description^2", "contents^80"],
      "default_operator": "or",
      "query": "procter & gamble"
    }
  }
}
# "query": "procter&gamble",
# Compact form of the phrase (no spaces around "&"); same fields, boosts and highlighting as the spaced search.
GET english_example/_search
{
  "highlight": {
    "fields": {
      "description": {},
      "contents": {}
    }
  },
  "query": {
    "simple_query_string": {
      "fields": ["description^2", "contents^80"],
      "default_operator": "or",
      "query": "procter&gamble"
    }
  }
}
# "query": "at&t",
# Short acronym case; same fields, boosts and highlighting as the two searches above.
GET english_example/_search
{
  "highlight": {
    "fields": {
      "description": {},
      "contents": {}
    }
  },
  "query": {
    "simple_query_string": {
      "fields": ["description^2", "contents^80"],
      "default_operator": "or",
      "query": "at&t"
    }
  }
}