Hi. I'm facing a problem with search highlighting. When using the cross_fields type of multi_match query, the content returned in the highlight does not take synonyms into account.
Example:
"settings": {
"index": {
"analysis": {
"analyzer": {
"stemmer_search": {
"filter": [
"lowercase",
"asciifolding",
"brazilian_stemmer"
],
"type": "custom",
"tokenizer": "standard"
},
"synonyms_search": {
"filter": [
"lowercase",
"asciifolding",
"search_synoym",
"brazilian_stop",
"brazilian_stemmer"
],
"type": "custom",
"tokenizer": "standard"
}
},
"filter": {
"brazilian_stemmer": {
"type": "stemmer",
"language": "brazilian"
},
"search_synoym": {
"type": "synonym_graph",
"synonyms": [
"car, red car, big car",
"phone, iphone, tele phone"
]
},
"brazilian_stop": {
"type": "stop",
"stopwords": [
"a","agora","(...)"
]
}
}
}
}
},
"mappings": {
"properties": {
"title": {
"type": "text",
"search_analyzer": "synonyms_search",
"analyzer": "stemmer_search"
},
"body": {
"type": "text",
"search_analyzer": "synonyms_search",
"analyzer": "stemmer_search"
}
}
P.S.: I've omitted part of the stopword list (shown as "(...)" above).
PUT http://localhost:9200/teste/_doc/1
{
"title":"The search for de car",
"body": "The boy was looking for something red"
}
PUT http://localhost:9200/teste/_doc/2
{
"title":"calling from the tele phone",
"body": "the guy in the big car use your motorola for calls"
}
Search:
{
"highlight": {
"encoder": "html",
"fields": {
"title": {
"order": "score",
"type": "unified"
},
"body": {
"order": "score",
"type": "unified"
}
},
"post_tags": [
"]]]"
],
"pre_tags": [
"[[["
]
},
"query": {
"multi_match": {
"fields": [
"title",
"body"
],
"minimum_should_match": "100%",
"operator": "and",
"query": "red car",
"type": "cross_fields"
}
}
}
The result:
{
"took": 75,
"timed_out": false,
"_shards": {
"total": 1,
"successful": 1,
"skipped": 0,
"failed": 0
},
"hits": {
"total": {
"value": 2,
"relation": "eq"
},
"max_score": 1.9061548,
"hits": [
{
"_index": "teste",
"_type": "_doc",
"_id": "2",
"_score": 1.9061548,
"_source": {
"title": "calling from the tele phone",
"body": "the guy in the big car use your motorola for calls"
},
"highlight": {
"body": [
"the guy in the [[[big]]] [[[car]]] use your motorola for calls"
]
}
},
{
"_index": "teste",
"_type": "_doc",
"_id": "1",
"_score": 0.6931472,
"_source": {
"title": "The search for de car",
"body": "The boy was looking for something red"
}
}
]
}
}
The first result has a synonym highlighted, but the second does not. The word 'car' in its title should be highlighted.
Any clue about this situation?
Thanks.