Hey guys,
Newbie Elastic user here.
I'd like to share an issue I'm having with the highlight feature in a complex search operation.
What I want to achieve is to get the list of fields that matched my search and caused the document to be returned.
Here is my search query, which is composed of 3 multi_match queries on the same fields but with different types and analyzers:
GET /datasets/_search
{
"explain": true,
"highlight": {
"fields": {
"name": {},
"description": {},
"scope": {},
"businessTerms": {}
},
"require_field_match": false
},
"query": {
"bool": {
"must": [
{
"bool": {
"should": [
{
"multi_match": {
"type": "cross_fields",
"query": "Customer something Receipt",
"fields": [
"name^1",
"scope^1",
"description^0.05",
"businessTerms^2"
],
"analyzer": "whitespace_tokenizer",
"_name": "_datasets"
}
},
{
"multi_match": {
"query": "Customer something Receipt",
"type": "phrase",
"slop": 1,
"fields": [
"name^1",
"scope^1",
"description^0.05",
"businessTerms^2"
],
"analyzer": "whitespace_tokenizer",
"_name": "_phrase_datasets"
}
},
{
"multi_match": {
"query": "Customer something Receipt",
"type": "phrase",
"slop": 3,
"fields": [
"name^1",
"scope^1",
"description^0.05",
"businessTerms^2"
],
"analyzer": "lowercase_tokenizer_bu_mapping",
"_name": "_exact_phrase_bu_mapping_datasets"
}
}
]
}
}
],
"must_not": [
{
"term": {
"isPrivate": true
}
}
]
}
}
}
Below is a sample of the response:
{
"_shard": "[datasets][0]",
"_node": "fv-Hfz4kTmuhDxfRsRVM1g",
"_index": "datasets",
"_type": "_doc",
"_id": "SomeId",
"_score": 15.152907,
"_source": {
"name": "Some name",
"code": "SomeId",
"teamId": "teamId",
"description": "Some Description",
"businessTerms": [
"Customer receipt line",
"Customer Receipt",
"Customer Discount",
"Payment line",
"Profit Transaction"
],
"isPrivate": false
},
"matched_queries": [
"_datasets"
]
}
As you can see from the output, there is no highlight section, but matched_queries shows that the query that matched was the _datasets one.
What puzzles me is what happens if I remove the other two queries from my search and keep only the cross_fields one:
GET /datasets/_search
{
"explain": true,
"highlight": {
"fields": {
"name": {},
"description": {},
"scope": {},
"businessTerms": {}
},
"require_field_match": false
},
"query": {
"bool": {
"must": [
{
"bool": {
"should": [
{
"multi_match": {
"type": "cross_fields",
"query": "Customer something Receipt",
"fields": [
"name^1",
"scope^1",
"description^0.05",
"businessTerms^2"
],
"analyzer": "whitespace_tokenizer",
"_name": "_datasets"
}
}
]
}
}
],
"must_not": [
{
"term": {
"isPrivate": true
}
}
]
}
}
}
The output has a highlight that corresponds to what I expect:
{
"_shard": "[datasets][0]",
"_node": "fv-Hfz4kTmuhDxfRsRVM1g",
"_index": "datasets",
"_type": "_doc",
"_id": "SomeId",
"_score": 15.152907,
"_source": {
"name": "Some name",
"code": "SomeId",
"teamId": "teamId",
"description": "Some description",
"businessTerms": [
"Sales",
"Customer receipt line",
"Customer Receipt",
"Customer Discount",
"Payment line",
"Profit Transaction"
],
"isPrivate": false
},
"highlight": {
"businessTerms": [
"<em>Customer</em> <em>receipt</em> line",
"<em>Customer</em> <em>Receipt</em>",
"<em>Customer</em> Discount"
]
},
"matched_queries": [
"_datasets"
]
}
Do you guys have any idea why it behaves like that?
And how can I achieve what I'm looking to do?
Thanks,
Fabian.