Hey everybody, I'm running into an issue where highlights are not generated for certain phrase-style queries built from span_near with wildcard or fuzzy span_multi clauses, even though those queries do match the document.
The steps below reproduce the issue from the Kibana console.
Elasticsearch version 7.9.3
1. Create Index
PUT /test-index-1
{
"mappings": {
"properties": {
"transcription_en": {
"type" : "text",
"fields" : {
"keyword" : {
"type" : "keyword",
"ignore_above" : 10000
}
}
}
}
}
}
2. Create Document
PUT /test-index-1/_doc/1
{
"transcription_en": "this is an example of a transcription string for fuzzy and wildcard phrase matching"
}
3. Check Queries
WORKING QUERIES - these generate highlights as expected
3a. Basic phrase match query:
GET test-index-1/_search
{
"highlight": {
"fields": {
"transcription_en": {}
},
"number_of_fragments": 0
},
"query": {
"bool": {
"must": [
{
"bool": {
"should": [
{"match_phrase": {"transcription_en": "phrase matching"}}
]
}
}
]
}
}
}
3a. Response:
{
"took" : 5,
"timed_out" : false,
"_shards" : {
"total" : 1,
"successful" : 1,
"skipped" : 0,
"failed" : 0
},
"hits" : {
"total" : {
"value" : 1,
"relation" : "eq"
},
"max_score" : 0.5753642,
"hits" : [
{
"_index" : "test-index-1",
"_type" : "_doc",
"_id" : "1",
"_score" : 0.5753642,
"_source" : {
"transcription_en" : "this is an example of a transcription string for fuzzy and wildcard phrase matching"
},
"highlight" : {
"transcription_en" : [
"this is an example of a transcription string for fuzzy and wildcard <em>phrase</em> <em>matching</em>"
]
}
}
]
}
}
3b. Fuzzy phrase match query:
GET test-index-1/_search
{
"highlight": {
"fields": {
"transcription_en": {}
},
"number_of_fragments": 0
},
"query": {
"bool": {
"must": [
{
"bool": {
"should": [
{
"span_near": {
"clauses": [
{
"span_multi": {
"match": {
"fuzzy": {
"transcription_en": {
"fuzziness": "auto",
"value": "phrasw"
}
}
}
}
},
{
"span_multi": {
"match": {
"fuzzy": {
"transcription_en": {
"fuzziness": "auto",
"value": "matching"
}
}
}
}
}
],
"in_order": true,
"slop": 0
}
}
]
}
}
]
}
}
}
3b. Response:
{
"took" : 23,
"timed_out" : false,
"_shards" : {
"total" : 1,
"successful" : 1,
"skipped" : 0,
"failed" : 0
},
"hits" : {
"total" : {
"value" : 1,
"relation" : "eq"
},
"max_score" : 0.5753642,
"hits" : [
{
"_index" : "test-index-1",
"_type" : "_doc",
"_id" : "1",
"_score" : 0.5753642,
"_source" : {
"transcription_en" : "this is an example of a transcription string for fuzzy and wildcard phrase matching"
},
"highlight" : {
"transcription_en" : [
"this is an example of a transcription string for fuzzy and wildcard <em>phrase</em> <em>matching</em>"
]
}
}
]
}
}
========================================
FAILING QUERIES - these match the document but do NOT generate highlights
3c. Wildcard phrase match query:
GET test-index-1/_search
{
"highlight": {
"fields": {
"transcription_en": {}
},
"number_of_fragments": 0
},
"query": {
"bool": {
"must": [
{
"bool": {
"should": [
{
"span_near": {
"clauses": [
{
"span_multi": {
"match": {
"wildcard": {"transcription_en": {"value": "phras?"}}
}
}
},
{
"span_multi": {
"match": {
"wildcard": {"transcription_en": {"value": "matching"}}
}
}
}
],
"in_order": true,
"slop": 0
}
}
]
}
}
]
}
}
}
3c. Response:
{
"took" : 0,
"timed_out" : false,
"_shards" : {
"total" : 1,
"successful" : 1,
"skipped" : 0,
"failed" : 0
},
"hits" : {
"total" : {
"value" : 1,
"relation" : "eq"
},
"max_score" : 0.5753642,
"hits" : [
{
"_index" : "test-index-1",
"_type" : "_doc",
"_id" : "1",
"_score" : 0.5753642,
"_source" : {
"transcription_en" : "this is an example of a transcription string for fuzzy and wildcard phrase matching"
}
}
]
}
}
3d. Alternate fuzzy phrase match query:
GET test-index-1/_search
{
"highlight": {
"fields": {
"transcription_en": {}
},
"number_of_fragments": 0
},
"query": {
"bool": {
"must": [
{
"bool": {
"should": [
{
"span_near": {
"clauses": [
{
"span_multi": {
"match": {
"fuzzy": {
"transcription_en": {
"fuzziness": "auto",
"value": "this"
}
}
}
}
},
{
"span_multi": {
"match": {
"fuzzy": {
"transcription_en": {
"fuzziness": "auto",
"value": "is"
}
}
}
}
},
{
"span_multi": {
"match": {
"fuzzy": {
"transcription_en": {
"fuzziness": "auto",
"value": "an"
}
}
}
}
},
{
"span_multi": {
"match": {
"fuzzy": {
"transcription_en": {
"fuzziness": "auto",
"value": "examplw"
}
}
}
}
}
],
"in_order": true,
"slop": 0
}
}
]
}
}
]
}
}
}
3d. Response:
{
"took" : 7,
"timed_out" : false,
"_shards" : {
"total" : 1,
"successful" : 1,
"skipped" : 0,
"failed" : 0
},
"hits" : {
"total" : {
"value" : 1,
"relation" : "eq"
},
"max_score" : 1.1507283,
"hits" : [
{
"_index" : "test-index-1",
"_type" : "_doc",
"_id" : "1",
"_score" : 1.1507283,
"_source" : {
"transcription_en" : "this is an example of a transcription string for fuzzy and wildcard phrase matching"
}
}
]
}
}