Highlight "best matches" using Elastic Highlighting API

Hi all. Is it possible to highlight "best matches" using Elastic Highlighting API?
By "best match", I mean highlighting only the part of a word that exactly matches the search text, even when the whole word is matched by the query. For example:

  • document content is Dubai
  • search query is duba
  • and the desired result is <b>Duba</b>i

The problem lies in my query: it contains several clauses that use "fuzziness".
Here is the example index configuration:

PUT /highlight_best_match
{
  "settings": {
    "number_of_shards": "1",
    "number_of_replicas": "1",
    "analysis": {
      "filter": {
        "language_stemmer": {
          "name": "german2",
          "type": "stemmer"
        },
        "language_stopwords": {
          "type": "stop",
          "stopwords": "_german_"
        }
      },
      "char_filter": {
        "ampersand_to_and": {
          "type": "mapping",
          "mappings": [
            "&=> and "
          ]
        }
      },
      "analyzer": {
        "prefix_analyzer": {
          "type": "custom",
          "tokenizer": "edge_ngram_tokenizer",
          "filter": [
            "german_normalization",
            "lowercase"
          ]
        },
        "match_analyzer": {
          "char_filter": [
            "html_strip",
            "ampersand_to_and"
          ],
          "type": "custom",
          "tokenizer": "standard",
          "filter": [
            "lowercase",
            "asciifolding",
            "language_stopwords",
            "language_stemmer"
          ]
        },
        "search_analyzer": {
          "type": "custom",
          "tokenizer": "keyword",
          "filter": [
            "german_normalization",
            "lowercase"
          ]
        }
      },
      "tokenizer": {
        "edge_ngram_tokenizer": {
          "type": "edge_ngram",
          "min_gram": "2",
          "max_gram": "20",
          "token_chars": [
            "letter",
            "digit"
          ]
        }
      }
    }
  },
  "mappings": {
    "default": {
      "dynamic": "false",
      "properties": {
        "id": {
          "type": "integer"
        },
        "title": {
          "type": "keyword",
          "fields": {
            "match": {
              "type": "text",
              "term_vector": "with_positions_offsets",
              "index_options": "offsets",
              "analyzer": "match_analyzer"
            },
            "prefix": {
              "type": "text",
              "term_vector": "with_positions_offsets",
              "index_options": "offsets",
              "analyzer": "prefix_analyzer",
              "search_analyzer": "search_analyzer"
            }
          }
        }
      }
    }
  }
}

and some data illustrating the example:

POST /_bulk
{"create": {"_id": "1", "_index": "highlight_best_match", "_type": "default"} }
{"title": "Dubai"}
{"create": {"_id": "2", "_index": "highlight_best_match", "_type": "default"} }
{"title": "Dumai"}
{"create": {"_id": "3", "_index": "highlight_best_match", "_type": "default"} }
{"title": "Cuba"}
{"create": {"_id": "4", "_index": "highlight_best_match", "_type": "default"} }
{"title": "Kuba Südküste"}
{"create": {"_id": "5", "_index": "highlight_best_match", "_type": "default"} }
{"title": "Dubai Kreuzfahrt"}

The query is

GET /highlight_best_match/_search
{
  "query": {
    "bool": {
      "must": {
        "bool": {
          "should": [
            {
              "match": {
                "title.prefix": {
                  "query": "duba",
                  "fuzziness": 1,
                  "boost": 1
                }
              }
            },
            {
              "match": {
                "title.match": {
                  "query": "duba",
                  "fuzziness": 1,
                  "boost": 1
                }
              }
            }
          ]
        }
      },
      "should": [
        {
          "match_phrase_prefix": {
            "title.match": {
              "query": "duba",
              "boost": 5
            }
          }
        },
        {
          "match": {
            "title.prefix": {
              "query": "duba",
              "fuzziness": 0,
              "boost": 3
            }
          }
        },
        {
          "match": {
            "title.match": {
              "query": "duba",
              "fuzziness": 0,
              "boost": 10
            }
          }
        }
      ]
    }
  },
  "highlight": {
    "encoder": "plain",
    "order": "score",
    "pre_tags": [
      "<b>"
    ],
    "post_tags": [
      "</b>"
    ],
    "fields": {
      "title.prefix": {
        "type": "fvh",
        "matched_fields": [
          "title.match",
          "title.prefix"
        ]
      }
    }
  }
}

and the result is

{
  "took": 4,
  "timed_out": false,
  "_shards": {
    "total": 1,
    "successful": 1,
    "skipped": 0,
    "failed": 0
  },
  "hits": {
    "total": 5,
    "max_score": 11.402948,
    "hits": [
      {
        "_index": "highlight_best_match",
        "_type": "default",
        "_id": "1",
        "_score": 11.402948,
        "_source": {
          "title": "Dubai"
        },
        "highlight": {
          "title.prefix": [
            "<b>Dubai</b>"
          ]
        }
      },
      {
        "_index": "highlight_best_match",
        "_type": "default",
        "_id": "5",
        "_score": 6.812179,
        "_source": {
          "title": "Dubai Kreuzfahrt"
        },
        "highlight": {
          "title.prefix": [
            "<b>Dubai</b> Kreuzfahrt"
          ]
        }
      },
      {
        "_index": "highlight_best_match",
        "_type": "default",
        "_id": "3",
        "_score": 1.5331156,
        "_source": {
          "title": "Cuba"
        },
        "highlight": {
          "title.prefix": [
            "<b>Cuba</b>"
          ]
        }
      },
      {
        "_index": "highlight_best_match",
        "_type": "default",
        "_id": "4",
        "_score": 1.0343978,
        "_source": {
          "title": "Kuba Südküste"
        },
        "highlight": {
          "title.prefix": [
            "<b>Kuba</b> Südküste"
          ]
        }
      },
      {
        "_index": "highlight_best_match",
        "_type": "default",
        "_id": "2",
        "_score": 0.7896109,
        "_source": {
          "title": "Dumai"
        },
        "highlight": {
          "title.prefix": [
            "<b>Duma</b>i"
          ]
        }
      }
    ]
  }
}

Please take a look at the results with ID = 1 and ID = 5. Is it possible to highlight only "Duba" there (i.e. <b>Duba</b>i), in the same way that only part of the word is highlighted in <b>Duma</b>i in the result with ID = 2?

I know that I can set highlight_query to contain only the exact query.
But I'd like results to be highlighted in every case, with exact matches preferred whenever they exist.

Thank you in advance!

This topic was automatically closed 28 days after the last reply. New replies are no longer allowed.