Annotated text highlighter identifies incorrect phrase hit when wrapped in function_score query

Is the following behaviour a bug, or am I doing something wrong?

Given I create a new index with annotated text fields:

PUT test_index
{
    "mappings": {
        "properties": {
            "a": {
                "type": "annotated_text"
            },
            "b": {
                "type": "annotated_text"
            }
        }
    }
}

And I index a document

POST test_index/_doc/1
{
    "a": "some test data",
    "b": "some more test stuff"
}

When I run the following query (a phrase query wrapped in function_score):

GET  test_index/_search
{
  "query": {
    "function_score": {
      "query": {
        "query_string": {
          "query": "\"test data\""
        }
      },
      "functions": [
        {
          "filter": {
            "query_string": {
                "query": "stuff"
            }
          },
          "weight": 5.0
        }
      ],
      "score_mode": "multiply"
    }
  },
  "highlight": {
    "type": "annotated",
    "fields": {
      "a": {},
      "b": {}
    }
  }
}

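Then I expect to get 1 matching highlight fragment (in field "a" only), but I get 2 — field "b" is also highlighted, even though it does not contain the phrase "test data":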

{
    "took": 1,
    "timed_out": false,
    "_shards": {
        "total": 1,
        "successful": 1,
        "skipped": 0,
        "failed": 0
    },
    "hits": {
        "total": {
            "value": 1,
            "relation": "eq"
        },
        "max_score": 2.8768208,
        "hits": [
            {
                "_index": "test_index",
                "_type": "_doc",
                "_id": "1",
                "_score": 2.8768208,
                "_source": {
                    "a": "some test data",
                    "b": "some more test stuff"
                },
                "highlight": {
                    "a": [
                        "some [test](_hit_term=test) [data](_hit_term=data)"
                    ],
                    "b": [
                        "some more [test](_hit_term=test) stuff"
                    ]
                }
            }
        ]
    }
}

When I run the same query not wrapped in function_score

GET  test_index/_search
{
    "query": {
        "query_string": {
            "query": "\"test data\""
        }
    },
    "highlight": {
        "type": "annotated",
        "fields": {
            "a": {},
            "b": {}
        }
    }
}

Then I get the correct result

{
    "took": 1,
    "timed_out": false,
    "_shards": {
        "total": 1,
        "successful": 1,
        "skipped": 0,
        "failed": 0
    },
    "hits": {
        "total": {
            "value": 1,
            "relation": "eq"
        },
        "max_score": 0.5753642,
        "hits": [
            {
                "_index": "test_index",
                "_type": "_doc",
                "_id": "1",
                "_score": 0.5753642,
                "_source": {
                    "a": "some test data",
                    "b": "some more test stuff"
                },
                "highlight": {
                    "a": [
                        "some [test](_hit_term=test) [data](_hit_term=data)"
                    ]
                }
            }
        ]
    }
}

When I run the same function_score query with the plain highlighter instead:

GET test_index/_search
{
  "query": {
    "function_score": {
      "query": {
        "query_string": {
          "query": "\"test data\""
        }
      },
      "functions": [
        {
          "filter": {
            "query_string": {
                "query": "stuff"
            }
          },
          "weight": 5.0
        }
      ],
      "score_mode": "multiply"
    }
  },
  "highlight": {
    "type": "plain",
    "fields": {
      "a": {},
      "b": {}
    }
  }
}

Then I get the correct result

{
    "took": 1,
    "timed_out": false,
    "_shards": {
        "total": 1,
        "successful": 1,
        "skipped": 0,
        "failed": 0
    },
    "hits": {
        "total": {
            "value": 1,
            "relation": "eq"
        },
        "max_score": 2.8768208,
        "hits": [
            {
                "_index": "test_index",
                "_type": "_doc",
                "_id": "1",
                "_score": 2.8768208,
                "_source": {
                    "a": "some test data",
                    "b": "some more test stuff"
                },
                "highlight": {
                    "a": [
                        "some <em>test</em> <em>data</em>"
                    ]
                }
            }
        ]
    }
}

This topic was automatically closed 28 days after the last reply. New replies are no longer allowed.