Implicit versus explicit definition of the fast vector highlighter

Hi there,

I was under the impression that if a field's mapping sets term_vector to 'with_positions_offsets', the fast vector highlighter will be used automatically for that field. I thought so because of this comment from the docs: "The fast vector highlighter will be used by default for the text field because term vectors are enabled." The comment can be found at the bottom of this page: term_vector | Elasticsearch Guide [8.3] | Elastic. Do I understand this correctly?

It doesn't seem to be the case though, because when I explicitly set the fvh in the query, the query takes much longer (sometimes about a 5-10x increase). If I leave out the explicit type of highlighter in the query, then it's fast.

"fields": {
    "versions.title": {
        "type": "fvh", <------- Including this line makes the query much slower
        "number_of_fragments": 0
    },
    "versions.body": {
        "type": "fvh", <------- Including this line makes the query much slower
        "number_of_fragments": 0
    }
}

My questions are:

  • Given my mappings and query below, what highlighter is supposed to be used for the title and body?
  • If the FVH highlighter is supposed to be used by default, then why would explicitly setting the FVH suddenly make the query take much longer?
  • Is there some way to see which highlighter is being applied?

The mappings:

GET article_set_index/_mappings
{
  "article_set_index": {
    "mappings": {
      "dynamic": "false",
      "properties": {
        "authorIds": {
          "type": "keyword"
        },
        "language": {
          "type": "keyword"
        },
        "uniquePublicationDates": {
          "type": "date"
        },
        "uniqueSerialIds": {
          "type": "keyword"
        },
        "versions": {
          "type": "nested",
          "properties": {
            "authorIds": {
              "type": "keyword"
            },
            "body": {
              "type": "text",
              "term_vector": "with_positions_offsets",
              "analyzer": "folding"
            },
            "created": {
              "type": "date"
            },
            "language": {
              "type": "keyword"
            },
            "publicationDate": {
              "type": "date"
            },
            "published": {
              "type": "date"
            },
            "serialId": {
              "type": "keyword"
            },
            "title": {
              "type": "text",
              "term_vector": "with_positions_offsets",
              "analyzer": "folding"
            }
          }
        }
      }
    }
  }
}

Query without explicitly setting the fvh:

POST article_set_index/_search
{
  "size": 0,
  "query": {
    "nested": {
      "path": "versions",
      "score_mode": "max",
      "inner_hits": {
        "size": 100,
        "highlight": {
          "pre_tags": [
            "<mark>"
          ],
          "post_tags": [
            "</mark>"
          ],
          "fields": {
            "versions.title": {
              "number_of_fragments": 0
            },
            "versions.body": {
              "number_of_fragments": 0
            }
          }
        },
        "sort": {
          "versions.published": {
            "order": "asc"
          }
        }
      },
      "query": {
        "bool": {
          "must": [
            {
              "dis_max": {
                "queries": [
                  {
                    "simple_query_string": {
                      "query": """((startup) | ("start-up") | (tech* onderneming*) -("sprout update")""",
                      "default_operator": "and",
                      "fields": [
                        "versions.title",
                        "versions.body"
                      ]
                    }
                  },
                  {
                    "bool": {
                      "must": [
                        {
                          "simple_query_string": {
                            "query": """(technol*) | (techiek) | (innovat*) | ((startup) | ("start-up") | (tech*) | (artificial) | (kunstmatig*)""",
                            "default_operator": "and",
                            "fields": [
                              "versions.title",
                              "versions.body"
                            ]
                          }
                        }
                      ],
                      "filter": [
                        {
                          "terms": {
                            "versions.authorIds": [
                              "erwin-boogert-9395"
                            ]
                          }
                        }
                      ]
                    }
                  }
                ],
                "tie_breaker": 0.7
              }
            }
          ],
          "filter": [
            {
              "terms": {
                "versions.language": [
                  "nl"
                ]
              }
            },
            {
              "range": {
                "versions.publicationDate": {
                  "gte": "2022-02-11",
                  "lte": "2022-08-11"
                }
              }
            }
          ]
        }
      }
    }
  },
  "aggs": {
    "totalAuthorCount": {
      "cardinality": {
        "field": "authorIds",
        "precision_threshold": 100
      }
    },
    "authors": {
      "terms": {
        "field": "authorIds",
        "size": 121,
        "shard_size": 400
      },
      "aggs": {
        "articles": {
          "top_hits": {
            "size": 5
          }
        }
      }
    }
  },
  "timeout": "180000ms"
}

This topic was automatically closed 28 days after the last reply. New replies are no longer allowed.