How to highlight only the matching substring in Elasticsearch

I am getting the expected highlighting of the substring that matches the user's search keyword in the companyName and country fields. However, in the emailId field, the entire field value is highlighted instead of just the matching substring.

Query

POST customers-index/_search
{
  "size": 20,
  "query": {
    "must": [
      {
        "multi_match": {
          "query": "state",
          "fields": [
            "emailId.autocomplete",
            "companyName.autocomplete",
            "country.autocomplete"
          ]
        }
      }
    ]
  },
  "highlight": {
    "fields": {
      "emailId.autocomplete": {},
      "companyName.autocomplete": {},
      "country.autocomplete": {}
    }
  },
  "_source": [
    "emailId",
    "companyName",
    "country"
  ]
}

Response

[
 {
        "_index" : "customers-index",
        "_type" : "_doc",
        "_id" : "c44aefde-22b1-471d-9159-a092e5c604f6",
        "_score" : 14.853605,
        "_source" : {
          "country" : "Ethiopia",
          "companyName" : "La Galtoara, Inc.",
          "emailId" : [
            "galtoara@state.gov"
          ]
        },
        "highlight" : {
          "emailId.autocomplete" : [
            "<em>galtoara@state.gov</em>"
          ]
        }
      },
      {
        "_index" : "customers-index",
        "_type" : "_doc",
        "_id" : "f76ecf0a-3e7d-41f9-a96f-83c66698f2d1",
        "_score" : 3.6045084,
        "_source" : {
          "country" : "Philippines",
          "companyName" : "Belgone State Medical, Inc.",
          "emailId" : [
            "dopasdfd@apple.com"
          ]
        },
        "highlight" : {
          "companyName.autocomplete" : [
            "Belgone <em>State</em> Medical, Inc."
          ]
        }
      },
      {
        "_index" : "customers-index",
        "_type" : "_doc",
        "_id" : "b41b1c0c-e84d-4424-a862-38b10d380d23",
        "_score" : 2.1431046,
        "_source" : {
          "country" : "United States",
          "companyName" : "DFDFDF Brands Limited",
          "emailId" : [
            "adfadfad@godaddy.com"
          ]
        },
        "highlight" : {
          "country.autocomplete" : [
            "United <em>State</em>s"
          ]
        }
      }
]

Mapping

{
  "emailId": {
    "type": "text",
    "fields": {
      "autocomplete": {
        "type": "text",
        "analyzer": "autocomplete_email_analyzer",
        "search_analyzer": "search_analyzer"
      },
      "keyword": {
        "type": "keyword",
        "ignore_above": 256,
        "normalizer": "lowercase_normalizer"
      }
    },
    "analyzer": "index_analyzer",
    "search_analyzer": "search_analyzer"
  }
}

Setting

{
  "analysis": {
    "filter": {
      "email_filter": {
        "type": "pattern_capture",
        "preserve_original": "true",
        "patterns": [
          "/regex-pattern/"
        ]
      },
      "starts_with_filter": {
        "type": "edge_ngram",
        "min_gram": "1",
        "max_gram": "100"
      }
    },
    "analyzer": {
      "search_analyzer": {
        "filter": [
          "lowercase"
        ],
        "tokenizer": "keyword"
      },
      "index_analyzer": {
        "filter": [
          "lowercase"
        ],
        "tokenizer": "index_analyzer"
      },
      "autocomplete_email_analyzer": {
        "filter": [
          "email_filter",
          "unique",
          "starts_with_filter",
          "lowercase"
        ],
        "tokenizer": "autocomplete_email_tokenizer"
      }
    },
    "tokenizer": {
      "autocomplete_email_tokenizer": {
        "type": "uax_url_email"
      }
    }
  }
}

Note: I have included only the mappings and settings relevant to the emailId field, as it is the field of concern.

As the response shows, the companyName and country fields are highlighted only on the substring that matches the search query; the emailId field, however, is highlighted in its entirety.

How can I highlight only the matching substring in the emailId field as well? Any help is much appreciated. Thanks in advance.

Updated Setting

{
  "analysis": {
    "filter": {
      "email_filter": {
        "type": "pattern_capture",
        "preserve_original": "true",
        "patterns": [
          """(?=([@|\.|\!|\#|\$|%|&|'|\*|\+|\-|\/|\=|\?|\^|\_|\`|\{|\||\}|\~](.+)))"""
        ]
      },
      "starts_with_filter": {
        "type": "edge_ngram",
        "min_gram": "1",
        "max_gram": "100"
      }
    },
    "analyzer": {
      "search_analyzer": {
        "filter": [
          "lowercase"
        ],
        "tokenizer": "keyword"
      },
      "index_analyzer": {
        "filter": [
          "lowercase"
        ],
        "tokenizer": "index_analyzer"
      },
      "autocomplete_email_analyzer": {
        "filter": [
          "email_filter",
          "unique",
          "starts_with_filter",
          "lowercase"
        ],
        "tokenizer": "autocomplete_email_tokenizer"
      }
    },
    "normalizer": {
      "lowercase_normalizer": {
        "filter": [
          "lowercase"
        ],
        "type": "custom",
        "char_filter": []
      }
    },
    "tokenizer": {
      "autocomplete_email_tokenizer": {
        "type": "uax_url_email"
      },
      "index_analyzer": {
        "token_chars": [
          "letter",
          "digit",
          "whitespace",
          "punctuation",
          "symbol"
        ],
        "min_gram": "2",
        "type": "ngram",
        "max_gram": "30"
      }
    }
  }
}
 

This topic was automatically closed 28 days after the last reply. New replies are no longer allowed.