I am getting the expected highlighting of the substring that matches the user's search keyword in the companyName and country fields. However, in the emailId field, the entire field value is highlighted instead of only the matching substring.
Query
POST customers-index/_search
{
  "size": 20,
  "query": {
    "bool": {
      "must": [
        {
          "multi_match": {
            "query": "state",
            "fields": [
              "emailId.autocomplete",
              "companyName.autocomplete",
              "country.autocomplete"
            ]
          }
        }
      ]
    }
  },
  "highlight": {
    "fields": {
      "emailId.autocomplete": {},
      "companyName.autocomplete": {},
      "country.autocomplete": {}
    }
  },
  "_source": [
    "emailId",
    "companyName",
    "country"
  ]
}
Response
[
  {
    "_index": "customers-index",
    "_type": "_doc",
    "_id": "c44aefde-22b1-471d-9159-a092e5c604f6",
    "_score": 14.853605,
    "_source": {
      "country": "Ethiopia",
      "companyName": "La Galtoara, Inc.",
      "emailId": [
        "galtoara@state.gov"
      ]
    },
    "highlight": {
      "emailId.autocomplete": [
        "<em>galtoara@state.gov</em>"
      ]
    }
  },
  {
    "_index": "customers-index",
    "_type": "_doc",
    "_id": "f76ecf0a-3e7d-41f9-a96f-83c66698f2d1",
    "_score": 3.6045084,
    "_source": {
      "country": "Philippines",
      "companyName": "Belgone State Medical, Inc.",
      "emailId": [
        "dopasdfd@apple.com"
      ]
    },
    "highlight": {
      "companyName.autocomplete": [
        "Belgone <em>State</em> Medical, Inc."
      ]
    }
  },
  {
    "_index": "customers-index",
    "_type": "_doc",
    "_id": "b41b1c0c-e84d-4424-a862-38b10d380d23",
    "_score": 2.1431046,
    "_source": {
      "country": "United States",
      "companyName": "DFDFDF Brands Limited",
      "emailId": [
        "adfadfad@godaddy.com"
      ]
    },
    "highlight": {
      "country.autocomplete": [
        "United <em>State</em>s"
      ]
    }
  }
]
Mapping
{
  "emailId": {
    "type": "text",
    "fields": {
      "autocomplete": {
        "type": "text",
        "analyzer": "autocomplete_email_analyzer",
        "search_analyzer": "search_analyzer"
      },
      "keyword": {
        "type": "keyword",
        "ignore_above": 256,
        "normalizer": "lowercase_normalizer"
      }
    },
    "analyzer": "index_analyzer",
    "search_analyzer": "search_analyzer"
  }
}
Setting
{
  "analysis": {
    "filter": {
      "email_filter": {
        "type": "pattern_capture",
        "preserve_original": true,
        "patterns": [
          "/regex-pattern/"
        ]
      },
      "starts_with_filter": {
        "type": "edge_ngram",
        "min_gram": 1,
        "max_gram": 100
      }
    },
    "analyzer": {
      "search_analyzer": {
        "filter": [
          "lowercase"
        ],
        "tokenizer": "keyword"
      },
      "index_analyzer": {
        "filter": [
          "lowercase"
        ],
        "tokenizer": "index_analyzer"
      },
      "autocomplete_email_analyzer": {
        "filter": [
          "email_filter",
          "unique",
          "starts_with_filter",
          "lowercase"
        ],
        "tokenizer": "autocomplete_email_tokenizer"
      }
    },
    "tokenizer": {
      "autocomplete_email_tokenizer": {
        "type": "uax_url_email"
      }
    }
  }
}
Note: I have included only the mappings and settings relevant to the emailId field, as it is the field of concern.
As the response shows, the companyName and country fields are highlighted only on the substring that matches the search query; however, the emailId field is highlighted in its entirety.
How can I highlight only the matching substring in the emailId field as well? Any help is much appreciated. Thanks in advance.