I have created an index that stores call transcripts, and when I search for a keyword phrase, highlighting does not work properly.
Elasticsearch version: 6.8.1
{
"settings": {
"index": {
"analysis": {
"filter": {
"minimal_english": {
"name": "minimal_english",
"type": "stemmer"
}
},
"analyzer": {
"observe_kws_analyzer": {
"filter": [
"standard",
"lowercase",
"minimal_english",
"porter_stem"
],
"tokenizer": "standard"
}
}
}
}
}
}
"mapping":{
"properties":{
"account_id":{
"type":"keyword"
},
"duration":{
"type":"integer"
},
"transcript":{
"type":"nested",
"properties":{
"conv_id":{
"type":"integer"
},
"phrase_analyzed":{
"type":"text",
"analyzer": "observe_kws_analyzer"
}
}
}
}
}
Sample Document:
{
"_id":"b89494f7-182d-4ba8-a3ac-d9ea0f0f6ff4",
"account_id": "5be180eef7c85b00017f9477",
"duration": 216,
"transcript": [
{
"conv_id": 26,
"phrase_analyzed": "thank you so much sir and before we continue i need to let you know that i'm a debt collector i attempting continue to collect a debt any information obtained due this for that purpose also the entire call is being recorded and may be monitored for quality and training purposes"
}
]
}
Query:
{
"query": {
"bool": {
"must": [
{
"ids": {
"type": "meetings",
"values": [
"b89494f7-182d-4ba8-a3ac-d9ea0f0f6ff4"
],
"boost": 1
}
},
{
"bool": {
"should": [
{
"nested": {
"query": {
"bool": {
"must": [
{
"bool": {
"must": [
{
"match_phrase": {
"transcript.phrase_analyzed": {
"query": "this entire call is being recorded and may be monitored for quality and training purposes",
"slop": 1,
"zero_terms_query": "NONE",
"boost": 1
}
}
}
],
"adjust_pure_negative": true,
"boost": 1
}
}
],
"adjust_pure_negative": true,
"boost": 1
}
},
"path": "transcript",
"ignore_unmapped": false,
"score_mode": "none",
"boost": 1,
"inner_hits": {
"name": "inner_hits_5d2451d7b91e8c3d3e3660cd_10",
"ignore_unmapped": false,
"from": 0,
"size": 100,
"version": false,
"seq_no_primary_term": false,
"explain": false,
"track_scores": false,
"sort": [
{
"transcript.conv_id": {
"order": "asc"
}
}
],
"highlight": {
"pre_tags": [
"<HIGHLIGHT5d2451d7b91e8c3d3e3660cd_10>"
],
"post_tags": [
"<HIGHLIGHT5d2451d7b91e8c3d3e3660cd_10/>"
],
"number_of_fragments": 0,
"fields": {
"transcript.phrase_analyzed": {}
}
}
}
}
}
],
"adjust_pure_negative": true,
"boost": 1
}
}
],
"adjust_pure_negative": true,
"boost": 1
}
},
"_source": {
"includes": [
"account_id",
"duration"
]
}
}
Response:
{
"hits": {
"hits": [
{
"_type": "meetings",
"_source": {
"duration": 216,
"account_id": "5be180eef7c85b00017f9477"
},
"_score": 1,
"inner_hits": {
"inner_hits_5d2451d7b91e8c3d3e3660cd_10": {
"hits": {
"hits": [
{
"sort": [
26
],
"_type": "meetings",
"_source": {
"conv_id": 26,
"phrase_analyzed": "thank you so much sir and before we continue i need to let you know that i'm a debt collector i attempting continue to collect a debt any information obtained due this for that purpose also the entire call is being recorded and may be monitored for quality and training purposes"
},
"_score": null,
"_index": "meetings_with_transcript",
"_nested": {
"field": "transcript",
"offset": 26
},
"_id": "b89494f7-182d-4ba8-a3ac-d9ea0f0f6ff4"
}
],
"total": 1,
"max_score": null
}
}
},
"_index": "meetings_with_transcript",
"_id": "b89494f7-182d-4ba8-a3ac-d9ea0f0f6ff4"
}
],
"total": 1,
"max_score": 1
},
"_shards": {
"successful": 10,
"failed": 0,
"skipped": 0,
"total": 10
},
"took": 90,
"timed_out": false
}
I am not getting any highlight in the response to this query. However, if I change the query text to "the entire call is being recorded and may be monitored for quality and training purposes" (i.e. an exact match of the text in the document), then I get the appropriate highlight.
I think the previous query should also return a highlight, since it does match the document. Is this a bug? If not, what is the logic behind this behavior?