Highlight term issue


(Komal Ankoliya) #1

1: We created an index with the settings and mapping below.

PUT http://localhost:9200/essearch
{ 
"mappings": {
        "object": {
            "_all": {
                "enabled": false
            },
            "properties": {
               "content": {
                    "type": "text",
                    "term_vector": "with_positions_offsets",
                    "similarity": "classic",
                    "analyzer": "content_standard"
                },
                "content_phonic": {
                    "type": "text",
                    "term_vector": "with_positions_offsets",
                    "similarity": "classic",
                    "analyzer": "content_phonetic"
                },
                "content_stemming": {
                    "type": "text",
                    "term_vector": "with_positions_offsets",
                    "similarity": "classic",
                    "analyzer": "content_stemming"
                }
            }
        }
    },
 "settings": {
        "index": {
            "number_of_shards": "1",
            "similarity": {
                "default": {
                    "type": "classic"
                }
            },
            "max_result_window": "50000",
            "mapper": {
                "dynamic": "false"
            },
            "analysis": {
                "filter": {
                    "content_phonetic": {
                        "type": "phonetic",
                        "encoder": "doublemetaphone"
                    },
                    "StopWords": {
                        "type": "stop",
                        "stopwords": [
                            "after",
                            "all",
                            "under",
                            "very",
                            "well"]
                    }
                },
                "analyzer": {
                    "content_phonetic": {
                        "filter": [
                            "content_phonetic"
                        ],
                        "char_filter": [
                            "CharFilters"
                        ],
                        "type": "custom",
                        "tokenizer": "standard"
                    },
                    "content_stemming": {
                        "filter": [
                            "lowercase",
                            "porter_stem"
                        ],
                        "char_filter": [
                            "CharFilters"
                        ],
                        "type": "custom",
                        "tokenizer": "standard"
                    },
                    "content_standard": {
                        "filter": [
                            "lowercase",
                            "StopWords"
                        ],
                        "char_filter": [
                            "CharFilters"
                        ],
                        "type": "custom",
                        "tokenizer": "standard"
                    }
                },
                "char_filter": {
                    "CharFilters": {
                        "type": "mapping",
                        "mappings": [
                            ". => ' '",
                            "' => ' '",
                            "_ => ' '",
                            ": => ' '"
                        ]
                    }
                }
            },
            "number_of_replicas": "0"
        }
    }}

2: Indexed a document

http://localhost:9200/essearch/object/1
{ "content" : "beginning thirty days after the anticipated COD. 
			 Buyer shall be responsible for all natural gas and electrical imbalance charges.
			 All prices shall be at the Reference Conditions.
			 Buyer’s performance of its obligations under the ECSA with a form of guarantee in an amount. Seller shall assign its rights under said requests to Buyer.  Buyer shall have full dispatch rights subject to operational parameters  (including ramp rates. buyer said to me..."   }

3: Performed Highlight query

http://localhost:9200/essearch/_search
 {
 "highlight": {
"pre_tags": [ "<term0 style='background-color:Lime'>", "<term1 style='background-color:Chocolate'>", "<term2 style='background-color:Pink'>"
],"post_tags": [ "</term0>", "</term1>", "</term2>" ],
"encoder": "html",
"fields": { "content": { "fragment_size": 50, "number_of_fragments": 0, "type": "fvh" } } },
"_source": false,
"query": {
"bool": {
  "must": [
    {
      "query_string": {
        "query": "(\"under said\") OR (said) OR (buyer)",
        "default_field": "content"}} ],
  "filter": [
    {
      "ids": {
        "values": [ "1" ] } } ] } } }

4: Highlight Query Output

{
"took": 0,
"timed_out": false,
"_shards": {
    "total": 1,
    "successful": 1,
    "failed": 0
},
"hits": {
    "total": 1,
    "max_score": 0.30490398,
    "hits": [
        {
            "_index": "essearch",
            "_type": "object",
            "_id": "1",
            "_score": 0.30490398,
            "highlight": {
                "content": [
                    "beginning thirty days after the anticipated COD.
					<term1 style='background-color:Chocolate'>Buyer</term1> 
					shall be responsible for all natural gas and electrical imbalance charges.
					All prices shall be at the Reference Conditions.Buyer’s performance of its obligations under the ECSA with a form of guarantee in an amount. Seller shall assign its rights under <term0 style='background-color:Lime'>said</term0> requests	to <term1 style='background-color:Chocolate'>Buyer</term1>. <term1 style='background-color:Chocolate'>Buyer</term1> shall have full dispatch rights subject to operational parameters (including ramp rates. <term1 style='background-color:Chocolate'>buyer</term1> <term0 style='background-color:Lime'>said</term0> to me..."
                ]    }  } ] } }

As you can see, we supplied pre and post tags according to the number of query terms. Here we have 3 terms joined with the OR operator, so a total of three pre/post tag pairs are supplied. After performing the highlight query, ES should apply <term1 style='background-color:Chocolate'> to the "said" term, following the sequence of the query terms, but instead ES applies <term0 style='background-color:Lime'> to the "said" term, and for "buyer" it applies <term1 style='background-color:Chocolate'>.


(system) #2

This topic was automatically closed 28 days after the last reply. New replies are no longer allowed.