Highlight_query: nested query with wildcard search


(Alexander Ott) #1

Hello,

For our highlighting, we currently have a field "snippet" which contains the contenttext of all nested documents under du.content.content4b.contenttext.

We want to remove the snippet field and instead use a nested query inside the highlight_query, which we have defined as follows:

{
   "query": {
      "bool": {
         "must": [
            {
               "match": {
                  "du_key": "100010549"
               }
            }
         ]
      }
   },
   "_source": false,
   "highlight": {
      "highlight_query": {
         "bool": {
            "should": [
               {
                  "query_string": {
                     "query": "hund",
                     "default_field": "snippet"
                  }
               },
               {
                  "nested": {
                     "query": {
                        "query_string": {
                           "query": "hund",
                           "default_field": "du.content.content4b.contenttext"
                        }
                     },
                     "path": "du.content"
                  }
               }
            ]
         }
      },
      "fields": {
         "du.content.content4b.contenttext": {
            "fragment_size": 50,
            "number_of_fragments": 3
         },
         "snippet": {
            "fragment_size": 50,
            "number_of_fragments": 3
         }
      }
   }
}

Which returns the following result

  "hits": {
  "total": 1,
  "max_score": 1,
  "hits": [
     {
        "_index": "hr-sd",
        "_type": "searchdoc",
        "_id": "100010549@2@0@0#0",
        "_score": 1,
        "highlight": {
           "snippet": [
              "Schwanenteich, am Ufer schwarzer Schwan, im Teich badet <em>Hund</em> Neufundländer"
           ],
           "du.content.content4b.contenttext": [
              "Schwanenteich, am Ufer schwarzer Schwan, im Teich badet <em>Hund</em> Neufundländer"
           ]
        }
     }
  ]

}

This looks fine to us. But if we use the wildcard "hund*" instead of "hund":

{
   "query": {
      "bool": {
         "must": [
            {
               "match": {
                  "du_key": "100010549"
               }
            }
         ]
      }
   },
   "_source": false,
   "highlight": {
      "highlight_query": {
         "bool": {
            "should": [
               {
                  "query_string": {
                     "query": "hund*",
                     "default_field": "snippet"
                  }
               },
               {
                  "nested": {
                     "query": {
                        "query_string": {
                           "query": "hund*",
                           "default_field": "du.content.content4b.contenttext"
                        }
                     },
                     "path": "du.content"
                  }
               }
            ]
         }
      },
      "fields": {
         "du.content.content4b.contenttext": {
            "fragment_size": 50,
            "number_of_fragments": 3
         },
         "snippet": {
            "fragment_size": 50,
            "number_of_fragments": 3
         }
      }
   }
}

the result looks different and only returns a highlight for the "snippet" field:

    "hits": [
     {
        "_index": "hr-sd",
        "_type": "searchdoc",
        "_id": "100010549@2@0@0#0",
        "_score": 1,
        "highlight": {
           "snippet": [
              "Schwanenteich, am Ufer schwarzer Schwan, im Teich badet <em>Hund</em> Neufundländer"
           ]
        }
     }
  ]

Are we making a mistake, or is this a known bug?


(Alexander Ott) #2

With the following code it is possible to reproduce our issue in Elasticsearch 5.5.0. In Elasticsearch 2.4.4 we can't reproduce this issue.

PUT testcase
{
   "settings": {
      "index": {
         "number_of_replicas": 0
      }
   },
   "mappings": {
      "searchdoc": {
         "dynamic_templates": [
            {
               "nested_du_content": {
                  "match": "content4*",
                  "mapping": {
                     "type": "object",
                     "doc_values": false,
                     "properties": {
                        "contenttext": {
                           "type": "text",
                           "doc_values": false,
                           "norms": false,
                           "copy_to": [
                              "du.content.contenttext"
                           ]
                        }
                     }
                  }
               }
            }
         ],
         "properties": {
            "snippet": {
               "type": "text",
               "doc_values": false,
               "norms": false,
               "index": false
            },
            "du": {
               "dynamic": "strict",
               "type": "nested",
               "include_in_root": "true",
               "properties": {
                  "content": {
                     "type": "nested",
                     "include_in_parent": "true",
                     "dynamic": "true",
                     "properties": {
                        "contenttext": {
                           "type": "text",
                           "doc_values": false,
                           "norms": false
                        }
                     }
                  }
               }
            }
         }
      }
   }
}

POST testcase/searchdoc
{
   "snippet": [
      "Bensheim - Auerbach. Das Fürstenlager, ein Landschaftsgarten.",
      "Schwanenteich, am Ufer schwarzer Schwan, im Teich badet Hund Neufundländer",
      "Blumen Engelstrompeten",
      "Gebäude und Anlage Fürstenlager",
      "Quellen, Park Herrenwiese, Hügellandschaft, Tempel, einzelne große alte Bäume, Wein, Eremitage"
   ],
   "du": {
      "content": [
         {
            "content4t": {
               "contenttext": "Bensheim - Auerbach. Das Fürstenlager, ein Landschaftsgarten."
            }
         },
         {
            "content4b": {
               "contenttext": "Schwanenteich, am Ufer schwarzer Schwan, im Teich badet Hund Neufundländer"
            }
         },
         {
            "content4b": {
               "contenttext": "Blumen Engelstrompeten"
            }
         },
         {
            "content4b": {
               "contenttext": "Gebäude und Anlage Fürstenlager"
            }
         },
         {
            "content4b": {
               "contenttext": "Quellen, Park Herrenwiese, Hügellandschaft, Tempel, einzelne große alte Bäume, Wein, Eremitage"
            }
         }
      ]
   }
}

<!-- Okay without wildcard -->
GET testcase/_search
{
   "query": {
      "bool": {
         "must": [
            {
               "match_all": {}
            }
         ]
      }
   },
   "_source": false,
   "highlight": {
      "highlight_query": {
         "bool": {
            "should": [
               {
                  "query_string": {
                     "query": "hund",
                     "default_field": "snippet"
                  }
               },
               {
                  "nested": {
                     "query": {
                        "query_string": {
                           "query": "hund",
                           "default_field": "du.content.content4b.contenttext"
                        }
                     },
                     "path": "du.content"
                  }
               }
            ]
         }
      },
      "fields": {
         "maintitle": {},
         "subtitle": {},
         "du.content.content4b.contenttext": {
            "fragment_size": 50,
            "number_of_fragments": 3
         },
         "snippet": {
            "fragment_size": 50,
            "number_of_fragments": 3
         }
      }
   }
}

<!-- NOT okay with wildcard, only snippet field is highlighted -->
GET testcase/_search
{
   "query": {
      "bool": {
         "must": [
            {
               "match_all": {}
            }
         ]
      }
   },
   "_source": false,
   "highlight": {
      "highlight_query": {
         "bool": {
            "should": [
               {
                  "query_string": {
                     "query": "hund*",
                     "default_field": "snippet"
                  }
               },
               {
                  "nested": {
                     "query": {
                        "query_string": {
                           "query": "hund*",
                           "default_field": "du.content.content4b.contenttext"
                        }
                     },
                     "path": "du.content"
                  }
               }
            ]
         }
      },
      "fields": {
         "maintitle": {},
         "subtitle": {},
         "du.content.content4b.contenttext": {
            "fragment_size": 50,
            "number_of_fragments": 3
         },
         "snippet": {
            "fragment_size": 50,
            "number_of_fragments": 3
         }
      }
   }
}

(Alexander Ott) #3

If anyone is interested in this issue --> https://github.com/elastic/elasticsearch/issues/26230


(system) #4

This topic was automatically closed 28 days after the last reply. New replies are no longer allowed.