Has MultiMatch's BestFields functionality changed?


#1

Hi all,

How can I tune this query so that the 4th result (see below) which matches on both words ('A61' & 'roundabout') appears first in the list? Or is there a better way to query for this altogether (note the mapping at the bottom of the post)?

_search query

{
  "from": 0,
  "size": 10,
  "highlight": {
... stripped for brevity ...
    }
  },
  "_source": {
    "excludes": [
      "*"
    ]
  },
  "query": {
    "bool": {
      "should": [
        {
          "multi_match": {
            "type": "best_fields",
            "query": "a61 roundabout",
            "fields": [
              "code",
              "code.standard",
              "name",
              "notes",
              "notes.english",
              "shortName"
            ]
          }
        },
        {
          "nested": {
            "query": {
              "multi_match": {
                "type": "best_fields",
                "query": "a61 roundabout",
                "fields": [
                  "screenData.valueString",
                  "screenData.valueString.english"
                ]
              }
            },
            "path": "screenData"
          }
        }
      ],
      "minimum_should_match": 1
    }
  }
}

_search results

{
    "took": 16,
    "timed_out": false,
    "_shards": {
        "total": 5,
        "successful": 5,
        "skipped": 0,
        "failed": 0
    },
    "hits": {
        "total": 5,
        "max_score": 8.464119,
        "hits": [
            {
                "_index": "matters",
                "_type": "matter",
                "_id": "71",
                "_score": 8.464119,
                "_source": {},
                "highlight": {
                    "screenData.valueString": [
                        "<mark>A61</mark>"
                    ],
                    "screenData.valueString.english": [
                        "<mark>A61</mark>"
                    ]
                }
            },
            {
                "_index": "matters",
                "_type": "matter",
                "_id": "14",
                "_score": 7.368354,
                "_source": {},
                "highlight": {
                    "screenData.valueString": [
                        "Lawnswood <mark>Roundabout</mark>"
                    ],
                    "screenData.valueString.english": [
                        "Lawnswood <mark>Roundabout</mark>"
                    ]
                }
            },
            {
                "_index": "matters",
                "_type": "matter",
                "_id": "62",
                "_score": 6.31734,
                "_source": {},
                "highlight": {
                    "screenData.valueString": [
                        "Meanwood <mark>Roundabout</mark>"
                    ],
                    "screenData.valueString.english": [
                        "Meanwood <mark>Roundabout</mark>"
                    ]
                }
            },
            {
                "_index": "matters",
                "_type": "matter",
                "_id": "59",
                "_score": 5.680235,
                "_source": {},
                "highlight": {
                    "screenData.valueString": [
                        "<mark>A61</mark> direction Leeds, before the Leeds ring road <mark>roundabout</mark>."
                    ],
                    "screenData.valueString.english": [
                        "<mark>A61</mark> direction Leeds, before the Leeds ring road <mark>roundabout</mark>."
                    ]
                }
            },
            {
                "_index": "matters",
                "_type": "matter",
                "_id": "67",
                "_score": 3.277606,
                "_source": {},
                "highlight": {
                    "screenData.valueString": [
                        "<mark>A61</mark> Harrogate Road, directions Leeds city centre"
                    ],
                    "screenData.valueString.english": [
                        "<mark>A61</mark> Harrogate Road, directions Leeds city centre"
                    ]
                }
            }
        ]
    }
}

_mapping

{
    "matters": {
        "mappings": {
            "matter": {
                "dynamic": "false",
                "date_detection": false,
                "numeric_detection": false,
                "properties": {
... stripped for brevity ...
                "screenData": {
                    "type": "nested",
                    "dynamic": "false",
                    "properties": {
                        "fieldId": {
                            "type": "long"
                        },
                        "group": {
                            "type": "integer"
                        },
                        "isConfidential": {
                            "type": "boolean"
                        },
                        "matterTypeId": {
                            "type": "integer"
                        },
                        "screenId": {
                            "type": "long"
                        },
                        "systemId": {
                            "type": "integer"
                        },
                        "valueBool": {
                            "type": "boolean"
                        },
                        "valueDatetime": {
                            "type": "date"
                        },
                        "valueDouble": {
                            "type": "double"
                        },
                        "valueInt": {
                            "type": "long"
                        },
                        "valueString": {
                            "type": "text",
                            "fields": {
                                "english": {
                                    "type": "text",
                                    "analyzer": "english"
                                }
                            }
                        }
                    }
                }
... stripped for brevity ...
            }
        }
    }
}

#2

Is there anything I can add to help get an answer to this?


#3

What you could do is add another multi_match clause with an AND operator ("operator": "and"), then add a boost to that clause to increase the relevance of documents that contain all the search terms.

If no operator is specified, the match clause defaults to OR.

https://www.elastic.co/guide/en/elasticsearch/reference/current/query-dsl-match-query.html#query-dsl-match-query-boolean


#4

@klof Thanks for the suggestion. Do you mean I should break up the search term and have one multi_match per term?

Would you mind posting an example query as I'm not too sure what you mean..?


#5

No, what I meant is to add another multi_match clause that uses the and operator. If you just want to increase the score of the 4th document, the and operator requires all of the search terms to be present in a single field ("a61" AND "roundabout").

Example:

GET index/_search
{
  "query": {
    "bool": {
      "should": [
        {
          "multi_match": {
            "type": "best_fields",
            "query": "a61 roundabout",
            "fields": [
              "code",
              "code.standard",
              "name",
              "notes",
              "notes.english",
              "shortName"
            ],
            "operator": "and",
            "boost": 3
          }
        },
        {
          "multi_match": {
            "type": "best_fields",
            "query": "a61 roundabout",
            "fields": [
              "code",
              "code.standard",
              "name",
              "notes",
              "notes.english",
              "shortName"
            ]
          }
        },
        {
          "nested": {
            "query": {
              "multi_match": {
                "type": "best_fields",
                "query": "a61 roundabout",
                "fields": [
                  "screenData.valueString",
                  "screenData.valueString.english"
                ],
                "operator": "and",
                "boost": 3
              }
            },
            "path": "screenData"
          }
        },
        {
          "nested": {
            "query": {
              "multi_match": {
                "type": "best_fields",
                "query": "a61 roundabout",
                "fields": [
                  "screenData.valueString",
                  "screenData.valueString.english"
                ]
              }
            },
            "path": "screenData"
          }
        }
      ],
      "minimum_should_match": 1
    }
  }
}

Example without the boost, for when the score (order) of the other documents doesn't matter, using a filter plus a should clause:

  GET index/_search
  "query": {
    "bool": {
      "filter": [
        {
          "multi_match": {
            "type": "best_fields",
            "query": "a61 roundabout",
            "fields": [
              "code",
              "code.standard",
              "name",
              "notes",
              "notes.english",
              "shortName"
            ]
          }
        },
        {
          "nested": {
            "query": {
              "multi_match": {
                "type": "best_fields",
                "query": "a61 roundabout",
                "fields": [
                  "screenData.valueString",
                  "screenData.valueString.english"
                ]
              }
            },
            "path": "screenData"
          }
        }
      ],
      "should": [
        {
          "multi_match": {
            "type": "best_fields",
            "query": "a61 roundabout",
            "fields": [
              "code",
              "code.standard",
              "name",
              "notes",
              "notes.english",
              "shortName"
            ],
            "operator": "and"
          }
        },
        {
          "nested": {
            "query": {
              "multi_match": {
                "type": "best_fields",
                "query": "a61 roundabout",
                "fields": [
                  "screenData.valueString",
                  "screenData.valueString.english"
                ],
                "operator": "and"
              }
            },
            "path": "screenData"
          }
        }
      ],
      "minimum_should_match": 1
    }
  }

#6

@klof Thanks, I understand now.

To be honest I'm a bit puzzled, because I understood that 'type': 'best_fields' tells ES to score documents with a preference for multiple matches in the same field.

We recently upgraded from ES2 to ES6 and it looks like this functionality must have changed. Documents that used to be returned at the top for certain queries (like the one above) are no longer returned at the top, despite having multiple matches in the same field.


#7

Yes you're right,
best_fields should do the job as well.
I thought it was because nested fields create sub-documents, so it would also be worth trying with flattened documents. But if it was working before, the upgrade is the more likely cause.

I've never tried this, but you could try changing the similarity algorithm and then test again. Elasticsearch has used BM25 as its default similarity since version 5.0.


#8

Ah, cool, so my understanding is right.

I'll have a look at changing the similarity algorithm then - hmm, where to start..?

@Clinton_Gormley If you get 2 minutes, would you mind confirming my findings are as you would expect?


(system) #9

This topic was automatically closed 28 days after the last reply. New replies are no longer allowed.