Understanding Elasticsearch scoring between versions

We're currently in the middle of an upgrade from v5.2 to v6.8, and I'm seeing noticeable differences in how records are scored during searches. I can't tell whether deprecated features in the query might be influencing the scores, so could someone please look over the example query below and see if I'm missing something? The explain output shows only minor differences in the individual component scores, but these are enough to cross the min_score threshold on a bool query, and at times we see almost double the number of results come back from a search in v6.8 compared to the same search in v5.2.

{
  "from": 0,
  "size": 10,
  "query": {
     "bool": {
        "must": [
           {
              "bool": {
                 "should": [
                    {
                       "multi_match": {
                          "query": "Test",
                          "fields": [
                             "id_no^1.0",
                             "id_name^1.0"
                          ],
                          "type": "best_fields",
                          "operator": "OR",
                          "slop": 0,
                          "fuzziness": "2",
                          "prefix_length": 1,
                          "max_expansions": 50,
                          "lenient": false,
                          "zero_terms_query": "NONE",
                          "boost": 1
                       }
                    },
                    {
                      "match_phrase": {
                         "id_name": {
                            "query": "Test",
                            "slop": 2,
                            "boost": 1
                         }
                      }
                   },
                   {
                      "match": {
                         "ext_id": {
                            "query": "Test",
                            "operator": "OR",
                            "prefix_length": 0,
                            "max_expansions": 50,
                            "fuzzy_transpositions": true,
                            "lenient": false,
                            "zero_terms_query": "NONE",
                            "boost": 2.5
                         }
                      }
                   },
                   {
                      "match": {
                         "id_no": {
                            "query": "Test",
                            "operator": "OR",
                            "prefix_length": 0,
                            "max_expansions": 50,
                            "fuzzy_transpositions": true,
                            "lenient": false,
                            "zero_terms_query": "NONE",
                            "boost": 1
                         }
                      }
                   },
                   {
                      "match": {
                         "id_name": {
                            "query": "Test",
                            "operator": "OR",
                            "prefix_length": 0,
                            "max_expansions": 50,
                            "fuzzy_transpositions": true,
                            "lenient": false,
                            "zero_terms_query": "NONE",
                            "boost": 1
                         }
                      }
                   }
                 ],
                 "disable_coord": false,
                 "adjust_pure_negative": true,
                 "boost": 1
              }
           }
        ],
        "must_not": [
           {
              "exists": {
                 "field": "is_deleted",
                 "boost": 1
              }
           }
        ],
        "disable_coord": false,
        "adjust_pure_negative": true,
        "boost": 1
     }
  },
  "min_score": 7,
  "aggregations": {
     "type_desc": {
        "terms": {
           "field": "type_desc",
           "size": 25,
           "min_doc_count": 1,
           "shard_min_doc_count": 0,
           "show_term_doc_count_error": false,
           "order": [
              {
                 "_term": "asc"
              }
           ]
        }
     },
     "status_desc": {
        "terms": {
           "field": "status_desc",
           "size": 25,
           "min_doc_count": 1,
           "shard_min_doc_count": 0,
           "show_term_doc_count_error": false,
           "order": [
              {
                 "_term": "asc"
              }
           ]
        }
     },
     "ext_status_desc": {
        "terms": {
           "field": "ext_status_desc",
           "size": 25,
           "min_doc_count": 1,
           "shard_min_doc_count": 0,
           "show_term_doc_count_error": false,
           "order": [
              {
                 "_term": "asc"
              }
           ]
        }
     },
     "load_desc": {
        "terms": {
           "field": "load_desc",
           "size": 25,
           "min_doc_count": 1,
           "shard_min_doc_count": 0,
           "show_term_doc_count_error": false,
           "order": [
              {
                 "_term": "asc"
              }
           ]
        }
     }
  }
}

Take a look at query explain and the Explain API; they might make the differences a bit easier to spot.

This topic was automatically closed 28 days after the last reply. New replies are no longer allowed.