Ignore TF/IDF in a complex query


(Avtandil Kikabidze) #1

I want to ignore TF/IDF in a complex query.

I tried to create a custom scripted similarity, but it had no effect. Also, this article did not help me: https://www.elastic.co/guide/en/elasticsearch/guide/master/ignoring-tfidf.html

These are my index settings and my query:

Index settings:

{
  "items-index": {
    "settings": {
      "index": {
        "number_of_shards": "5",
        "provided_name": "internal-items-index-v20180626164241.174744",
        "similarity": {
          "default": {
            "type": "scripted",
            "weight_script": {
              "source": "double idf = Math.log((field.docCount+1.0)/(term.docFreq+1.0)) + 1.0; return query.boost * idf;"
            },
            "script": {
              "source": "double tf = Math.sqrt(doc.freq); return weight * tf;"
            }
          }
        },
        "creation_date": "1530016961296",
        "analysis": {...},
        "number_of_replicas": "1",
        "uuid": "zMRDzPUQTrWy6lLgYq8MiA",
        "version": {
          "created": "6030099"
        }
      }
    }
  }
}

Query:

{
    "explain": true, 
    "from": 0, 
    "highlight": {
        "fields": {
            "aka_names.name": {
                "fragment_size": 500, 
                "type": "unified"
            }, 
            "name": {
                "fragment_size": 500, 
                "type": "unified"
            }, 
            "name_eng": {
                "fragment_size": 500, 
                "type": "unified"
            }, 
            "name_geo": {
                "fragment_size": 500, 
                "type": "unified"
            }, 
            "name_geo_trans": {
                "fragment_size": 500, 
                "type": "unified"
            }, 
            "name_rus": {
                "fragment_size": 500, 
                "type": "unified"
            }, 
            "name_rus_trans": {
                "fragment_size": 500, 
                "type": "unified"
            }, 
            "name_trans": {
                "fragment_size": 500, 
                "type": "unified"
            }
        }, 
        "order": "score"
    }, 
    "query": {
        "function_score": {
            "field_value_factor": {
                "factor": 1.2, 
                "field": "weight", 
                "missing": 1.0, 
                "modifier": "sqrt"
            }, 
            "query": {
                "bool": {
                    "should": [
                        {
                            "multi_match": {
                                "boost": 2, 
                                "fields": [
                                    "name", 
                                    "name_trans", 
                                    "name_geo", 
                                    "name_geo_trans", 
                                    "name_eng", 
                                    "name_rus", 
                                    "name_rus_trans"
                                ], 
                                "fuzziness": "AUTO", 
                                "query": "My Search Query", 
                                "type": "best_fields"
                            }
                        }, 
                        {
                            "multi_match": {
                                "boost": 5, 
                                "fields": [
                                    "name.raw", 
                                    "name_trans.raw", 
                                    "name_geo.raw", 
                                    "name_geo_trans.raw", 
                                    "name_eng.raw", 
                                    "name_rus.raw", 
                                    "name_rus_trans.raw"
                                ], 
                                "fuzziness": "AUTO", 
                                "query": "My Search Query", 
                                "type": "best_fields"
                            }
                        }, 
                        {
                            "multi_match": {
                                "boost": 10, 
                                "fields": [
                                    "name.raw", 
                                    "name_trans.raw", 
                                    "name_geo.raw", 
                                    "name_geo_trans.raw", 
                                    "name_eng.raw", 
                                    "name_rus.raw", 
                                    "name_rus_trans.raw"
                                ], 
                                "query": "My Search Query", 
                                "type": "best_fields"
                            }
                        }, 
                        {
                            "multi_match": {
                                "boost": 20, 
                                "fields": [
                                    "name.raw", 
                                    "name_geo.raw", 
                                    "name_eng.raw", 
                                    "name_rus.raw"
                                ], 
                                "query": "My Search Query", 
                                "type": "phrase"
                            }
                        }, 
                        {
                            "nested": {
                                "boost": 4, 
                                "path": "aka_names", 
                                "query": {
                                    "bool": {
                                        "should": {
                                            "match": {
                                                "aka_names.name": "My Search Query"
                                            }
                                        }
                                    }
                                }, 
                                "score_mode": "max"
                            }
                        }
                    ]
                }
            }
        }
    }, 
    "size": 20
}

After running the query, the explain output still shows scoring criteria such as field.docCount, field.sumDocFreq, field.sumTotalTermFreq, etc.

Can anyone help?


(Avtandil Kikabidze) #2

Anyone?


(Abdon Pijpelink) #3

"explain": true will show you all those criteria, but it should also tell you that the score was actually calculated using the scripted similarity that you have configured. The output should contain something like: "description": "score from ScriptedSimilarity(...)".

Do you not see that? If you don't, can you post your mappings, the analysis section of the index settings, and the actual output of explain?


(system) #4

This topic was automatically closed 28 days after the last reply. New replies are no longer allowed.