Ignore TF/IDF in a complex query

LONGMAN · June 26, 2018, 2:09pm

I want to ignore TF/IDF in a complex query.

I tried to create a custom scripted similarity, but it had no effect. Also, this article was not useful for me: https://www.elastic.co/guide/en/elasticsearch/guide/master/ignoring-tfidf.html

These are my index settings and query:

Index settings:

{
  "items-index": {
    "settings": {
      "index": {
        "number_of_shards": "5",
        "provided_name": "internal-items-index-v20180626164241.174744",
        "similarity": {
          "default": {
            "type": "scripted",
            "weight_script": {
              "source": "double idf = Math.log((field.docCount+1.0)/(term.docFreq+1.0)) + 1.0; return query.boost * idf;"
            },
            "script": {
              "source": "double tf = Math.sqrt(doc.freq); return weight * tf;"
            }
          }
        },
        "creation_date": "1530016961296",
        "analysis": {...},
        "number_of_replicas": "1",
        "uuid": "zMRDzPUQTrWy6lLgYq8MiA",
        "version": {
          "created": "6030099"
        }
      }
    }
  }
}

Query:

{
    "explain": true, 
    "from": 0, 
    "highlight": {
        "fields": {
            "aka_names.name": {
                "fragment_size": 500, 
                "type": "unified"
            }, 
            "name": {
                "fragment_size": 500, 
                "type": "unified"
            }, 
            "name_eng": {
                "fragment_size": 500, 
                "type": "unified"
            }, 
            "name_geo": {
                "fragment_size": 500, 
                "type": "unified"
            }, 
            "name_geo_trans": {
                "fragment_size": 500, 
                "type": "unified"
            }, 
            "name_rus": {
                "fragment_size": 500, 
                "type": "unified"
            }, 
            "name_rus_trans": {
                "fragment_size": 500, 
                "type": "unified"
            }, 
            "name_trans": {
                "fragment_size": 500, 
                "type": "unified"
            }
        }, 
        "order": "score"
    }, 
    "query": {
        "function_score": {
            "field_value_factor": {
                "factor": 1.2, 
                "field": "weight", 
                "missing": 1.0, 
                "modifier": "sqrt"
            }, 
            "query": {
                "bool": {
                    "should": [
                        {
                            "multi_match": {
                                "boost": 2, 
                                "fields": [
                                    "name", 
                                    "name_trans", 
                                    "name_geo", 
                                    "name_geo_trans", 
                                    "name_eng", 
                                    "name_rus", 
                                    "name_rus_trans"
                                ], 
                                "fuzziness": "AUTO", 
                                "query": "My Search Query", 
                                "type": "best_fields"
                            }
                        }, 
                        {
                            "multi_match": {
                                "boost": 5, 
                                "fields": [
                                    "name.raw", 
                                    "name_trans.raw", 
                                    "name_geo.raw", 
                                    "name_geo_trans.raw", 
                                    "name_eng.raw", 
                                    "name_rus.raw", 
                                    "name_rus_trans.raw"
                                ], 
                                "fuzziness": "AUTO", 
                                "query": "My Search Query", 
                                "type": "best_fields"
                            }
                        }, 
                        {
                            "multi_match": {
                                "boost": 10, 
                                "fields": [
                                    "name.raw", 
                                    "name_trans.raw", 
                                    "name_geo.raw", 
                                    "name_geo_trans.raw", 
                                    "name_eng.raw", 
                                    "name_rus.raw", 
                                    "name_rus_trans.raw"
                                ], 
                                "query": "My Search Query", 
                                "type": "best_fields"
                            }
                        }, 
                        {
                            "multi_match": {
                                "boost": 20, 
                                "fields": [
                                    "name.raw", 
                                    "name_geo.raw", 
                                    "name_eng.raw", 
                                    "name_rus.raw"
                                ], 
                                "query": "My Search Query", 
                                "type": "phrase"
                            }
                        }, 
                        {
                            "nested": {
                                "boost": 4, 
                                "path": "aka_names", 
                                "query": {
                                    "bool": {
                                        "should": {
                                            "match": {
                                                "aka_names.name": "My Search Query"
                                            }
                                        }
                                    }
                                }, 
                                "score_mode": "max"
                            }
                        }
                    ]
                }
            }
        }
    }, 
    "size": 20
}

After running the query, the explain output still shows scoring criteria such as field.docCount, field.sumDocFreq, field.sumTotalTermFreq, etc.

Can anyone help?

LONGMAN · July 3, 2018, 2:23pm

Anyone?

abdon · July 5, 2018, 12:20pm

`explain: true` will show you all those criteria, but it should also tell you that the score was actually calculated using the scripted similarity that you have configured. The output should contain something like: "description": "score from ScriptedSimilarity(...)".

Do you not see that? If you don't, can you post your mappings, the analysis section of the index settings, and the actual output of explain?

system · August 2, 2018, 12:28pm

This topic was automatically closed 28 days after the last reply. New replies are no longer allowed.

Topic		Replies	Views
Tf-idf custom similarity and bm25 gives same scores and identical results along with a minor problem Elasticsearch	3	454	October 23, 2022
How to disable TF/IDF completely Elasticsearch	7	4693	April 10, 2018
How to complete disable TF-IDF? Elasticsearch	4	4772	February 6, 2017
Which is the best way of disabling IDF? Elasticsearch	5	2910	July 5, 2017
Score based on Term Frequency alone Elasticsearch	2	3918	May 23, 2017

Ignore TF/IDF in a complex query

Related topics