I want to ignore TF/IDF in a complex query.
I tried to create a custom scripted similarity, but it had no effect. Also, this article did not help in my case: https://www.elastic.co/guide/en/elasticsearch/guide/master/ignoring-tfidf.html
These are my index settings and my query.
Index settings:
{
"items-index": {
"settings": {
"index": {
"number_of_shards": "5",
"provided_name": "internal-items-index-v20180626164241.174744",
"similarity": {
"default": {
"type": "scripted",
"weight_script": {
"source": "double idf = Math.log((field.docCount+1.0)/(term.docFreq+1.0)) + 1.0; return query.boost * idf;"
},
"script": {
"source": "double tf = Math.sqrt(doc.freq); return weight * tf;"
}
}
},
"creation_date": "1530016961296",
"analysis": {...},
"number_of_replicas": "1",
"uuid": "zMRDzPUQTrWy6lLgYq8MiA",
"version": {
"created": "6030099"
}
}
}
}
}
Query:
{
"explain": true,
"from": 0,
"highlight": {
"fields": {
"aka_names.name": {
"fragment_size": 500,
"type": "unified"
},
"name": {
"fragment_size": 500,
"type": "unified"
},
"name_eng": {
"fragment_size": 500,
"type": "unified"
},
"name_geo": {
"fragment_size": 500,
"type": "unified"
},
"name_geo_trans": {
"fragment_size": 500,
"type": "unified"
},
"name_rus": {
"fragment_size": 500,
"type": "unified"
},
"name_rus_trans": {
"fragment_size": 500,
"type": "unified"
},
"name_trans": {
"fragment_size": 500,
"type": "unified"
}
},
"order": "score"
},
"query": {
"function_score": {
"field_value_factor": {
"factor": 1.2,
"field": "weight",
"missing": 1.0,
"modifier": "sqrt"
},
"query": {
"bool": {
"should": [
{
"multi_match": {
"boost": 2,
"fields": [
"name",
"name_trans",
"name_geo",
"name_geo_trans",
"name_eng",
"name_rus",
"name_rus_trans"
],
"fuzziness": "AUTO",
"query": "My Search Query",
"type": "best_fields"
}
},
{
"multi_match": {
"boost": 5,
"fields": [
"name.raw",
"name_trans.raw",
"name_geo.raw",
"name_geo_trans.raw",
"name_eng.raw",
"name_rus.raw",
"name_rus_trans.raw"
],
"fuzziness": "AUTO",
"query": "My Search Query",
"type": "best_fields"
}
},
{
"multi_match": {
"boost": 10,
"fields": [
"name.raw",
"name_trans.raw",
"name_geo.raw",
"name_geo_trans.raw",
"name_eng.raw",
"name_rus.raw",
"name_rus_trans.raw"
],
"query": "My Search Query",
"type": "best_fields"
}
},
{
"multi_match": {
"boost": 20,
"fields": [
"name.raw",
"name_geo.raw",
"name_eng.raw",
"name_rus.raw"
],
"query": "My Search Query",
"type": "phrase"
}
},
{
"nested": {
"boost": 4,
"path": "aka_names",
"query": {
"bool": {
"should": {
"match": {
"aka_names.name": "My Search Query"
}
}
}
},
"score_mode": "max"
}
}
]
}
}
}
},
"size": 20
}
After running the query, the explain output shows scoring criteria such as field.docCount, field.sumDocFreq, field.sumTotalTermFreq, etc.
Can anyone help?