为什么相同的文本搜索结果的_score不同？

watson · March 10, 2017, 10:59am

**Elasticsearch version**: 2.3.3

**Plugins installed**: [head,ik]

**JVM version**: openjdk version "1.8.0_111"

**OS version**: CentOS release 6.8 (Final)

+++++++++++++++++++++++++++++++++++++++++
my mapping：
"mappings": {
"weixinMedia": {
"_all": {
"analyzer": "ik_max_word"
},
"properties": {
"mediaScore": {
"type": "float"
},
"weixinId": {
"type": "string"
},
"pi": {
"type": "long"
},
"mediaNameLen": {
"type": "integer"
},
"pmid": {
"index": "not_analyzed",
"type": "string"
},
"mediaName": {
"analyzer": "ik_max_word",
"term_vector": "with_positions_offsets",
"boost": 2,
"type": "string"
}
}
},
+++++++++++++++++++++++++++++++++++++++++
my request:
{
"track_scores" : "true",
"sort" : {
"_script" : {
"type" : "number",
"script" : "_score+(doc.pi.value/100.0)",
"order" : "desc"
}
},
"query" : {
"match_phrase" : { "mediaName" : "微美食" }
}
}
+++++++++++++++++++++++++++++++++++++++++
expect result:
{
"_index" : "media_b",
"_type" : "weixinMedia",
"_id" : "MzA5NTgyOTUyNA==",
"_score" : 17.916512,
"_source" : {
"mediaName" : "微美食",
"pmid" : "MzA5NTgyOTUyNA==",
"mediaNameLen" : 3,
"mediaScore" : 2.2,
"pi" : 600
},
"sort" : [ 23.91651153564453 ]
}, {
"_index" : "media_b",
"_type" : "weixinMedia",
"_id" : "MjM5NzMxOTM4MQ==",
"_score" : 17.916512,
"_source" : {
"mediaName" : "微美食",
"pmid" : "MjM5NzMxOTM4MQ==",
"mediaNameLen" : 3,
"mediaScore" : 2.062,
"pi" : 506
},
"sort" : [ 22.97651153564453 ]
}, {
"_index" : "media_b",
"_type" : "weixinMedia",
"_id" : "MjM5OTg0OTcwMg==",
"_score" : 17.916512,
"_source" : {
"mediaName" : "微美食西安",
"pmid" : "MjM5OTg0OTcwMg==",
"mediaNameLen" : 5,
"mediaScore" : 2.308,
"pi" : 629
},
"sort" : [ 24.20651153564453 ]
},
+++++++++++++++++++++++++++++++++++++++++

actual result with explain:
{
"_shard" : 4,
"_node" : "uSTOGHNUSjGNFweC7RynRQ",
"_index" : "media_b",
"_type" : "weixinMedia",
"_id" : "MjM5OTg0OTcwMg==",
"_score" : 17.916512,
"_source" : {
"mediaName" : "微美食西安",
"pmid" : "MjM5OTg0OTcwMg==",
"mediaNameLen" : 5,
"mediaScore" : 2.308,
"pi" : 629
},
"sort" : [ 24.20651153564453 ],
"_explanation" : {
"value" : 17.916512,
"description" : "sum of:",
"details" : [ {
"value" : 17.916512,
"description" : "weight(mediaName:\"微美食食\" in 161368) [PerFieldSimilarity], result of:",
"details" : [ {
"value" : 17.916512,
"description" : "fieldWeight in 161368, product of:",
"details" : [ {
"value" : 1.0,
"description" : "tf(freq=1.0), with freq of:",
"details" : [ {
"value" : 1.0,
"description" : "phraseFreq=1.0",
"details" : [ ]
} ]
}, {
"value" : 17.916512,
"description" : "idf(), sum of:",
"details" : [ {
"value" : 4.92588,
"description" : "idf(docFreq=9932, maxDocs=503580)",
"details" : [ ]
}, {
"value" : 7.0545344,
"description" : "idf(docFreq=1181, maxDocs=503580)",
"details" : [ ]
}, {
"value" : 5.9360976,
"description" : "idf(docFreq=3616, maxDocs=503580)",
"details" : [ ]

"value" : 1.0,
"description" : "fieldNorm(doc=161368)",
"details" : [ ]

"value" : 0.0,
"description" : "match on required clause, product of:",
"details" : [ {
"value" : 0.0,
"description" : "# clause",
"details" : [ ]
}, {
"value" : 0.055814438,
"description" : "_type:weixinMedia, product of:",
"details" : [ {
"value" : 1.0,
"description" : "boost",
"details" : [ ]
}, {
"value" : 0.055814438,
"description" : "queryNorm",
"details" : [ ]

"_shard" : 3,
"_node" : "uSTOGHNUSjGNFweC7RynRQ",
"_index" : "media_b",
"_type" : "weixinMedia",
"_id" : "MzA5NTgyOTUyNA==",
"_score" : 17.796759,
"_source" : {
"mediaName" : "微美食",
"pmid" : "MzA5NTgyOTUyNA==",
"mediaNameLen" : 3,
"mediaScore" : 2.2,
"pi" : 600
},
"sort" : [ 23.7967586517334 ],
"_explanation" : {
"value" : 17.796759,
"description" : "sum of:",
"details" : [ {
"value" : 17.796759,
"description" : "weight(mediaName:\"微美食食\" in 120379) [PerFieldSimilarity], result of:",
"details" : [ {
"value" : 17.796759,
"description" : "score(doc=120379,freq=1.0), product of:",
"details" : [ {
"value" : 0.99999994,
"description" : "queryWeight, product of:",
"details" : [ {
"value" : 17.79676,
"description" : "idf(), sum of:",
"details" : [ {
"value" : 4.93295,
"description" : "idf(docFreq=10996, maxDocs=561478)",
"details" : [ ]

"value" : 6.9785085,
"description" : "idf(docFreq=1421, maxDocs=561478)",
"details" : [ ]
}, {
"value" : 5.885302,
"description" : "idf(docFreq=4242, maxDocs=561478)",
"details" : [ ]

"value" : 0.05619,
"description" : "queryNorm",
"details" : [ ]

"value" : 17.79676,
"description" : "fieldWeight in 120379, product of:",
"details" : [ {
"value" : 1.0,
"description" : "tf(freq=1.0), with freq of:",
"details" : [ {
"value" : 1.0,
"description" : "phraseFreq=1.0",
"details" : [ ]

"value" : 17.79676,
"description" : "idf(), sum of:",
"details" : [ {
"value" : 4.93295,
"description" : "idf(docFreq=10996, maxDocs=561478)",
"details" : [ ]
}, {
"value" : 6.9785085,
"description" : "idf(docFreq=1421, maxDocs=561478)",
"details" : [ ]
}, {
"value" : 5.885302,
"description" : "idf(docFreq=4242, maxDocs=561478)",
"details" : [ ]
} ]
}, {
"value" : 1.0,
"description" : "fieldNorm(doc=120379)",
"details" : [ ]

"value" : 0.0,
"description" : "match on required clause, product of:",
"details" : [ {
"value" : 0.0,
"description" : "# clause",
"details" : [ ]
}, {
"value" : 0.05619,
"description" : "_type:weixinMedia, product of:",
"details" : [ {
"value" : 1.0,
"description" : "boost",
"details" : [ ]
}, {
"value" : 0.05619,
"description" : "queryNorm",
"details" : [ ]

"_shard" : 4,
"_node" : "uSTOGHNUSjGNFweC7RynRQ",
"_index" : "media_b",
"_type" : "weixinMedia",
"_id" : "MjM5NzMxOTM4MQ==",
"_score" : 17.916512,
"_source" : {
"mediaName" : "微美食",
"pmid" : "MjM5NzMxOTM4MQ==",
"mediaNameLen" : 3,
"mediaScore" : 2.062,
"pi" : 506
},
"sort" : [ 22.97651153564453 ],
"_explanation" : {
"value" : 17.916512,
"description" : "sum of:",
"details" : [ {
"value" : 17.916512,
"description" : "weight(mediaName:\"微美食食\" in 364138) [PerFieldSimilarity], result of:",
"details" : [ {
"value" : 17.916512,
"description" : "fieldWeight in 364138, product of:",
"details" : [ {
"value" : 1.0,
"description" : "tf(freq=1.0), with freq of:",
"details" : [ {
"value" : 1.0,
"description" : "phraseFreq=1.0",
"details" : [ ]
} ]
}, {
"value" : 17.916512,
"description" : "idf(), sum of:",
"details" : [ {
"value" : 4.92588,
"description" : "idf(docFreq=9932, maxDocs=503580)",
"details" : [ ]
}, {
"value" : 7.0545344,
"description" : "idf(docFreq=1181, maxDocs=503580)",
"details" : [ ]
}, {
"value" : 5.9360976,
"description" : "idf(docFreq=3616, maxDocs=503580)",
"details" : [ ]
} ]
}, {
"value" : 1.0,
"description" : "fieldNorm(doc=364138)",
"details" : [ ]

+++++++++++++++++++++++++++++++++++++++++
不知道为什么搜索出来两个“微美食”的评分不同，评分最高的不是完全匹配的，这个是为什么呢？
thanks!

medcl.net · March 20, 2017, 12:38pm

你的评分公式不是自己指定了会使用pi这个值么，看返回，这些pi值是不同的

watson · March 27, 2017, 8:45am

我说的不同是es得出的_score值不同，不是我计算的结果。
es得出的_score的值为什么不同？
"_index" : "media_b",
"_type" : "weixinMedia",
"_id" : "MzA5NTgyOTUyNA==",
"_score" : 17.916512,
"_source" : {
"mediaName" : "微美食",
"pmid" : "MzA5NTgyOTUyNA==",
"mediaNameLen" : 3,
"mediaScore" : 2.2,
"pi" : 600
},
"sort" : [ 23.91651153564453 ]
}

medcl.net · May 29, 2017, 3:35pm

1.首先要知道es底层按分片来存储索引，每个分片各自的评分和该shard索引内的倒排表有关，所以每个分片的数据都是不一样的，打出来的分不一定是一样的，尽管它们的term一样。
2.全文检索针对的是field分词之后的结果，所以他们都匹配上就能查询出来，而评分默认是按各自shard内的评分算出来的。

Topic		Replies	Views
Why did the same text get different _score? Elasticsearch	0	542	March 11, 2017
Score varying for same values Elasticsearch	2	739	June 3, 2011
Different score for exact same keyword Elasticsearch	4	4175	February 20, 2014
Why elasticsearch gives different scores to identical documents Elasticsearch	1	631	May 29, 2018
How is it calculated _score Elasticsearch	4	480	October 29, 2014

为什么相同的文本搜索结果的_score不同？

Related topics