为什么相同的文本搜索结果的_score不同?


(watson) #1

**Elasticsearch version**: 2.3.3

Plugins installed: [head,ik]

**JVM version**: openjdk version "1.8.0_111"

**OS version**: CentOS release 6.8 (Final)

+++++++++++++++++++++++++++++++++++++++++
my mapping:
"mappings": {
"weixinMedia": {
"_all": {
"analyzer": "ik_max_word"
},
"properties": {
"mediaScore": {
"type": "float"
},
"weixinId": {
"type": "string"
},
"pi": {
"type": "long"
},
"mediaNameLen": {
"type": "integer"
},
"pmid": {
"index": "not_analyzed",
"type": "string"
},
"mediaName": {
"analyzer": "ik_max_word",
"term_vector": "with_positions_offsets",
"boost": 2,
"type": "string"
}
}
},
+++++++++++++++++++++++++++++++++++++++++
my request:
{
"track_scores" : "true",
"sort" : {
"_script" : {
"type" : "number",
"script" : "_score+(doc.pi.value/100.0)",
"order" : "desc"
}
},
"query" : {
"match_phrase" : { "mediaName" : "微美食" }
}
}
+++++++++++++++++++++++++++++++++++++++++
expect result:
{
"_index" : "media_b",
"_type" : "weixinMedia",
"_id" : "MzA5NTgyOTUyNA==",
"_score" : 17.916512,
"_source" : {
"mediaName" : "微美食",
"pmid" : "MzA5NTgyOTUyNA==",
"mediaNameLen" : 3,
"mediaScore" : 2.2,
"pi" : 600
},
"sort" : [ 23.91651153564453 ]
}, {
"_index" : "media_b",
"_type" : "weixinMedia",
"_id" : "MjM5NzMxOTM4MQ==",
"_score" : 17.916512,
"_source" : {
"mediaName" : "微美食",
"pmid" : "MjM5NzMxOTM4MQ==",
"mediaNameLen" : 3,
"mediaScore" : 2.062,
"pi" : 506
},
"sort" : [ 22.97651153564453 ]
}, {
"_index" : "media_b",
"_type" : "weixinMedia",
"_id" : "MjM5OTg0OTcwMg==",
"_score" : 17.916512,
"_source" : {
"mediaName" : "微美食西安",
"pmid" : "MjM5OTg0OTcwMg==",
"mediaNameLen" : 5,
"mediaScore" : 2.308,
"pi" : 629
},
"sort" : [ 24.20651153564453 ]
},
+++++++++++++++++++++++++++++++++++++++++

actual result with explain:
{
"_shard" : 4,
"_node" : "uSTOGHNUSjGNFweC7RynRQ",
"_index" : "media_b",
"_type" : "weixinMedia",
"_id" : "MjM5OTg0OTcwMg==",
"_score" : 17.916512,
"_source" : {
"mediaName" : "微美食西安",
"pmid" : "MjM5OTg0OTcwMg==",
"mediaNameLen" : 5,
"mediaScore" : 2.308,
"pi" : 629
},
"sort" : [ 24.20651153564453 ],
"_explanation" : {
"value" : 17.916512,
"description" : "sum of:",
"details" : [ {
"value" : 17.916512,
"description" : "weight(mediaName:\"微 美食 食\" in 161368) [PerFieldSimilarity], result of:",
"details" : [ {
"value" : 17.916512,
"description" : "fieldWeight in 161368, product of:",
"details" : [ {
"value" : 1.0,
"description" : "tf(freq=1.0), with freq of:",
"details" : [ {
"value" : 1.0,
"description" : "phraseFreq=1.0",
"details" : [ ]
} ]
}, {
"value" : 17.916512,
"description" : "idf(), sum of:",
"details" : [ {
"value" : 4.92588,
"description" : "idf(docFreq=9932, maxDocs=503580)",
"details" : [ ]
}, {
"value" : 7.0545344,
"description" : "idf(docFreq=1181, maxDocs=503580)",
"details" : [ ]
}, {
"value" : 5.9360976,
"description" : "idf(docFreq=3616, maxDocs=503580)",
"details" : [ ]
} ]
}, {
"value" : 1.0,
"description" : "fieldNorm(doc=161368)",
"details" : [ ]
} ]
} ]
}, {
"value" : 0.0,
"description" : "match on required clause, product of:",
"details" : [ {
"value" : 0.0,
"description" : "# clause",
"details" : [ ]
}, {
"value" : 0.055814438,
"description" : "_type:weixinMedia, product of:",
"details" : [ {
"value" : 1.0,
"description" : "boost",
"details" : [ ]
}, {
"value" : 0.055814438,
"description" : "queryNorm",
"details" : [ ]
} ]
} ]
} ]
}
}, {
"_shard" : 3,
"_node" : "uSTOGHNUSjGNFweC7RynRQ",
"_index" : "media_b",
"_type" : "weixinMedia",
"_id" : "MzA5NTgyOTUyNA==",
"_score" : 17.796759,
"_source" : {
"mediaName" : "微美食",
"pmid" : "MzA5NTgyOTUyNA==",
"mediaNameLen" : 3,
"mediaScore" : 2.2,
"pi" : 600
},
"sort" : [ 23.7967586517334 ],
"_explanation" : {
"value" : 17.796759,
"description" : "sum of:",
"details" : [ {
"value" : 17.796759,
"description" : "weight(mediaName:\"微 美食 食\" in 120379) [PerFieldSimilarity], result of:",
"details" : [ {
"value" : 17.796759,
"description" : "score(doc=120379,freq=1.0), product of:",
"details" : [ {
"value" : 0.99999994,
"description" : "queryWeight, product of:",
"details" : [ {
"value" : 17.79676,
"description" : "idf(), sum of:",
"details" : [ {
"value" : 4.93295,
"description" : "idf(docFreq=10996, maxDocs=561478)",
"details" : [ ]
}, {
"value" : 6.9785085,
"description" : "idf(docFreq=1421, maxDocs=561478)",
"details" : [ ]
}, {
"value" : 5.885302,
"description" : "idf(docFreq=4242, maxDocs=561478)",
"details" : [ ]
} ]
}, {
"value" : 0.05619,
"description" : "queryNorm",
"details" : [ ]
} ]
}, {
"value" : 17.79676,
"description" : "fieldWeight in 120379, product of:",
"details" : [ {
"value" : 1.0,
"description" : "tf(freq=1.0), with freq of:",
"details" : [ {
"value" : 1.0,
"description" : "phraseFreq=1.0",
"details" : [ ]
} ]
}, {
"value" : 17.79676,
"description" : "idf(), sum of:",
"details" : [ {
"value" : 4.93295,
"description" : "idf(docFreq=10996, maxDocs=561478)",
"details" : [ ]
}, {
"value" : 6.9785085,
"description" : "idf(docFreq=1421, maxDocs=561478)",
"details" : [ ]
}, {
"value" : 5.885302,
"description" : "idf(docFreq=4242, maxDocs=561478)",
"details" : [ ]
} ]
}, {
"value" : 1.0,
"description" : "fieldNorm(doc=120379)",
"details" : [ ]
} ]
} ]
} ]
}, {
"value" : 0.0,
"description" : "match on required clause, product of:",
"details" : [ {
"value" : 0.0,
"description" : "# clause",
"details" : [ ]
}, {
"value" : 0.05619,
"description" : "_type:weixinMedia, product of:",
"details" : [ {
"value" : 1.0,
"description" : "boost",
"details" : [ ]
}, {
"value" : 0.05619,
"description" : "queryNorm",
"details" : [ ]
} ]
} ]
} ]
}
}, {
"_shard" : 4,
"_node" : "uSTOGHNUSjGNFweC7RynRQ",
"_index" : "media_b",
"_type" : "weixinMedia",
"_id" : "MjM5NzMxOTM4MQ==",
"_score" : 17.916512,
"_source" : {
"mediaName" : "微美食",
"pmid" : "MjM5NzMxOTM4MQ==",
"mediaNameLen" : 3,
"mediaScore" : 2.062,
"pi" : 506
},
"sort" : [ 22.97651153564453 ],
"_explanation" : {
"value" : 17.916512,
"description" : "sum of:",
"details" : [ {
"value" : 17.916512,
"description" : "weight(mediaName:\"微 美食 食\" in 364138) [PerFieldSimilarity], result of:",
"details" : [ {
"value" : 17.916512,
"description" : "fieldWeight in 364138, product of:",
"details" : [ {
"value" : 1.0,
"description" : "tf(freq=1.0), with freq of:",
"details" : [ {
"value" : 1.0,
"description" : "phraseFreq=1.0",
"details" : [ ]
} ]
}, {
"value" : 17.916512,
"description" : "idf(), sum of:",
"details" : [ {
"value" : 4.92588,
"description" : "idf(docFreq=9932, maxDocs=503580)",
"details" : [ ]
}, {
"value" : 7.0545344,
"description" : "idf(docFreq=1181, maxDocs=503580)",
"details" : [ ]
}, {
"value" : 5.9360976,
"description" : "idf(docFreq=3616, maxDocs=503580)",
"details" : [ ]
} ]
}, {
"value" : 1.0,
"description" : "fieldNorm(doc=364138)",
"details" : [ ]

+++++++++++++++++++++++++++++++++++++++++
不知道为什么搜索出来的两个“微美食”文档的 _score 不同,而且评分最高的并不是完全匹配的文档,这是为什么呢?
thanks!


(Medcl) #2

你的排序脚本里不是自己指定了要使用 pi 这个值吗?看返回结果,这些文档的 pi 值是不同的。


(watson) #3

我说的不同是es得出的_score值不同,不是我计算的结果。
es得出的_score的值为什么不同?
{
"_index" : "media_b",
"_type" : "weixinMedia",
"_id" : "MzA5NTgyOTUyNA==",
"_score" : 17.916512,
"_source" : {
"mediaName" : "微美食",
"pmid" : "MzA5NTgyOTUyNA==",
"mediaNameLen" : 3,
"mediaScore" : 2.2,
"pi" : 600
},
"sort" : [ 23.91651153564453 ]
}


(system) #4

This topic was automatically closed 28 days after the last reply. New replies are no longer allowed.


(Medcl) #5

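1. 首先要知道 es 底层按分片(shard)来存储索引,每个分片各自的评分和该 shard 内的倒排表有关。由于每个分片的数据都不一样(例如上面 explain 结果中 shard 3 的 maxDocs=561478,而 shard 4 的 maxDocs=503580,各 term 的 docFreq 也不同),算出来的 idf 就不同,所以打出来的分不一定一样,尽管它们的 term 一样。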
2. 全文检索针对的是 field 分词之后的结果,所以只要它们都匹配上就能查询出来,而评分默认是按各自 shard 内的统计信息算出来的。如果需要跨分片一致的评分,可以在搜索时指定 search_type=dfs_query_then_fetch,让 es 先汇总所有分片的词频统计再打分。