I need some help getting my index to score and sort my results correctly.
I am using Mongoid and the Tire gem, but the configuration below should be
straightforward to understand even if you are not familiar with them:
tire.settings :analysis => {
:analyzer => {
:skill_analyzer => {
'tokenizer' => 'whitespace', 'filter' => ['lowercase'],
'type' => "custom"
},
:location_analyzer => {
'tokenizer' => 'whitespace', 'filter' => ['lowercase'],
'type' => "custom"
}
}
}
tire.mapping :_boost => {:name => '_boost', :null_value => 1.0} do
indexes :id, :index => :not_analyzed
indexes :skills, :analyzer =>
'skill_analyzer', :boost => 10.0, :omit_norms => true
indexes :location, :analyzer =>
'location_analyzer', :boost => 4.0, :omit_norms => true
indexes :country_code, :index => :not_analyzed
indexes :hireable, :index => :not_analyzed, :type =>
'integer'
end
Sample record:
{"_boost":3.8305967274524235,"skills":[{"_value":"Bourne
Shell","_boost":8.318875430559885},{"_value":"Scala","_boost":
16.01893049877409},{"_value":"shell linux unix debian freebsd openbsd
netbsd bsd gnu suse opensuse ubuntu red hat fedora gentoo
slackware","_boost":8.318875430559885}],"location":
[],"country_code":null,"hireable":0}
The offending field is 'skills': when I search for a skill (say
'Scala'), the scores of the results don't reflect the boost I have
given Scala. You can see from the explain output below that the first
result has a lower Scala boost (16.02 vs. 17.29) and only a slightly
higher document boost (3.83 vs. 3.61), but it still appears at the top.
Can anyone help me configure this index correctly?
curl -XGET "http://localhost:9200/profiles/profile/_search?
pretty=true" -d '{"explain":true,"query":{"bool":{"must":
[{"query_string":
{"query":"scala","default_operator":"OR","minimum_should_match":"60%"}}]}},"size":
20,"from":0}'
{
"took" : 179,
"timed_out" : false,
"_shards" : {
"total" : 5,
"successful" : 5,
"failed" : 0
},
"hits" : {
"total" : 744,
"max_score" : 8.4369335,
"hits" : [ {
"_shard" : 3,
"_node" : "mD28KAI2RV-XSHTNPrP2PQ",
"_index" : "profiles",
"_type" : "profile",
"_id" : "4f34412032dbdae92700b0d7",
"_score" : 8.4369335, "_source" : {"_boost":
3.8305967274524235,"skills":[{"_value":"Bourne Shell","_boost":
8.318875430559885},{"_value":"Scala","_boost":16.01893049877409},
{"_value":"shell linux unix debian freebsd openbsd netbsd bsd gnu suse
opensuse ubuntu red hat fedora gentoo slackware","_boost":
8.318875430559885}],"location":[],"country_code":null,"hireable":0},
"_explanation" : {
"value" : 8.436933,
"description" : "fieldWeight(_all:scala in 0), product of:",
"details" : [ {
"value" : 11.327094,
"description" : "btq, product of:",
"details" : [ {
"value" : 0.70710677,
"description" : "tf(phraseFreq=0.5)"
}, {
"value" : 16.01893,
"description" : "allPayload(...)"
} ]
}, {
"value" : 0.9931271,
"description" : "idf(_all: scala=145)"
}, {
"value" : 0.75,
"description" : "fieldNorm(field=_all, doc=0)"
} ]
}
}, {
"_shard" : 3,
"_node" : "mD28KAI2RV-XSHTNPrP2PQ",
"_index" : "profiles",
"_type" : "profile",
"_id" : "4f34a3ef248f4b066e002e46",
"_score" : 7.5881076, "_source" : {"_boost":
3.6149999999999998,"skills":[{"_value":"Bourne Again Shell","_boost":
1.0396052545923595},{"_value":"Bourne Shell","_boost":
10.453783509740283},{"_value":"Scala","_boost":17.288752227294154},
{"_value":"CSS","_boost":5.840023623437591},{"_value":"XML","_boost":
7.667998680185211},{"_value":"HTML","_boost":2.6905258048255556},
{"_value":"web","_boost":4.265274714131573},{"_value":"frontend front-
end","_boost":4.265274714131573},{"_value":"shell linux unix debian
freebsd openbsd netbsd bsd gnu suse opensuse ubuntu red hat fedora
gentoo slackware","_boost":5.7466943821663214}],"location":
[],"country_code":null,"hireable":0},
"_explanation" : {
"value" : 7.588108,
"description" : "fieldWeight(_all:scala in 4), product of:",
"details" : [ {
"value" : 12.224994,
"description" : "btq, product of:",
"details" : [ {
"value" : 0.70710677,
"description" : "tf(phraseFreq=0.5)"
}, {
"value" : 17.288752,
"description" : "allPayload(...)"
} ]
}, {
"value" : 0.9931271,
"description" : "idf(_all: scala=145)"
}, {
"value" : 0.625,
"description" : "fieldNorm(field=_all, doc=4)"
} ]
}
}
Can anyone help me configure this index correctly?