Hi everyone,
I'm using an nGram filter for partial matching and am having some problems
with relevance scoring in my search results.
With multi_field and the standard analyzer, I can boost the exact match
(e.g. "foo"), which is good.
But I also want the term "barfoobar" to have a higher score than
"blablablafoobarbarbar", because the field length is shorter.
I thought this was the default behavior when calculating the relevance score,
but in fact they both have the same score (0.36101705).
How can I implement this?
Full demo here:
create index
curl -XPUT 'localhost:9200/test' -d'
{
"settings": {
"number_of_shards": 1,
"number_of_replicas": 0,
"analysis": {
"analyzer": {
"my_index_analyzer": {
"type": "custom",
"tokenizer": "whitespace",
"filter": [
"lowercase",
"my_nGram"
]
},
"my_search_analyzer": {
"type": "custom",
"tokenizer": "whitespace",
"filter": [
"lowercase"
]
}
},
"filter": {
"my_nGram": {
"type": "nGram",
"min_gram": 1,
"max_gram": 50
}
}
}
},
"mappings": {
"my_type": {
"_all": {
"enabled": false
},
"properties": {
"my_field": {
"type": "multi_field",
"fields": {
"my_field": {
"type": "string",
"index_analyzer": "standard",
"search_analyzer": "standard",
"include_in_all": false,
"boost": 10.0
},
"partial": {
"type": "string",
"index_analyzer": "my_index_analyzer",
"search_analyzer": "my_search_analyzer",
"include_in_all": false,
"boost": 1.0
}
}
}
}
}
}
}
'
import data
curl -XPUT 'localhost:9200/test/my_type/1' -d '
{
"my_field": "blablablafoobarbarbar"
}
'
curl -XPUT 'localhost:9200/test/my_type/2' -d '
{
"my_field": "foo"
}
'
curl -XPUT 'localhost:9200/test/my_type/3' -d '
{
"my_field": "barfoobar"
}
'
search
curl -XGET 'localhost:9200/test/my_type/_search?pretty' -d '
{
"query": {
"multi_match": {
"query": "foo",
"fields": [
"my_field",
"my_field.partial"
],
"operator": "and"
}
}
}
'
result
{
"took" : 1,
"timed_out" : false,
"_shards" : {
"total" : 1,
"successful" : 1,
"failed" : 0
},
"hits" : {
"total" : 3,
"max_score" : 14.054651,
"hits" : [ {
"_index" : "test",
"_type" : "my_type",
"_id" : "2",
"_score" : 14.054651,
"_source":
{
"my_field": "foo"
}
}, {
"_index" : "test",
"_type" : "my_type",
"_id" : "1",
"_score" : 0.36101705,
"_source":
{
"my_field": "blablablafoobarbarbar"
}
}, {
"_index" : "test",
"_type" : "my_type",
"_id" : "3",
"_score" : 0.36101705,
"_source":
{
"my_field": "barfoobar"
}
} ]
}
}
Thanks in advance and best regards,
Torben
--
You received this message because you are subscribed to the Google Groups "elasticsearch" group.
To unsubscribe from this group and stop receiving emails from it, send an email to elasticsearch+unsubscribe@googlegroups.com.
To view this discussion on the web visit https://groups.google.com/d/msgid/elasticsearch/958585db-8c0a-4c9b-86b2-3df9513b5a7f%40googlegroups.com.
For more options, visit https://groups.google.com/d/optout.