@jimczi Posted it here too!
I am seeing unexpected behaviour when "minimum_should_match" is used inside a "multi_match" query.
ES version: 2.4.6
Plugins installed: Kibana, Sense
Java version:
java version "1.8.0_102"
Java(TM) SE Runtime Environment (build 1.8.0_102-b14)
Java HotSpot(TM) 64-Bit Server VM (build 25.102-b14, mixed mode)
OS:
Darwin Kernel Version 17.7.0
Here's the request I am using to create the index:
PUT /docs
{
"settings": {
"number_of_shards": 1,
"number_of_replicas": 1,
"index": {
"analysis": {
"filter": {
"trigrams_filter": {
"type": "ngram",
"min_gram": 3,
"max_gram": 3
}
},
"analyzer": {
"analyzer_startswith": {
"tokenizer": "keyword",
"filter": "lowercase"
},
"snowball" : {
"type" : "snowball"
},
"trigrams": {
"type": "custom",
"tokenizer": "standard",
"filter": [
"lowercase",
"trigrams_filter"
]
}
}
}
}
},
"mappings": {
"modelresult": {
"properties": {
"allergies" : {
"type" : "string",
"analyzer" : "snowball"
},
"diets" : {
"type" : "string",
"analyzer" : "snowball"
},
"mealtypes" : {
"type" : "string",
"analyzer" : "snowball"
},
"medications" : {
"type" : "string",
"analyzer" : "snowball"
},
"django_ct" : {
"type" : "string",
"index" : "not_analyzed",
"include_in_all" : "false"
},
"django_id" : {
"type" : "string",
"index" : "not_analyzed",
"include_in_all" : "false"
},
"id" : {
"type" : "string"
},
"is_published" : {
"type" : "boolean"
},
"outdated" : {
"type" : "boolean"
},
"source_db" : {
"type" : "string"
},
"FdGrp_Cd" : {
"type" : "string"
},
"original_country" : {
"type" : "string"
},
"additional_information" : {
"type" : "string"
},
"text" : {
"type" : "string",
"fields": {
"en": {
"type": "string",
"analyzer": "english"
},
"fi": {
"type": "string",
"analyzer": "finnish"
},
"de": {
"type": "string",
"analyzer": "german"
},
"sv": {
"type": "string",
"analyzer": "swedish"
},
"general": {
"type": "string",
"analyzer": "trigrams"
}
}
},
"user_id" : {
"type" : "string",
"analyzer" : "snowball"
},
"homecare_area" : {
"type" : "string",
"analyzer" : "snowball"
},
"lang" : {
"type" : "string",
"analyzer" : "snowball"
}
}
}
}
}
Here's the query I am running:
GET /docs/_search
{
"query": {
"filtered": {
"filter": {
"bool": {
"must": [
{
"term": {
"outdated": "false"
}
},
{
"term": {
"lang": "fi"
}
}
],
"must_not": [
{
"term": {
"django_ct": "web.plantranslation"
}
},
{
"term": {
"django_ct": "web.recipetranslation"
}
},
{
"term": {
"django_ct": "web.fooddestranslation"
}
},
{
"term": {
"django_ct": "web.dishtranslation"
}
},
{
"term": {
"django_ct": "web.customerinformation"
}
}
],
"should": []
}
},
"query": {
"bool": {
"should": [
{
"span_first": {
"boost": 10,
"end": 1,
"match": {
"span_term": {
"text": "makkara"
}
}
}
}
],
"must": [
{
"multi_match": {
"query": "makkara",
"type": "most_fields",
"fields": ["text.general", "text.fi"],
"minimum_should_match": "50%"
}
}
]
}
}
}
},
"size": 1000
}
The "minimum_should_match" setting is being ignored in ES 2.4.6. Whatever value I give it (50%, 100%), the query always returns far too many results.
I have tested the same query on ES 1.7, 2.0.1 and 2.0.2, and it worked as expected there, but the setting is ignored on ES 2.4.6. What is the reason? Was a change introduced that makes "minimum_should_match" obsolete?
Here are some examples of the "text" values of the returned results:
Expected (legitimate) results: MAKSAMAKKARA, BALKANMAKKARA
Unexpected (wrong) results: PERSIKKARAHKA, MUSTIKKAPIIRAKKA 2 KPL, LIHAPIIRAKKA 4 KPL