I have created a custom pattern analyzer for one of the fields. It creates two tokens most of the time. But when I use a match query with the AND operator, or with minimum_should_match set to 100%, it returns records even when only one of the tokens matched.
Mapping for the index:
{
  "settings": {
    "analysis": {
      "analyzer": {
        "test_analyzer": {
          "type": "custom",
          "tokenizer": "keyword",
          "filter": [
            "lowercase",
            "test_pattern",
            "unique"
          ]
        }
      },
      "filter": {
        "test_pattern": {
          "type": "pattern_capture",
          "preserve_original": 0,
          "patterns": [
            ".*###(\\d*)###(.*###.*###.*)",
            ".*###(.*###.*###.*)"
          ]
        }
      }
    }
  },
  "mappings": {
    "doc_type": {
      "properties": {
        "test_value": {
          "type": "text",
          "analyzer": "test_analyzer"
        }
      }
    }
  }
}
Test docs:
{
  "test_value": "abc###def###12345###jkl###mno###pqr"
}
{
  "test_value": "abc###def###12378###jkl###mno###pqr"
}
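Running _analyze with this analyzer against the first test value shows the two tokens it produces (response trimmed here to just token and position; note that both tokens come out at the same position, 0):
GET test_stack/_analyze
{
  "analyzer": "test_analyzer",
  "text": "abc###def###12345###jkl###mno###pqr"
}
{
  "tokens": [
    { "token": "12345", "position": 0 },
    { "token": "jkl###mno###pqr", "position": 0 }
  ]
}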
Query:
{
  "query": {
    "match": {
      "test_value": {
        "query": "abc###def###12345###jkl###mno###pqr",
        "operator": "AND"
      }
    }
  }
}
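The minimum_should_match variant I mentioned behaves identically:
{
  "query": {
    "match": {
      "test_value": {
        "query": "abc###def###12345###jkl###mno###pqr",
        "minimum_should_match": "100%"
      }
    }
  }
}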
Both queries return both records. I also tried to understand the explanation of the result, but I don't know why there is a Synonym in it. Can you please help me figure out where I am going wrong?
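For reference, the _explanation output below comes from running the same match query with "explain": true in the search body:
{
  "explain": true,
  "query": {
    "match": {
      "test_value": {
        "query": "abc###def###12345###jkl###mno###pqr",
        "operator": "AND"
      }
    }
  }
}
Response: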
{
  "took": 0,
  "timed_out": false,
  "_shards": {
    "total": 5,
    "successful": 5,
    "failed": 0
  },
  "hits": {
    "total": 2,
    "max_score": 0.46029136,
    "hits": [
      {
        "_shard": "[test_stack][1]",
        "_node": "JO7WIHxLQKW9b_hc8Xm9fQ",
        "_index": "test_stack",
        "_type": "doc_type",
        "_id": "AWkPiO2DN2C8SdyE0d6K",
        "_score": 0.46029136,
        "_source": {
          "test_value": "abc###def###12345###jkl###mno###pqr"
        },
        "_explanation": {
          "value": 0.46029136,
          "description": "weight(Synonym(test_value:12345 test_value:jkl###mno###pqr) in 0) [PerFieldSimilarity], result of:",
          "details": [
            {
              "value": 0.46029136,
              "description": "score(doc=0,freq=2.0 = termFreq=2.0 ), product of:",
              "details": [
                {
                  "value": 0.2876821,
                  "description": "idf, computed as log(1 + (docCount - docFreq + 0.5) / (docFreq + 0.5)) from:",
                  "details": [
                    {
                      "value": 1,
                      "description": "docFreq",
                      "details": []
                    },
                    {
                      "value": 1,
                      "description": "docCount",
                      "details": []
                    }
                  ]
                },
                {
                  "value": 1.6,
                  "description": "tfNorm, computed as (freq * (k1 + 1)) / (freq + k1 * (1 - b + b * fieldLength / avgFieldLength)) from:",
                  "details": [
                    {
                      "value": 2,
                      "description": "termFreq=2.0",
                      "details": []
                    },
                    {
                      "value": 1.2,
                      "description": "parameter k1",
                      "details": []
                    },
                    {
                      "value": 0.75,
                      "description": "parameter b",
                      "details": []
                    },
                    {
                      "value": 2,
                      "description": "avgFieldLength",
                      "details": []
                    },
                    {
                      "value": 1,
                      "description": "fieldLength",
                      "details": []
                    }
                  ]
                }
              ]
            }
          ]
        }
      },
      {
        "_shard": "[test_stack][4]",
        "_node": "JO7WIHxLQKW9b_hc8Xm9fQ",
        "_index": "test_stack",
        "_type": "doc_type",
        "_id": "AWkPiQfJN2C8SdyE0d6L",
        "_score": 0.36165747,
        "_source": {
          "test_value": "abc###def###12378###jkl###mno###pqr"
        },
        "_explanation": {
          "value": 0.3616575,
          "description": "weight(Synonym(test_value:12345 test_value:jkl###mno###pqr) in 0) [PerFieldSimilarity], result of:",
          "details": [
            {
              "value": 0.3616575,
              "description": "score(doc=0,freq=1.0 = termFreq=1.0 ), product of:",
              "details": [
                {
                  "value": 0.2876821,
                  "description": "idf, computed as log(1 + (docCount - docFreq + 0.5) / (docFreq + 0.5)) from:",
                  "details": [
                    {
                      "value": 1,
                      "description": "docFreq",
                      "details": []
                    },
                    {
                      "value": 1,
                      "description": "docCount",
                      "details": []
                    }
                  ]
                },
                {
                  "value": 1.2571429,
                  "description": "tfNorm, computed as (freq * (k1 + 1)) / (freq + k1 * (1 - b + b * fieldLength / avgFieldLength)) from:",
                  "details": [
                    {
                      "value": 1,
                      "description": "termFreq=1.0",
                      "details": []
                    },
                    {
                      "value": 1.2,
                      "description": "parameter k1",
                      "details": []
                    },
                    {
                      "value": 0.75,
                      "description": "parameter b",
                      "details": []
                    },
                    {
                      "value": 2,
                      "description": "avgFieldLength",
                      "details": []
                    },
                    {
                      "value": 1,
                      "description": "fieldLength",
                      "details": []
                    }
                  ]
                }
              ]
            }
          ]
        }
      }
    ]
  }
}