Hello @RabBit_BR!
We are using a proprietary token filter, so I can't share the analyzer definition, but you have the output from `_analyze`
in my first post. Here are the mapping and the results of the query with `explain=true`.
One is with the original analyzer; the other is with the `simple`
analyzer provided in the query.
Mapping:
{
"crescendo-bo-recording-search" : {
"aliases" : { },
"mappings" : {
"properties" : {
"title" : {
"type" : "text",
"fields" : {
"keyword" : {
"type" : "keyword"
}
},
"index_options" : "docs",
"analyzer" : "custom_name_analyzer"
}
}
},
"settings" : {
"index" : {
"routing" : {
"allocation" : {
"include" : {
"_tier_preference" : "data_content"
}
}
},
"number_of_shards" : "1",
"provided_name" : "search",
"similarity" : {
"default" : {
"discount_overlaps" : "true",
"type" : "BM25"
}
},
"creation_date" : "1667571727010",
"analysis" : {
"analyzer" : {
"custom_name_analyzer" : {
"filter" : [
"lowercase",
"trim",
"customtokenfilter"
],
"type" : "custom",
"tokenizer" : "whitespace"
}
}
},
"number_of_replicas" : "5",
"uuid" : "QSfW_4IxSMW6w8cjD4F8IQ",
"version" : {
"created" : "7170399"
}
}
}
}
}
Query with custom analyzer:
{
"took" : 17,
"timed_out" : false,
"_shards" : {
"total" : 1,
"successful" : 1,
"skipped" : 0,
"failed" : 0
},
"hits" : {
"total" : {
"value" : 9587,
"relation" : "eq"
},
"max_score" : 6.748128,
"hits" : [
{
"_shard" : "[search][0]",
"_node" : "A5ZKfzdURX6IKvlBz1Rzzg",
"_index" : "search",
"_type" : "_doc",
"_id" : "CLkKQ4QB1NTMdfY8wCbT",
"_score" : 6.748128,
"_source" : {
"title" : "D"
},
"_explanation" : {
"value" : 6.748128,
"description" : "sum of:",
"details" : [
{
"value" : 6.748128,
"description" : "weight(Synonym(title:autrfois title:d title:dautrfois) in 527039) [PerFieldSimilarity], result of:",
"details" : [
{
"value" : 6.748128,
"description" : "score(freq=1.0), computed as boost * idf * tf from:",
"details" : [
{
"value" : 2.2,
"description" : "boost",
"details" : [ ]
},
{
"value" : 4.741262,
"description" : "idf, computed as log(1 + (N - n + 0.5) / (n + 0.5)) from:",
"details" : [
{
"value" : 9614,
"description" : "n, number of documents containing term",
"details" : [ ]
},
{
"value" : 1101616,
"description" : "N, total number of documents with field",
"details" : [ ]
}
]
},
{
"value" : 0.646944,
"description" : "tf, computed as freq / (freq + k1 * (1 - b + b * dl / avgdl)) from:",
"details" : [
{
"value" : 1.0,
"description" : "termFreq=1.0",
"details" : [ ]
},
{
"value" : 1.2,
"description" : "k1, term saturation parameter",
"details" : [ ]
},
{
"value" : 0.75,
"description" : "b, length normalization parameter",
"details" : [ ]
},
{
"value" : 1.0,
"description" : "dl, length of field",
"details" : [ ]
},
{
"value" : 3.6625738,
"description" : "avgdl, average length of field",
"details" : [ ]
}
]
}
]
}
]
},
{
"value" : 0.0,
"description" : "match on required clause, product of:",
"details" : [
{
"value" : 0.0,
"description" : "# clause",
"details" : [ ]
},
{
"value" : 1.0,
"description" : "DocValuesFieldExistsQuery [field=_primary_term]",
"details" : [ ]
}
]
}
]
}
},
{
"_shard" : "[search][0]",
"_node" : "A5ZKfzdURX6IKvlBz1Rzzg",
"_index" : "search",
"_type" : "_doc",
"_id" : "LMUdQ4QB1NTMdfY8G9Rs",
"_score" : 6.748128,
"_source" : {
"title" : "D"
},
"_explanation" : {
"value" : 6.748128,
"description" : "sum of:",
"details" : [
{
"value" : 6.748128,
"description" : "weight(Synonym(title:autrfois title:d title:dautrfois) in 20774) [PerFieldSimilarity], result of:",
"details" : [
{
"value" : 6.748128,
"description" : "score(freq=1.0), computed as boost * idf * tf from:",
"details" : [
{
"value" : 2.2,
"description" : "boost",
"details" : [ ]
},
{
"value" : 4.741262,
"description" : "idf, computed as log(1 + (N - n + 0.5) / (n + 0.5)) from:",
"details" : [
{
"value" : 9614,
"description" : "n, number of documents containing term",
"details" : [ ]
},
{
"value" : 1101616,
"description" : "N, total number of documents with field",
"details" : [ ]
}
]
},
{
"value" : 0.646944,
"description" : "tf, computed as freq / (freq + k1 * (1 - b + b * dl / avgdl)) from:",
"details" : [
{
"value" : 1.0,
"description" : "termFreq=1.0",
"details" : [ ]
},
{
"value" : 1.2,
"description" : "k1, term saturation parameter",
"details" : [ ]
},
{
"value" : 0.75,
"description" : "b, length normalization parameter",
"details" : [ ]
},
{
"value" : 1.0,
"description" : "dl, length of field",
"details" : [ ]
},
{
"value" : 3.6625738,
"description" : "avgdl, average length of field",
"details" : [ ]
}
]
}
]
}
]
},
{
"value" : 0.0,
"description" : "match on required clause, product of:",
"details" : [
{
"value" : 0.0,
"description" : "# clause",
"details" : [ ]
},
{
"value" : 1.0,
"description" : "DocValuesFieldExistsQuery [field=_primary_term]",
"details" : [ ]
}
]
}
]
}
}
]
}
}
Query with simple analyzer:
{
"took" : 2,
"timed_out" : false,
"_shards" : {
"total" : 1,
"successful" : 1,
"skipped" : 0,
"failed" : 0
},
"hits" : {
"total" : {
"value" : 16,
"relation" : "eq"
},
"max_score" : 16.526728,
"hits" : [
{
"_shard" : "[search][0]",
"_node" : "A5ZKfzdURX6IKvlBz1Rzzg",
"_index" : "search",
"_type" : "_doc",
"_id" : "87YHQ4QB1NTMdfY8Lkd0",
"_score" : 16.526728,
"_source" : {
"title" : "NOEL D AUTREFOIS"
},
"_explanation" : {
"value" : 16.526728,
"description" : "sum of:",
"details" : [
{
"value" : 5.1201873,
"description" : "weight(title:d in 1992259) [PerFieldSimilarity], result of:",
"details" : [
{
"value" : 5.1201873,
"description" : "score(freq=1.0), computed as boost * idf * tf from:",
"details" : [
{
"value" : 2.2,
"description" : "boost",
"details" : [ ]
},
{
"value" : 4.741262,
"description" : "idf, computed as log(1 + (N - n + 0.5) / (n + 0.5)) from:",
"details" : [
{
"value" : 9614,
"description" : "n, number of documents containing term",
"details" : [ ]
},
{
"value" : 1101616,
"description" : "N, total number of documents with field",
"details" : [ ]
}
]
},
{
"value" : 0.49087304,
"description" : "tf, computed as freq / (freq + k1 * (1 - b + b * dl / avgdl)) from:",
"details" : [
{
"value" : 1.0,
"description" : "freq, occurrences of term within document",
"details" : [ ]
},
{
"value" : 1.2,
"description" : "k1, term saturation parameter",
"details" : [ ]
},
{
"value" : 0.75,
"description" : "b, length normalization parameter",
"details" : [ ]
},
{
"value" : 3.0,
"description" : "dl, length of field",
"details" : [ ]
},
{
"value" : 3.6625738,
"description" : "avgdl, average length of field",
"details" : [ ]
}
]
}
]
}
]
},
{
"value" : 11.40654,
"description" : "weight(title:autrefois in 1992259) [PerFieldSimilarity], result of:",
"details" : [
{
"value" : 11.40654,
"description" : "score(freq=1.0), computed as boost * idf * tf from:",
"details" : [
{
"value" : 2.2,
"description" : "boost",
"details" : [ ]
},
{
"value" : 10.562386,
"description" : "idf, computed as log(1 + (N - n + 0.5) / (n + 0.5)) from:",
"details" : [
{
"value" : 28,
"description" : "n, number of documents containing term",
"details" : [ ]
},
{
"value" : 1101616,
"description" : "N, total number of documents with field",
"details" : [ ]
}
]
},
{
"value" : 0.49087304,
"description" : "tf, computed as freq / (freq + k1 * (1 - b + b * dl / avgdl)) from:",
"details" : [
{
"value" : 1.0,
"description" : "freq, occurrences of term within document",
"details" : [ ]
},
{
"value" : 1.2,
"description" : "k1, term saturation parameter",
"details" : [ ]
},
{
"value" : 0.75,
"description" : "b, length normalization parameter",
"details" : [ ]
},
{
"value" : 3.0,
"description" : "dl, length of field",
"details" : [ ]
},
{
"value" : 3.6625738,
"description" : "avgdl, average length of field",
"details" : [ ]
}
]
}
]
}
]
}
]
}
},
{
"_shard" : "[search][0]",
"_node" : "A5ZKfzdURX6IKvlBz1Rzzg",
"_index" : "search",
"_type" : "_doc",
"_id" : "NLcJQ4QB1NTMdfY8LdgQ",
"_score" : 14.747822,
"_source" : {
"title" : "Valse d'autrefois"
},
"_explanation" : {
"value" : 14.747822,
"description" : "sum of:",
"details" : [
{
"value" : 4.56906,
"description" : "weight(title:d in 470) [PerFieldSimilarity], result of:",
"details" : [
{
"value" : 4.56906,
"description" : "score(freq=1.0), computed as boost * idf * tf from:",
"details" : [
{
"value" : 2.2,
"description" : "boost",
"details" : [ ]
},
{
"value" : 4.741262,
"description" : "idf, computed as log(1 + (N - n + 0.5) / (n + 0.5)) from:",
"details" : [
{
"value" : 9614,
"description" : "n, number of documents containing term",
"details" : [ ]
},
{
"value" : 1101616,
"description" : "N, total number of documents with field",
"details" : [ ]
}
]
},
{
"value" : 0.43803644,
"description" : "tf, computed as freq / (freq + k1 * (1 - b + b * dl / avgdl)) from:",
"details" : [
{
"value" : 1.0,
"description" : "freq, occurrences of term within document",
"details" : [ ]
},
{
"value" : 1.2,
"description" : "k1, term saturation parameter",
"details" : [ ]
},
{
"value" : 0.75,
"description" : "b, length normalization parameter",
"details" : [ ]
},
{
"value" : 4.0,
"description" : "dl, length of field",
"details" : [ ]
},
{
"value" : 3.6625738,
"description" : "avgdl, average length of field",
"details" : [ ]
}
]
}
]
}
]
},
{
"value" : 10.1787615,
"description" : "weight(title:autrefois in 470) [PerFieldSimilarity], result of:",
"details" : [
{
"value" : 10.1787615,
"description" : "score(freq=1.0), computed as boost * idf * tf from:",
"details" : [
{
"value" : 2.2,
"description" : "boost",
"details" : [ ]
},
{
"value" : 10.562386,
"description" : "idf, computed as log(1 + (N - n + 0.5) / (n + 0.5)) from:",
"details" : [
{
"value" : 28,
"description" : "n, number of documents containing term",
"details" : [ ]
},
{
"value" : 1101616,
"description" : "N, total number of documents with field",
"details" : [ ]
}
]
},
{
"value" : 0.43803644,
"description" : "tf, computed as freq / (freq + k1 * (1 - b + b * dl / avgdl)) from:",
"details" : [
{
"value" : 1.0,
"description" : "freq, occurrences of term within document",
"details" : [ ]
},
{
"value" : 1.2,
"description" : "k1, term saturation parameter",
"details" : [ ]
},
{
"value" : 0.75,
"description" : "b, length normalization parameter",
"details" : [ ]
},
{
"value" : 4.0,
"description" : "dl, length of field",
"details" : [ ]
},
{
"value" : 3.6625738,
"description" : "avgdl, average length of field",
"details" : [ ]
}
]
}
]
}
]
}
]
}
}
]
}
}
I hope this information helps you understand the issue. In the first query's results, you can see that only `d`
is used to run the query; the other tokens are discarded.