Hello, I have an interesting issue to report for ES 7.12. For one of the dynamically indexed fields, the analyzer used for indexing is "ngram", but for searching it is "standard". I am not sure if this is a bug or an intended change. This started happening since ES 7.10.
Below I am showing a simple mapping with one field and different analyzers. The steps below, performed on 7.9 and 7.12, produce different results: 7.9 returns a hit while 7.12 does not, because the standard analyzer is used at search time.
Steps to reproduce:
- Create an Index with mapping.
PUT - http://localhost:9200/{indexname}
{
"mappings": {
"properties": {
"category": {
"properties": {
"id": {
"type": "long"
},
"name": {
"type": "keyword",
"normalizer": "lowercase_normalizer",
"fields": {
"analyzed": {
"type": "text",
"analyzer": "content_asset_engram_analyzer"
}
}
},
"parentId": {
"type": "long"
}
}
}
}
},
"settings": {
"index": {
"max_ngram_diff": "3",
"refresh_interval": "30s",
"number_of_shards": "1",
"analysis": {
"normalizer": {
"lowercase_normalizer": {
"filter": [ "lowercase" ],
"type": "custom"
}
},
"analyzer": {
"default_search": {
"filter": [ "lowercase" ],
"type": "custom",
"tokenizer": "standard"
},
"content_asset_html_strip": {
"filter": [ "lowercase" ],
"type": "custom",
"char_filter": [ "html_strip" ],
"tokenizer": "content_asset_engram_tokenizer"
},
"content_asset_engram_analyzer": {
"filter": [ "lowercase" ],
"type": "custom",
"tokenizer": "content_asset_engram_tokenizer"
}
},
"tokenizer": {
"content_asset_engram_tokenizer": {
"token_chars": [ "letter", "digit" ],
"min_gram": "3",
"type": "ngram",
"max_gram": "6"
}
}
}
}
}
}
- Add document to the index
POST http://localhost:9200/{indexname}/_doc
{
"category": {
"id": 3962,
"name": "Content Builder",
"parentId": 0
}
}
- Search
GET http://localhost:9200/{indexname}/_search
{
"query": {
"match": {
"category.name.analyzed": "content"
}
}
}