I have indexed the same text three times into a nested object in order to apply different analyzers. I can't understand why, when I use highlight in the query on the nested fields, it works for only one of them.
With this query the highlight works; changing sentences.sentence_value to sentences.sentence_insensitive doesn't work.
{
"_source": {
"includes": [ "highlight", "title", "media", "verbatim", "incrementalPushing" ]
},
"query": {
"bool": {
"must": [
{
"match": {
"customer": "TSTSM"
}
},
{
"nested": {
"path": "sentences",
"query": {
"match_phrase": {
"sentences.sentence_value": {
"query": "commissione europea"
}
}
}
}
}
]
}
},
"highlight": {
"fields": {
"sentences.sentence_value": {}
}
}
}
Here is the mapping of the nested field:
"sentences":{ "type": "nested", "properties": { "sentence_value":{ "type": "text", "store": false, "search_analyzer": "index_analysis", "analyzer": "custom_italian", "similarity": "BM25" },"sentence_terms":{ "type": "text", "store": true, "analyzer": "termvectrix", "similarity": "BM25" }, "sentence_insensitive":{ "type": "text", "store": true, "term_vector": "with_positions_offsets_payloads", "analyzer": "insensitive", "similarity": "BM25", "doc_values": true }, "sentence_start":{ "type": "date", "format" : "yyyy/MM/dd'-'HH:mm:ss", "store": false }, "sentence_end":{ "type": "date", "format" : "yyyy/MM/dd'-'HH:mm:ss", "store": false }, "speaker_value": { "type": "text", "index" : "not_analyzed" } }}
and the analyzers:
"settings": { "analysis": { "filter": { "italian_elision": { "type": "elision", "articles": [ "c", "l", "all", "dall", "dell", "nell", "sull", "coll", "pell", "gl", "agl", "dagl", "degl", "negl", "sugl", "un", "m", "t", "s", "v", "d" ] }, "italian_stop": { "type": "stop", "stopwords": "_italian_" }, "italian_stemmer": { "type": "stemmer", "language": "light_italian" }, "custom_synonym" : { "type" : "synonym", "tokenizer": "keyword", "synonyms_path" : "analysis/sinonimi.txt" }, "edgengram":{ "type":"edgeNGram", "min_gram":2, "max_gram":20 }, "unique_stem": { "type": "unique", "only_on_same_position": true } }, "analyzer": { "termvectrix":{ "tokenizer": "standard", "filter": ["standard"]}, "custom_italian": { "tokenizer": "standard", "filter": [ "lowercase", "italian_stop", "italian_elision", "keyword_repeat", "italian_stemmer", "unique_stem", "edgengram" ] }, "index_analysis": {"tokenizer": "standard", "filter": ["lowercase", "custom_synonym"] },"insensitive":{ "tokenizer": "standard", "filter": [ "lowercase" ] }