Hi Team,
I'm using an edge n-gram analyzer with the settings shown below:
{
"content" : {
"settings" : {
"index" : {
"number_of_shards" : "5",
"provided_name" : "content",
"creation_date" : "1526295076710",
"analysis" : {
"analyzer" : {
"autocomplete_search" : {
"tokenizer" : "lowercase"
},
"edgeNgram_analyzer" : {
"filter" : [
"lowercase"
],
"type" : "custom",
"tokenizer" : "autocomplete_tokenizer"
}
},
"tokenizer" : {
"autocomplete_tokenizer" : {
"type" : "edge_ngram",
"min_gram" : "2",
"max_gram" : "50"
}
}
},
"number_of_replicas" : "1",
"uuid" : "pJ_A1EfOTFyfXFcoPRCemQ",
"version" : {
"created" : "5060099"
}
}
}
}
}
But when I inspect the tokens, they are generated only from the first character of the whole text. I need tokens that start after each space (that is, at every word) too, as in the example from the documentation.
localhost:9200/content/_analyze
{
"analyzer" : "edgeNgram_analyzer",
"text" : "Quick Brown Fox"
}
{"tokens":[{"token":"qu","start_offset":0,"end_offset":2,"type":"word","position":0},{"token":"qui","start_offset":0,"end_offset":3,"type":"word","position":1},{"token":"quic","start_offset":0,"end_offset":4,"type":"word","position":2},{"token":"quick","start_offset":0,"end_offset":5,"type":"word","position":3},{"token":"quick ","start_offset":0,"end_offset":6,"type":"word","position":4},{"token":"quick b","start_offset":0,"end_offset":7,"type":"word","position":5},{"token":"quick br","start_offset":0,"end_offset":8,"type":"word","position":6},{"token":"quick bro","start_offset":0,"end_offset":9,"type":"word","position":7},{"token":"quick brow","start_offset":0,"end_offset":10,"type":"word","position":8},{"token":"quick brown","start_offset":0,"end_offset":11,"type":"word","position":9},{"token":"quick brown ","start_offset":0,"end_offset":12,"type":"word","position":10},{"token":"quick brown f","start_offset":0,"end_offset":13,"type":"word","position":11},{"token":"quick brown fo","start_offset":0,"end_offset":14,"type":"word","position":12},{"token":"quick brown fox","start_offset":0,"end_offset":15,"type":"word","position":13}]}
But according to the example in the documentation, the tokens should look similar to the following:
{qu,qui,quick,br,brown}
Please help
Regards,
SatyaRaj