Trying to include special characters in an ngram tokeniser
DELETE test
PUT test
{
  "settings": {
    "analysis": {
      "analyzer": {
        "my_analyzer": {
          "type": "custom",
          "tokenizer": "whitespace",
          "filter": [
            "lowercase", "ngram", "asciifolding", "stop"
          ]
        }
      },
      "filter": {
        "ngram": {
          "type": "edge_ngram",
          "min_gram": 1,
          "max_gram": 20,
          "token_chars": [
            "letter",
            "digit",
            "punctuation",
            "symbol"
          ]
        }
      }
    }
  },
  "mappings": {
    "doc": {
      "properties": {
        "text": {
          "type": "text",
          "analyzer": "my_analyzer",
          "search_analyzer": "simple"
        }
      }
    }
  }
}
PUT test/doc/1
{
  "text": "2 #Quick Foxes lived and died"
}
PUT test/doc/2
{
  "text": "2 #Quick Foxes lived died"
}
PUT test/doc/3
{
  "text": "2 #Quick Foxes lived died and resurrected their wys "
}
PUT test/doc/6
{
  "text": "$100 dollars manga #thenga @trump"
}
When we try the following query:
POST test/_refresh
GET test/_search
{
  "query": {
    "match_phrase": {
      "text": "#Qui"
    }
  }
}
The result is:
{
"took": 0,
"timed_out": false,
"_shards": {
"total": 5,
"successful": 5,
"skipped": 0,
"failed": 0
},
"hits": {
"total": 0,
"max_score": null,
"hits": []
}
}
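Our suspicion is that the search side is the problem: the field uses "search_analyzer": "simple", and as far as we know the simple analyzer splits on anything that is not a letter and lowercases, so the # is most likely stripped from the query text before matching. The check below should confirm it (we have not included its output here; the expected result is our assumption):
GET test/_analyze
{
  "analyzer": "simple",
  "text": "#Qui"
}
If this returns just qui, that would explain the zero hits: the indexed edge grams of #Quick all keep the # (#, #q, #qu, #qui, ...), and there is no plain qui token to match.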
But when we try this:
GET test/_search
{
  "query": {
    "match_phrase": {
      "text": "fo"
    }
  }
}
The result is:
{
"took": 0,
"timed_out": false,
"_shards": {
"total": 5,
"successful": 5,
"skipped": 0,
"failed": 0
},
"hits": {
"total": 3,
"max_score": 1.0247581,
"hits": [
{
"_index": "test",
"_type": "doc",
"_id": "2",
"_score": 1.0247581,
"_source": {
"text": "2 #Quick Foxes lived died"
}
},
{
"_index": "test",
"_type": "doc",
"_id": "1",
"_score": 0.41531453,
"_source": {
"text": "2 #Quick Foxes lived and died"
}
},
{
"_index": "test",
"_type": "doc",
"_id": "3",
"_score": 0.41030136,
"_source": {
"text": "2 #Quick Foxes lived died and resurrected their wys "
}
}
]
}
}
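For comparison, the same check on the term that does match (again, output not verified, this is our expectation):
GET test/_analyze
{
  "analyzer": "simple",
  "text": "fo"
}
Here fo should come out unchanged, and fo is one of the indexed edge grams of Foxes, which would explain why this query finds all three documents containing that word.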
Verifying the analyzer:
GET test/_analyze
{
  "analyzer": "my_analyzer",
  "text": "2 #Quick Foxes lived and died"
}
The result:
{
"tokens": [
{
"token": "2",
"start_offset": 0,
"end_offset": 1,
"type": "word",
"position": 0
},
{
"token": "#",
"start_offset": 2,
"end_offset": 8,
"type": "word",
"position": 1
},
{
"token": "#q",
"start_offset": 2,
"end_offset": 8,
"type": "word",
"position": 1
},
{
"token": "#qu",
"start_offset": 2,
"end_offset": 8,
"type": "word",
"position": 1
},
{
"token": "#qui",
"start_offset": 2,
"end_offset": 8,
"type": "word",
"position": 1
},
{
"token": "#quic",
"start_offset": 2,
"end_offset": 8,
"type": "word",
"position": 1
},
{
"token": "#quick",
"start_offset": 2,
"end_offset": 8,
"type": "word",
"position": 1
},
{
"token": "f",
"start_offset": 9,
"end_offset": 14,
"type": "word",
"position": 2
},
{
"token": "fo",
"start_offset": 9,
"end_offset": 14,
"type": "word",
"position": 2
},
{
"token": "fox",
"start_offset": 9,
"end_offset": 14,
"type": "word",
"position": 2
},
{
"token": "foxe",
"start_offset": 9,
"end_offset": 14,
"type": "word",
"position": 2
},
{
"token": "foxes",
"start_offset": 9,
"end_offset": 14,
"type": "word",
"position": 2
},
{
"token": "l",
"start_offset": 15,
"end_offset": 20,
"type": "word",
"position": 3
},
{
"token": "li",
"start_offset": 15,
"end_offset": 20,
"type": "word",
"position": 3
},
{
"token": "liv",
"start_offset": 15,
"end_offset": 20,
"type": "word",
"position": 3
},
{
"token": "live",
"start_offset": 15,
"end_offset": 20,
"type": "word",
"position": 3
},
{
"token": "lived",
"start_offset": 15,
"end_offset": 20,
"type": "word",
"position": 3
},
{
"token": "d",
"start_offset": 25,
"end_offset": 29,
"type": "word",
"position": 5
},
{
"token": "di",
"start_offset": 25,
"end_offset": 29,
"type": "word",
"position": 5
},
{
"token": "die",
"start_offset": 25,
"end_offset": 29,
"type": "word",
"position": 5
},
{
"token": "died",
"start_offset": 25,
"end_offset": 29,
"type": "word",
"position": 5
}
]
}
How do I make the search also match terms that start with special characters such as # (for example the #Qui query above)?
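One idea we are considering, but have not verified, is to stop using the built-in simple analyzer at query time and instead define a search analyzer that keeps the special characters: the same whitespace tokenizer and lowercase/asciifolding filters as the index analyzer, just without the edge_ngram filter. A sketch of what we mean (the name my_search_analyzer is ours; also, as far as we can tell token_chars is an option of the ngram tokenizers rather than of the edge_ngram token filter, so it is left out here):
DELETE test
PUT test
{
  "settings": {
    "analysis": {
      "analyzer": {
        "my_analyzer": {
          "type": "custom",
          "tokenizer": "whitespace",
          "filter": [
            "lowercase", "ngram", "asciifolding", "stop"
          ]
        },
        "my_search_analyzer": {
          "type": "custom",
          "tokenizer": "whitespace",
          "filter": [
            "lowercase", "asciifolding"
          ]
        }
      },
      "filter": {
        "ngram": {
          "type": "edge_ngram",
          "min_gram": 1,
          "max_gram": 20
        }
      }
    }
  },
  "mappings": {
    "doc": {
      "properties": {
        "text": {
          "type": "text",
          "analyzer": "my_analyzer",
          "search_analyzer": "my_search_analyzer"
        }
      }
    }
  }
}
With that in place, the idea is that #Qui would be analyzed at search time to #qui, which does exist as an indexed edge gram, so the match_phrase query above should start returning documents. Is this the right way to do it, or is there a better approach?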