I'm confused why the match query seen below is matching two documents rather than just one. I thought using the "and" operator would require all terms to be present in order for it to match.
When I hit the explain endpoint (`GET people/_explain/2`) with the id of the document I do not expect to match, I see the description mentioning synonyms, which seems unexpected to me:
weight(Synonym(email:john email:john.smith email:smith) in 1) [PerFieldSimilarity]
Why is `tom.smith@gmail.com` showing up in the results?
# Drop any existing "people" index so the example starts from a clean state.
DELETE people

# Create "people": the "email" text field uses the custom "email_analyzer"
# (standard tokenizer, then the email_filter -> lowercase -> unique filters).
PUT people
{
  "settings": {
    "analysis": {
      "filter": {
        "email_filter": {
          "type": "pattern_capture",
          "preserve_original": true,
          "patterns": [
            "([^@]+)",
            """(\p{L}+)""",
            """(\d+)""",
            "@(.+)"
          ]
        }
      },
      "analyzer": {
        "email_analyzer": {
          "tokenizer": "standard",
          "filter": [
            "email_filter",
            "lowercase",
            "unique"
          ]
        }
      }
    }
  },
  "mappings": {
    "properties": {
      "email": {
        "type": "text",
        "analyzer": "email_analyzer"
      }
    }
  }
}
# Index three sample documents (ids 1-3) into "people"; each action line is
# followed by the document source on the next line.
POST _bulk
{ "index": { "_index": "people", "_id": "1" } }
{ "email": "john.smith@gmail.com" }
{ "index": { "_index": "people", "_id": "2" } }
{ "email": "tom.smith@gmail.com" }
{ "index": { "_index": "people", "_id": "3" } }
{ "email": "mike.wozowski@gmail.com" }
# Show the tokens produced for a sample address using the analyzer
# configured on the "email" field.
GET people/_analyze
{
  "field": "email",
  "text": "tom.smith@gmail.com"
}
# Match query on "email" for "john.smith" with the "and" operator
# (the query whose results are in question).
GET people/_search
{
  "query": {
    "match": {
      "email": {
        "query": "john.smith",
        "operator": "and"
      }
    }
  }
}