Hi,
I'm struggling to get a particular query working. We have a single search bar where a user can type in various search terms related to people, e.g. "john smith manager edinburgh",
and the following query works okay for this:
GET myindex/_search
{
"query": {
"multi_match": {
"query": "john smith manager edinburgh",
"fields": [
"first_name",
"last_name",
"post_code",
"post_town",
"place_of_birth",
"known_as",
"maiden_name",
"occupation",
"date_of_birth.text",
"address",
"contact_details.telephone.value"
],
"type": "cross_fields",
"operator": "and"
}
}
}
There is a requirement to use phonetic matching on the surname, so I have defined an appropriate analyzer for last_name and applied it to the query as follows (searching last_name.metaphone instead of last_name):
GET myindex/_search
{
"query": {
"multi_match": {
"query": "john smith manager edinburgh",
"fields": [
"first_name",
"last_name.metaphone",
"post_code",
"post_town",
"place_of_birth",
"known_as",
"maiden_name",
"occupation",
"date_of_birth.text",
"address",
"contact_details.telephone.value"
],
"type": "cross_fields",
"operator": "and"
}
}
}
This query returns no hits, whereas the previous query returned 1 hit.
Searching on just "smith", "smyth" or "smythe" works as I would expect: all three return the same hits regardless of the spelling of smith.
We have also declared a synonym analyzer for first_name so that when, say, a user enters "jim", it also searches for "james", "jamie", "jimmy" and "jimmie", for example. But the same issue applies again: when the search is applied to first_name.syn, it doesn't work. For example:
GET myindex/_search
{
"query": {
"multi_match": {
"query": "james evans",
"fields": [
"first_name.syn",
"last_name",
"post_code",
"post_town",
"place_of_birth",
"known_as",
"maiden_name",
"occupation",
"date_of_birth.text",
"address",
"contact_details.telephone.value"
],
"type": "cross_fields",
"operator": "and"
}
}
}
The query above returns 1 hit, but searching for "jim evans" doesn't return any hits. I know the synonyms are applied to the field, so I would expect "jim evans" to return the exact same hit. For reference, the following analyze request:
GET myindex/_analyze
{
"analyzer": "synonym_analyzer",
"text": "james"
}
produces:
{
"tokens" : [
{
"token" : "james",
"start_offset" : 0,
"end_offset" : 5,
"type" : "<ALPHANUM>",
"position" : 0
},
{
"token" : "jamie",
"start_offset" : 0,
"end_offset" : 5,
"type" : "SYNONYM",
"position" : 0
},
{
"token" : "jem",
"start_offset" : 0,
"end_offset" : 5,
"type" : "SYNONYM",
"position" : 0
},
{
"token" : "jim",
"start_offset" : 0,
"end_offset" : 5,
"type" : "SYNONYM",
"position" : 0
},
{
"token" : "jimmie",
"start_offset" : 0,
"end_offset" : 5,
"type" : "SYNONYM",
"position" : 0
},
{
"token" : "jimmy",
"start_offset" : 0,
"end_offset" : 5,
"type" : "SYNONYM",
"position" : 0
}
]
}
The filters and analyzers are defined as follows:
"settings": {
"number_of_shards": 1,
"number_of_replicas": 0,
"index": {
"analysis": {
"analyzer": {
"synonym_analyzer": {
"tokenizer": "standard",
"filter": ["lowercase", "synonym"]
},
"lookahead_analyzer": {
"type": "custom",
"tokenizer": "standard",
"filter": ["lowercase", "custom_edge_ngram"]
},
"nospaces_analyzer": {
"type": "custom",
"tokenizer": "keyword",
"filter": ["lowercase", "whitespace_remove"]
},
"nospaces_edgengram_analyzer": {
"type": "custom",
"tokenizer": "keyword",
"filter": ["lowercase", "whitespace_remove", "custom_edge_ngram"]
},
"metaphone_analyzer": {
"tokenizer": "standard",
"filter": ["lowercase", "insight_metaphone"]
},
"soundex_analyzer": {
"tokenizer": "standard",
"filter": ["lowercase", "insight_soundex"]
}
},
"filter": {
"custom_edge_ngram": {
"type": "edge_ngram",
"min_gram": 2,
"max_gram": 10
},
"synonym": {
"type": "synonym",
"expand": true,
"synonyms_path": "/usr/share/elasticsearch/config/fixed.syn"
},
"whitespace_remove": {
"type": "pattern_replace",
"pattern": " ",
"replacement": ""
},
"insight_metaphone": {
"type": "phonetic",
"encoder": "metaphone",
"replace": false
},
"insight_soundex": {
"type": "phonetic",
"encoder": "soundex",
"replace": false
}
}
}
}
},
I'm at a loss as to how to structure the query properly; any help would be appreciated.
Thanks
Mike