Hello,
I'm currently developing a site using Pimcore and its Advanced Object Search add-on for Elasticsearch, but I'm having trouble getting hits on the correct words. A stripped-down version of the index itself looks like this:
{
"advanced_object_search_product" : {
"aliases" : { },
"mappings" : {
"properties" : {
"localizedfields" : {
"type" : "nested",
"properties" : {
"en" : {
"type" : "nested",
"properties" : {
"KeyFeatures" : {
"properties" : {
"notInherited" : {
"type" : "text",
"fields" : {
"analyzed_ngram" : {
"type" : "text",
"analyzer" : "app_ngram_analyzer",
"search_analyzer" : "app_whitespace_analyzer"
},
"raw" : {
"type" : "text"
}
}
},
"standard" : {
"type" : "text",
"fields" : {
"analyzed_ngram" : {
"type" : "text",
"analyzer" : "app_ngram_analyzer",
"search_analyzer" : "app_whitespace_analyzer"
},
"raw" : {
"type" : "text"
}
}
}
}
},
"MaterialType" : {
"properties" : {
"notInherited" : {
"type" : "text",
"fields" : {
"raw" : {
"type" : "keyword"
}
}
},
"standard" : {
"type" : "text",
"fields" : {
"raw" : {
"type" : "keyword"
}
}
}
}
},
"SeoDescription" : {
"properties" : {
"notInherited" : {
"type" : "text",
"fields" : {
"analyzed_ngram" : {
"type" : "text",
"analyzer" : "app_ngram_analyzer",
"search_analyzer" : "app_whitespace_analyzer"
},
"raw" : {
"type" : "text"
}
}
},
"standard" : {
"type" : "text",
"fields" : {
"analyzed_ngram" : {
"type" : "text",
"analyzer" : "app_ngram_analyzer",
"search_analyzer" : "app_whitespace_analyzer"
},
"raw" : {
"type" : "text"
}
}
}
}
},
"SeoTitle" : {
"properties" : {
"notInherited" : {
"type" : "text",
"fields" : {
"raw" : {
"type" : "keyword"
}
}
},
"standard" : {
"type" : "text",
"fields" : {
"raw" : {
"type" : "keyword"
}
}
}
}
},
"description" : {
"properties" : {
"notInherited" : {
"type" : "text",
"fields" : {
"analyzed_ngram" : {
"type" : "text",
"analyzer" : "app_ngram_analyzer",
"search_analyzer" : "app_whitespace_analyzer"
},
"raw" : {
"type" : "text"
}
}
},
"standard" : {
"type" : "text",
"fields" : {
"analyzed_ngram" : {
"type" : "text",
"analyzer" : "app_ngram_analyzer",
"search_analyzer" : "app_whitespace_analyzer"
},
"raw" : {
"type" : "text"
}
}
}
}
},
"explanation" : {
"properties" : {
"notInherited" : {
"type" : "text",
"fields" : {
"raw" : {
"type" : "keyword"
}
}
},
"standard" : {
"type" : "text",
"fields" : {
"raw" : {
"type" : "keyword"
}
}
}
}
},
"name" : {
"properties" : {
"notInherited" : {
"type" : "text",
"fields" : {
"raw" : {
"type" : "keyword"
}
}
},
"standard" : {
"type" : "text",
"fields" : {
"raw" : {
"type" : "keyword"
}
}
}
}
},
"shortdescription" : {
"properties" : {
"notInherited" : {
"type" : "text",
"fields" : {
"analyzed_ngram" : {
"type" : "text",
"analyzer" : "app_ngram_analyzer",
"search_analyzer" : "app_whitespace_analyzer"
},
"raw" : {
"type" : "text"
}
}
},
"standard" : {
"type" : "text",
"fields" : {
"analyzed_ngram" : {
"type" : "text",
"analyzer" : "app_ngram_analyzer",
"search_analyzer" : "app_whitespace_analyzer"
},
"raw" : {
"type" : "text"
}
}
}
}
},
"specifications" : {
"properties" : {
"notInherited" : {
"type" : "text",
"fields" : {
"analyzed_ngram" : {
"type" : "text",
"analyzer" : "app_ngram_analyzer",
"search_analyzer" : "app_whitespace_analyzer"
},
"raw" : {
"type" : "text"
}
}
},
"standard" : {
"type" : "text",
"fields" : {
"analyzed_ngram" : {
"type" : "text",
"analyzer" : "app_ngram_analyzer",
"search_analyzer" : "app_whitespace_analyzer"
},
"raw" : {
"type" : "text"
}
}
}
}
}
}
},
"sv_SE" : {
"type" : "nested",
"properties" : {
"KeyFeatures" : {
"properties" : {
"notInherited" : {
"type" : "text",
"fields" : {
"analyzed_ngram" : {
"type" : "text",
"analyzer" : "app_ngram_analyzer",
"search_analyzer" : "app_whitespace_analyzer"
},
"raw" : {
"type" : "text"
}
}
},
"standard" : {
"type" : "text",
"fields" : {
"analyzed_ngram" : {
"type" : "text",
"analyzer" : "app_ngram_analyzer",
"search_analyzer" : "app_whitespace_analyzer"
},
"raw" : {
"type" : "text"
}
}
}
}
},
"MaterialType" : {
"properties" : {
"notInherited" : {
"type" : "text",
"fields" : {
"raw" : {
"type" : "keyword"
}
}
},
"standard" : {
"type" : "text",
"fields" : {
"raw" : {
"type" : "keyword"
}
}
}
}
},
"SeoDescription" : {
"properties" : {
"notInherited" : {
"type" : "text",
"fields" : {
"analyzed_ngram" : {
"type" : "text",
"analyzer" : "app_ngram_analyzer",
"search_analyzer" : "app_whitespace_analyzer"
},
"raw" : {
"type" : "text"
}
}
},
"standard" : {
"type" : "text",
"fields" : {
"analyzed_ngram" : {
"type" : "text",
"analyzer" : "app_ngram_analyzer",
"search_analyzer" : "app_whitespace_analyzer"
},
"raw" : {
"type" : "text"
}
}
}
}
},
"SeoTitle" : {
"properties" : {
"notInherited" : {
"type" : "text",
"fields" : {
"raw" : {
"type" : "keyword"
}
}
},
"standard" : {
"type" : "text",
"fields" : {
"raw" : {
"type" : "keyword"
}
}
}
}
},
"description" : {
"properties" : {
"notInherited" : {
"type" : "text",
"fields" : {
"analyzed_ngram" : {
"type" : "text",
"analyzer" : "app_ngram_analyzer",
"search_analyzer" : "app_whitespace_analyzer"
},
"raw" : {
"type" : "text"
}
}
},
"standard" : {
"type" : "text",
"fields" : {
"analyzed_ngram" : {
"type" : "text",
"analyzer" : "app_ngram_analyzer",
"search_analyzer" : "app_whitespace_analyzer"
},
"raw" : {
"type" : "text"
}
}
}
}
},
"explanation" : {
"properties" : {
"notInherited" : {
"type" : "text",
"fields" : {
"raw" : {
"type" : "keyword"
}
}
},
"standard" : {
"type" : "text",
"fields" : {
"raw" : {
"type" : "keyword"
}
}
}
}
},
"name" : {
"properties" : {
"notInherited" : {
"type" : "text",
"fields" : {
"raw" : {
"type" : "keyword"
}
}
},
"standard" : {
"type" : "text",
"fields" : {
"raw" : {
"type" : "keyword"
}
}
}
}
},
"shortdescription" : {
"properties" : {
"notInherited" : {
"type" : "text",
"fields" : {
"analyzed_ngram" : {
"type" : "text",
"analyzer" : "app_ngram_analyzer",
"search_analyzer" : "app_whitespace_analyzer"
},
"raw" : {
"type" : "text"
}
}
},
"standard" : {
"type" : "text",
"fields" : {
"analyzed_ngram" : {
"type" : "text",
"analyzer" : "app_ngram_analyzer",
"search_analyzer" : "app_whitespace_analyzer"
},
"raw" : {
"type" : "text"
}
}
}
}
},
"specifications" : {
"properties" : {
"notInherited" : {
"type" : "text",
"fields" : {
"analyzed_ngram" : {
"type" : "text",
"analyzer" : "app_ngram_analyzer",
"search_analyzer" : "app_whitespace_analyzer"
},
"raw" : {
"type" : "text"
}
}
},
"standard" : {
"type" : "text",
"fields" : {
"analyzed_ngram" : {
"type" : "text",
"analyzer" : "app_ngram_analyzer",
"search_analyzer" : "app_whitespace_analyzer"
},
"raw" : {
"type" : "text"
}
}
}
}
}
}
}
}
},
"sku" : {
"properties" : {
"notInherited" : {
"type" : "text",
"fields" : {
"raw" : {
"type" : "keyword"
}
}
},
"standard" : {
"type" : "text",
"fields" : {
"raw" : {
"type" : "keyword"
}
}
}
}
}
}
},
"settings" : {
"index" : {
"max_ngram_diff" : "30",
"routing" : {
"allocation" : {
"include" : {
"_tier_preference" : "data_content"
}
}
},
"mapping" : {
"nested_fields" : {
"limit" : "200"
},
"total_fields" : {
"limit" : "100000"
}
},
"number_of_shards" : "5",
"provided_name" : "advanced_object_search_product",
"creation_date" : "1645693962446",
"analysis" : {
"normalizer" : {
"lowercase" : {
"filter" : [
"lowercase"
],
"type" : "custom"
}
},
"analyzer" : {
"app_ngram_analyzer" : {
"filter" : [
"lowercase"
],
"tokenizer" : "app_ngram_tokenzier"
},
"app_whitespace_analyzer" : {
"filter" : [
"lowercase"
],
"tokenizer" : "app_whitespace_tokenzier"
}
},
"tokenizer" : {
"app_ngram_tokenzier" : {
"token_chars" : [
"letter",
"digit"
],
"min_gram" : "3",
"type" : "ngram",
"max_gram" : "25"
},
"app_whitespace_tokenzier" : {
"type" : "whitespace"
}
}
},
"number_of_replicas" : "0",
"uuid" : "VldwrvqtSJSwzOi77FF5Rg",
"version" : {
"created" : "7120099"
}
}
}
}
}
My goal is to get hits on all fields for a specific language under localizedfields, as well as on the sku field, which isn't localized. I've tried a number of different queries using query_string, multi_match, etc., but have never been able to get them to work properly, and I'm unsure how to do so. My current query setup, which is the most accurate so far, looks like this, but it doesn't find all the hits I want:
"query": {
"dis_max": {
"queries": [
{
"query_string": {
"query":"banderoll",
"fields": [
"sku^3",
"sku.*^3"
],
"default_operator":"and",
"lenient":true,
"analyze_wildcard":true,
"allow_leading_wildcard":true
}
},
{
"nested": {
"path":"localizedfields",
"query": {
"nested": {
"path":"localizedfields.sv_SE",
"query": {
"bool": {
"must": [
{
"query_string": {
"query":"banderoll",
"fields": [
"localizedfields.sv_SE.name^2",
"localizedfields.sv_SE.*",
"localizedfields.sv_SE.name.*^2",
"localizedfields.sv_SE.*.*",
"localizedfields.sv_SE.*.*.*"
],
"default_operator":"and",
"lenient":true,
"analyze_wildcard":true,
"allow_leading_wildcard":true
}
}
],
"should": [
{
"multi_match": {
"query":"banderoll",
"fields":[
"localizedfields.sv_SE.name^2",
"localizedfields.sv_SE.*",
"localizedfields.sv_SE.name.*^2",
"localizedfields.sv_SE.*.*",
"localizedfields.sv_SE.*.*.*"
],
"boost":10,
"lenient":true
}
}
]
}
}
}
}
}
}
],
"tie_breaker":0.7,
"boost":1.2
}
}
What am I doing wrong, and how should I reconfigure this to get a good search engine for the site? I've noticed, for example, that the above query returns some hits when I search for a complete word, and different or additional hits when I remove the last character.
I've searched Google, this forum, and the Elasticsearch guides, but haven't been able to find a solution that works as expected.
Any help is greatly appreciated!
Thanks in advance!