Hi, I use Enterprise elasticsearch and ingest documents using web crawler. The data is pre-processed using an Inference pipeline which creates vectors for title, meta_description fields before ingesting the docs to the indexes. I also assign a field named "priority" for all the documents based on the page URL (crawled by the web crawler).
Things were fine for English markets when using ELSER embeddings + script_score to customize the documents' scores, but since we have now started working with non-English locales and creating embeddings using the E5 model, I'm struggling to find an example of running an approximate kNN search while also customizing the score values with a script_score. Also note that I cannot use RRF either, since I need highlighted fields.
Below is the score calculation done for English markets with ELSER embeddings:
"query": {
"script_score": {
"query": {
"bool": {
"should": [
{
"multi_match": {
"query": {{query_string_temp}},
"fields": [
"body_content^3.0",
"headings^4.0",
"meta_description^4.0",
"meta_keywords.text^4.0",
"title^5.0"
]
}
},
{
"text_expansion": {
"ml.inference.title_expanded.predicted_value": {
"model_text": {{query_string_temp}},
"model_id": ".elser_model_2_linux-x86_64",
"boost": 5.0
}
}
},
{
"text_expansion": {
"ml.inference.meta_description_expanded.predicted_value": {
"model_text": {{query_string_temp}},
"model_id": ".elser_model_2_linux-x86_64",
"boost": 4.0
}
}
},
{
"text_expansion": {
"ml.inference.meta_keywords_expanded.predicted_value": {
"model_text": {{query_string_temp}},
"model_id": ".elser_model_2_linux-x86_64",
"boost": 4.0
}
}
},
{
"text_expansion": {
"ml.inference.headings_expanded.predicted_value": {
"model_text": {{query_string_temp}},
"model_id": ".elser_model_2_linux-x86_64",
"boost": 4.0
}
}
},
{
"bool": {
"boost": 1.0
}
},
{
"bool": {
"boost": 1.0
}
}
],
"minimum_should_match": "1",
"boost": 1.0
}
},
"script": {
"source": "if(doc['priority'].size() > 0){ return _score*(1-(doc['priority'].value-1)/10) }",
"lang": "painless"
}
}
},
"min_score": 50.0