The values of a long array field in a document are automatically sorted when calculating a dot product via a custom Painless script.
Index Mapping
PUT /custom_dot_product
{
"settings": {
"number_of_shards": 1,
"number_of_replicas": 1
},
"mappings": {
"properties": {
"level1": {
"type": "nested",
"properties": {
"level2": {
"properties": {
"vector_3": {
"type": "long"
}
}
}
}
}
}
}
}
Document data:
POST /custom_dot_product/_doc/1
{
"level1": {
"level2": {
"vector_3": [
2,
2,
2,
0
]
}
}
}
Search query
POST custom_dot_product/_search
{
"query": {
"bool": {
"must": [
{
"nested": {
"path": "level1",
"query": {
"function_score": {
"query": {
"match_all": {}
},
"script_score": {
"script": {
"source": " if(doc['level1.level2.vector_3'].size()>0){ double result = 0.0; for (int i = 0; i < params.user_vector.length; i++) result += params.user_vector[i] * doc['level1.level2.vector_3'][i]; return result;} ",
"params": {
"user_vector": [
1,
1,
1,
0
]
}
}
},
"boost_mode": "replace"
}
},
"inner_hits": {
"sort": []
}
}
}
]
}
}
}
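Output (the score is 4.0 rather than the expected 6.0, which is what the dot product gives if the script sees the stored values in sorted order, i.e. [0, 2, 2, 2]):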
{
"took" : 1,
"timed_out" : false,
"_shards" : {
"total" : 1,
"successful" : 1,
"skipped" : 0,
"failed" : 0
},
"hits" : {
"total" : {
"value" : 1,
"relation" : "eq"
},
"max_score" : 4.0,
"hits" : [
{
"_index" : "custom_dot_product",
"_type" : "_doc",
"_id" : "1",
"_score" : 4.0,
"_source" : {
"level1" : {
"level2" : {
"vector_3" : [
2,
2,
2,
0
]
}
}
},
"inner_hits" : {
"level1" : {
"hits" : {
"total" : {
"value" : 1,
"relation" : "eq"
},
"max_score" : 4.0,
"hits" : [
{
"_index" : "custom_dot_product",
"_type" : "_doc",
"_id" : "1",
"_nested" : {
"field" : "level1",
"offset" : 0
},
"_score" : 4.0,
"_source" : {
"level2" : {
"vector_3" : [
2,
2,
2,
0
]
}
}
}
]
}
}
}
}
]
}
}
How can I prevent the array field from being sorted when calculating the dot product with Painless?
We are not using dense_vector because we have to migrate this index to OpenSearch, where the built-in dot product is not available; hence we are writing a custom dot product.