Exact-Knn with python api

I've been using a `body_func` with the LangChain `ElasticsearchRetriever` to run searches in ES; my code is below:

``

def embed_query(self, query_text, k=30, candidate_num=45):
    """Build an approximate-kNN search body for ``query_text``.

    The text is embedded with the model returned by
    ``self.embedding_model_local()`` and wrapped in a top-level ``knn``
    clause that targets the ``output_dense_vector`` field.

    Args:
        query_text: Text to embed and search for.
        k: Value sent as ``k`` in the ``knn`` clause.
        candidate_num: Value sent as ``num_candidates`` in the ``knn`` clause.

    Returns:
        The search body as a dict, or ``None`` when ``query_text`` is ``None``
        or empty.
    """
    # The previous guard (`is not None and len(...) == 0`) only caught the
    # empty string and let None fall through to the encoder.
    if query_text is None or len(query_text) == 0:
        print("query is empty")
        return None
    embedding_model, _ = self.embedding_model_local()
    query_vector = embedding_model.encode(query_text)
    return {
        "knn": {
            "field": "output_dense_vector",
            "query_vector": query_vector,
            "k": k,
            "num_candidates": candidate_num,
        },
    }
    
async def async_es_A_KNN(self,query):
    """Run ``query`` through an ElasticsearchRetriever driven by ``embed_query``.

    Returns a dict mapping each hit's ``page_content`` to its ``_score``
    metadata value. Hits with identical content collapse into a single entry,
    keeping the score of the last one seen.
    """
    retriever = ElasticsearchRetriever(
        index_name=self.index_name,
        body_func=self.embed_query,
        content_field="text",
        es_client=self.es_client,
    )
    scores_by_text = {}
    for doc in await retriever.ainvoke(query):
        scores_by_text[doc.page_content] = doc.metadata["_score"]
    return scores_by_text

and my mapping func is like this:

def create_index(self,text_field = "text",title_field = "title",dense_vector_field = "output_dense_vector",num_characters_field = "num_characters"):
    """Create the index named ``self.index_name`` with a four-field mapping.

    Each argument is the name to give one field: two ``text`` fields, one
    ``dense_vector`` field and one ``integer`` field.
    """
    # (field name, Elasticsearch type) pairs, in mapping order.
    field_types = (
        (text_field, "text"),
        (title_field, "text"),
        (dense_vector_field, "dense_vector"),
        (num_characters_field, "integer"),
    )
    properties = {name: {"type": es_type} for name, es_type in field_types}
    self.es_client.indices.create(
        index=self.index_name,
        mappings={"properties": properties},
    )

``

I've looked through the kNN search page, but it has no Python example. Can somebody tell me how to perform exact kNN with the Python API?

Thanks, @cazzz99. We have an example of exact KNN with Python in our docs. Let me know if this helps or if you are looking for something different.

Thanks! I've checked that out, but the example searches on a numeric field (a price range, in the example). I wonder whether it is possible to search on a text field, as in my code? If so, how should I edit the body func accordingly?

Following the docs, I tried this:

def exact_knn(self, query_text):
    """Build a ``script_score`` search body that ranks hits by cosine similarity.

    The text is embedded with the model returned by
    ``self.embedding_model_local()``. Documents matching ``query_text`` on the
    ``text`` field are then re-scored with ``cosineSimilarity`` against the
    ``output_dense_vector`` field.

    Args:
        query_text: Text used both for the ``match`` filter and the embedding.

    Returns:
        The search body as a dict, or ``None`` when ``query_text`` is ``None``
        or empty.
    """
    # The previous guard (`is not None and len(...) == 0`) only caught the
    # empty string and let None fall through to the encoder.
    if query_text is None or len(query_text) == 0:
        print("query is empty")
        return None
    embed_model, _ = self.embedding_model_local()
    query_vector = embed_model.encode(query_text)
    return {
        "query": {
            "script_score": {
                # Only documents matching this inner query are scored; swap it
                # for {"match_all": {}} to score every document in the index.
                "query": {
                    "match": {
                        "text": query_text,
                    },
                },
                "script": {
                    # The script must name the index's dense_vector field. The
                    # docs example's 'product-vector' is not in this mapping,
                    # which is what raised the EmptyField class_cast_exception.
                    # NOTE(review): keep in sync with the dense_vector_field
                    # name used when the index was created.
                    "source": "cosineSimilarity(params.queryVector, 'output_dense_vector') + 1.0",
                    "params": {
                        "queryVector": query_vector,
                    },
                },
            },
        },
    }

when testing I received this:

   elasticsearch.BadRequestError: BadRequestError(400, 'search_phase_execution_exception', "class_cast_exception: class org.elasticsearch.script.field.EmptyField cannot be cast to class org.elasticsearch.script.field.vectors.DenseVectorDocValuesField (org.elasticsearch.script.field.EmptyField and org.elasticsearch.script.field.vectors.DenseVectorDocValuesField are in module org.elasticsearch.server@8.13.4 of loader 'app')") 

My mapping is quite simple :

self.es_client.indices.create(
            index=self.index_name,
            mappings={
                "properties": {
                    #name the fields as given 
                    text_field: {"type": "text"},
                    title_field: {"type": "text"},
                    dense_vector_field: {"type": "dense_vector"},
                    num_characters_field: {"type": "integer"},
                }
            },

Can somebody help me with this ?

Thanks, @cazzz99. Do you have a complete code sample you can send over?

yea, so firstly my mapping for the index is:

    def create_index(self,text_field = "text",title_field = "title",dense_vector_field = "output_dense_vector",num_characters_field = "num_characters"):
        """Create the index named ``self.index_name`` with a four-field mapping.

        Each argument is the name to give one field: two ``text`` fields, one
        ``dense_vector`` field and one ``integer`` field.
        """
        # Assemble the mapping first so the client call stays a one-liner.
        properties = {}
        properties[text_field] = {"type": "text"}
        properties[title_field] = {"type": "text"}
        properties[dense_vector_field] = {"type": "dense_vector"}
        properties[num_characters_field] = {"type": "integer"}
        index_mappings = {"properties": properties}
        self.es_client.indices.create(index=self.index_name, mappings=index_mappings)


and the method that I call exact search is like:

def exact_knn(self, query_text):
    """Build a ``script_score`` search body that ranks hits by cosine similarity.

    The text is embedded with the model returned by
    ``self.embedding_model_local()``. Documents matching ``query_text`` on the
    ``text`` field are then re-scored with ``cosineSimilarity`` against the
    ``output_dense_vector`` field.

    Args:
        query_text: Text used both for the ``match`` filter and the embedding.

    Returns:
        The search body as a dict, or ``None`` when ``query_text`` is ``None``
        or empty.
    """
    # The previous guard (`is not None and len(...) == 0`) only caught the
    # empty string and let None fall through to the encoder.
    if query_text is None or len(query_text) == 0:
        print("query is empty")
        return None
    embed_model, _ = self.embedding_model_local()
    query_vector = embed_model.encode(query_text)
    # The script must name the index's dense_vector field. 'product-vector'
    # (copied from the docs example) is not in this mapping, which is what
    # raised the EmptyField class_cast_exception.
    # NOTE(review): keep in sync with the dense_vector_field name used when
    # the index was created.
    similarity_script = {
        "source": "cosineSimilarity(params.queryVector, 'output_dense_vector') + 1.0",
        "params": {
            "queryVector": query_vector,
        },
    }
    # Only documents matching this inner query are scored; swap it for
    # {"match_all": {}} to score every document in the index.
    text_filter = {
        "match": {
            "text": query_text,
        },
    }
    return {
        "query": {
            "script_score": {
                "query": text_filter,
                "script": similarity_script,
            },
        },
    }

   
    def es_Exact_KNN(self,query):
        """Run ``query`` through an ElasticsearchRetriever driven by ``exact_knn``.

        Returns a dict mapping each hit's ``page_content`` to its ``_score``
        metadata value. Hits with identical content collapse into a single
        entry, keeping the score of the last one seen.
        """
        retriever = ElasticsearchRetriever(
            index_name=self.index_name,
            body_func=self.exact_knn,
            content_field="text",
            es_client=self.es_client,
        )
        scores_by_text = {}
        for doc in retriever.invoke(query):
            scores_by_text[doc.page_content] = doc.metadata["_score"]
        return scores_by_text

1 Like

Thanks for all the follow-up here. What version of Elastic are you using? Depending on your version, you will want to ensure that query_vector contains floats and not doubles.

This post about a similar issue may also be helpful here, as well as this one.

1 Like

Hello! This might work, as it doesn't rely on the query_text field in the query:

# Template for an exact kNN search: the inner query selects the documents and
# the script assigns each one a cosine-similarity-based score.
# Replace every <PLACEHOLDER> before running.
resp = client.search(
    index="<YOUR_INDEX>",
    query={
        "script_score": {
            # match_all selects every document, so nothing is filtered out by text.
            "query": {
                "match_all": {}
            },
            "script": {
                # <DENSE_VECTOR_FIELD> is the name of the dense_vector field in
                # the index mapping.
                # NOTE(review): the "+ 1.0" presumably keeps the score
                # non-negative, as in the Elasticsearch docs example; confirm.
                "source": "cosineSimilarity(params.queryVector, '<DENSE_VECTOR_FIELD>') + 1.0",
                "params": {
                    # The embedded query, passed to the script as params.queryVector.
                    "queryVector": <YOUR_QUERY_VECTOR>
                }
            }
        }
    },
)
print(resp)
1 Like