Error when doing a kNN search with more than one embedding field in the index

<
I'm trying to add an extra embedding using another field with the following code:

class Search:
    """Small wrapper around an Elasticsearch index holding two embeddings.

    Every document gets an ``embedding`` vector computed from its title and
    an ``embedding_tags`` vector computed from its tags.
    """

    def __init__(self, index_name):
        """Connect to Elastic Cloud and load the sentence-embedding model.

        Args:
            index_name: Name of the index this instance creates, fills and
                searches.

        Raises:
            KeyError: If ``ELASTIC_CLOUD_ID`` or ``ELASTIC_API_KEY`` is not
                set in the environment.
        """
        self.es = Elasticsearch(cloud_id=os.environ['ELASTIC_CLOUD_ID'],
                                api_key=os.environ['ELASTIC_API_KEY'])
        print('Connected to Elasticsearch!')
        self.model = SentenceTransformer('all-MiniLM-L6-v2')
        self.index_name = index_name

    def create_index(self):
        """Drop the index if present and recreate it with both vector fields."""
        # ignore_unavailable lets the delete pass when the index is missing.
        self.es.indices.delete(index=self.index_name, ignore_unavailable=True)
        self.es.indices.create(index=self.index_name, mappings={
            'properties': {
                'embedding': {
                    'type': 'dense_vector',
                },
                'embedding_tags': {
                    'type': 'dense_vector',
                },
            },
        })

    def get_embedding(self, text):
        """Return the model's encoding of ``text``."""
        return self.model.encode(text)

    def insert_documents(self, documents):
        """Bulk-index ``documents`` together with their two embeddings.

        Args:
            documents: Iterable of dicts; each one must provide ``'title'``
                and ``'tags'`` (a string) keys.

        Returns:
            The response of the bulk request.
        """
        operations = []
        for document in documents:
            # Commas are stripped from the tag string before it is embedded.
            tags = document['tags'].replace(',', '')
            operations.append({'index': {'_index': self.index_name}})
            operations.append({
                **document,
                'embedding': self.get_embedding(document['title']),
                'embedding_tags': self.get_embedding(tags),
            })
        return self.es.bulk(operations=operations)

    def reindex(self):
        """Recreate the index and load it from ``data/items.json``.

        Returns:
            The response of the bulk request issued by ``insert_documents``.
        """
        self.create_index()
        with open('data/items.json', 'rt', encoding='utf-8') as f:
            documents = json.load(f)
        return self.insert_documents(documents)

    def _knn_search(self, field, query_vector):
        """Run one kNN query against ``field`` of this instance's index."""
        return self.es.search(
            # Without ``index`` the request targets every index in the
            # cluster; indices that lack the vector field then fail with
            # "field [...] does not exist in the mapping".
            index=self.index_name,
            knn={
                'field': field,
                'query_vector': query_vector,
                'num_candidates': 50,
                'k': 10,
            },
            _source=['title', 'tags', 'price'],
        )

    def search(self, query):
        """Run a kNN search for ``query`` on both embedding fields.

        Args:
            query: Free-text query; it is embedded once and the same vector
                is used for both fields.

        Returns:
            The ``'hits'`` section of the search on the ``embedding`` field.
        """
        query_vector = self.get_embedding(query)

        results = self._knn_search('embedding', query_vector)
        res = results['hits']

        results2 = self._knn_search('embedding_tags', query_vector)
        # NOTE: the tag-based hits are fetched but not returned yet; only the
        # title-based hits reach the caller.
        res2 = results2['hits']
        return res

Both res and res2 come back with no results, and when I dig into the results object I see something like this:

ObjectApiResponse({'took': 4, 'timed_out': False, '_shards': {'total': 20, 'successful': 1, 'skipped': 0, 'failed': 19, 'failures': [{'shard': 0, 'index': '.ds-logs-enterprise_search.api-default-2024.01.26-000001', 'node': 'Rgiwp9MOTICr1IEhk1-DSQ', 'reason': {'type': 'query_shard_exception', 'reason': 'failed to create query: field [embedding] does not exist in the mapping', 'ind...

I've been trying to find some possible solutions but with no success at the moment.
/>

Hi Carlos, welcome to the Elastic community. Are you trying to add one more embedding field to the existing data stream .ds-logs-enterprise_search.api-default-2024.01.26-000001?

This topic was automatically closed 28 days after the last reply. New replies are no longer allowed.