<
I'm trying to add an extra embedding using another field with the following code:
class Search:
def __init__(self, index_name):
self.es = Elasticsearch(cloud_id=os.environ['ELASTIC_CLOUD_ID'],
api_key=os.environ['ELASTIC_API_KEY'])
print('Connected to Elasticsearch!')
self.model = SentenceTransformer('all-MiniLM-L6-v2')
self.index_name = index_name
def create_index(self):
self.es.indices.delete(index=self.index_name, ignore_unavailable=True)
self.es.indices.create(index=self.index_name, mappings={
'properties': {
'embedding': {
'type': 'dense_vector'
},
'embedding_tags': {
'type': 'dense_vector'
}
}
})
def get_embedding(self, text):
return self.model.encode(text)
def insert_documents(self, documents):
operations = []
for document in documents:
tags = document['tags'].replace(',','')
operations.append({'index': {'_index': self.index_name}})
operations.append({
**document,
'embedding': self.get_embedding(document['title']),
'embedding_tags': self.get_embedding(tags)
})
return self.es.bulk(operations=operations)
def reindex(self):
self.create_index()
with open('data/items.json', 'rt') as f:
documents = json.loads(f.read())
return self.insert_documents(documents)
def search(self, query):
query_vector = self.get_embedding(query)
results = self.es.search(
knn={
'field': 'embedding',
'query_vector': query_vector,
'num_candidates': 50,
'k': 10,
},
_source=['title','tags','price']
)
res = results['hits']
results2 = self.es.search(
knn={
'field': 'embedding_tags',
'query_vector': query_vector,
'num_candidates': 50,
'k': 10,
},
_source=['title','tags','price']
)
res2 = results2['hits']
return res
res and res2 both return no results and when I dig into the results object I get something like this:
ObjectApiResponse({'took': 4, 'timed_out': False, '_shards': {'total': 20, 'successful': 1, 'skipped': 0, 'failed': 19, 'failures': [{'shard': 0, 'index': '.ds-logs-enterprise_search.api-default-2024.01.26-000001', 'node': 'Rgiwp9MOTICr1IEhk1-DSQ', 'reason': {'type': 'query_shard_exception', 'reason': 'failed to create query: field [embedding] does not exist in the mapping', 'ind...
I've been trying to find some possible solutions but with no success at the moment.
/>