Hi, I'm trying to load the data stored in an index on a 6.7.0 cluster into a new index on a 7.4.2 cluster, adding a field of type 'dense_vector'. But I get the following error:
RequestError(400, 'mapper_parsing_exception', 'No handler for type [dense_vector] declared on field [vector]').
Can you explain the reason for it? My code is below.
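For completeness, the functions below rely on a few names defined earlier in my script; roughly, the setup looks like this (the hosts, index names, and batch size are placeholders, and embed_text stands in for my real embedding model):

from elasticsearch import Elasticsearch
from elasticsearch.helpers import bulk

# Placeholder connection details and constants; the real values differ.
old_client = Elasticsearch(["http://old-host:9200"])    # 6.7.0 cluster
new_client = Elasticsearch(["http://new-host:9200"])    # 7.4.2 cluster
OLD_INDEX_NAME = "old_index"
NEW_INDEX_NAME = "new_index"
BATCH_SIZE = 1000

def embed_text(texts):
    # Stub: the real function returns one 768-dimensional vector per text.
    return [[0.0] * 768 for _ in texts]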
BODY = """
{
    "settings": {
        "number_of_shards": 2,
        "number_of_replicas": 2
    },
    "mappings": {
        "dynamic": "true",
        "_source": {
            "enabled": "true"
        },
        "properties": {
            "id": {
                "type": "keyword"
            },
            "title": {
                "type": "text"
            },
            "description": {
                "type": "text"
            },
            "vector": {
                "type": "dense_vector",
                "dims": 768
            }
        }
    }
}
"""
def get_data(elastic_client, scroll, max_size=10000):
    """
    Extract data from Elasticsearch and return the scroll_id,
    so that the next rows can be fetched with the scroll API.
    """
    if max_size > 10000:
        max_size = 10000
    if scroll is None:
        result_dict = elastic_client.search(index=OLD_INDEX_NAME,
                                            scroll='25m',
                                            size=max_size)
    else:
        result_dict = elastic_client.scroll(scroll_id=scroll,
                                            scroll='25m')
    scroll = result_dict['_scroll_id']
    return result_dict, scroll
def append_data(database_size=20000, request_size=10000):
    """
    Append description vectors to Elasticsearch.
    :param: database_size - number of rows in Elasticsearch
    :param: request_size - maximum number of rows that one request can return
    """
    new_client.indices.create(index=NEW_INDEX_NAME,
                              body=BODY)
    # the first request has to be a plain search rather than a scroll call,
    # so we start with scroll set to None
    scroll = None
    if database_size % request_size == 0:
        number_of_requests = database_size // request_size
    else:
        number_of_requests = database_size // request_size + 1
    for i in range(number_of_requests):
        result_dict, scroll = get_data(old_client,
                                       scroll,
                                       max_size=request_size)
        docs = []
        count = 0
        for video in result_dict["hits"]["hits"]:
            docs.append(video)
            count += 1
            # indexing descriptions in batches
            if count % BATCH_SIZE == 0:
                index_batch(docs)
                docs = []
                print("Indexed {} documents.".format(count))
        if docs:
            index_batch(docs)
            print("Indexed {} documents.".format(count))
    new_client.indices.refresh(index=NEW_INDEX_NAME)
    print("Done indexing.")
def index_batch(docs):
    """Add a dense vector of the description to each doc and bulk-index the batch."""
    descriptions = [doc['_source']['description'] for doc in docs]
    description_vectors = embed_text(descriptions)
    requests = []
    for i, doc in enumerate(docs):
        request = {}
        # '_index' is bulk metadata; the remaining keys become the document body
        request['_index'] = NEW_INDEX_NAME
        request['id'] = doc['_source']['id']
        request['title'] = doc['_source']['title']
        request['description'] = doc['_source']['description']
        request['vector'] = description_vectors[i]
        requests.append(request)
    bulk(new_client, requests)
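The whole migration is then driven by a single call, for example (the sizes here are just placeholders for my real data):

append_data(database_size=20000, request_size=10000)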