Hello everyone,
I'm starting using ES and I'm really having trouble to index a dataframe.
dataframe looks like :
So far I wrote the following code :
es_client = Elasticsearch(hosts = "http://xxx.xxx.xxx.xxx:9200", basic_auth=("user", "pass") ,request_timeout=60)
def create_bulked_data(df, index_name):
bulk_data = []
for r, row in df.iterrows():
try:
data_dict = {
'_index': index_name,
'_id': row['_id'],
'_source': row
}
bulk_data.append(data_dict)
except Exception as e:
print(e)
return bulk_data
def push_data(client,bulk,index_name):
helpers.bulk(client=client, actions=bulk)
client.indices.refresh()
client.count(index=index_name)
So when I try this:
data_to_push = create_bulked_data(df,"test_indexing")
push_data(es_client,data_to_push,"test_indexing")
I have the following error message:
>>> data_to_push = create_bulked_data(df,"test_indexing")
>>> push_data(es_client,data_to_push,"test_indexing")
Traceback (most recent call last):
File "<stdin>", line 1, in <module>
File "<stdin>", line 2, in push_data
File "C:\[user]\AppData\Local\Programs\Python\Python39\lib\site-packages\elasticsearch\helpers\actions.py", line 524, in bulk
for ok, item in streaming_bulk(
File "C:\[user]\AppData\Local\Programs\Python\Python39\lib\site-packages\elasticsearch\helpers\actions.py", line 438, in streaming_bulk
for data, (ok, info) in zip(
File "C:\[user]\AppData\Local\Programs\Python\Python39\lib\site-packages\elasticsearch\helpers\actions.py", line 355, in _process_bulk_chunk
yield from gen
File "C:[user]\AppData\Local\Programs\Python\Python39\lib\site-packages\elasticsearch\helpers\actions.py", line 274, in _process_bulk_chunk_success
raise BulkIndexError(f"{len(errors)} document(s) failed to index.", errors)
elasticsearch.helpers.BulkIndexError: 10 document(s) failed to index.
data_to_push looks like:
Anyone can help please ?