Hi,
I'm trying to test out the parallel_bulk functionality in the python client for elasticsearch and I can't seem to get helpers.parallel_bulk to work.
For example, using the regular helpers.bulk works:
bulk_data = []
header = data.columns
for i in range(len(data)):
source_dict = {}
row = data.iloc[i]
for k in header:
source_dict[k] = str(row[k])
data_dict = {
'_op_type': 'index',
'_index': index_name,
'_type': doc_type,
'_source': source_dict
}
bulk_data.append(data_dict)
es.indices.create(index=index_name, body=settings, ignore=404)
helpers.bulk(client=es, actions=bulk_data)
es.indices.refresh()
es.count(index=index_name)
{'_shards': {'failed': 0, 'successful': 5, 'total': 5}, 'count': 13979}
But replacing it with helpers.parallel_bulk doesn't seem to index anything:
bulk_data = []
header = data.columns
for i in range(len(data)):
source_dict = {}
row = data.iloc[i]
for k in header:
source_dict[k] = str(row[k])
data_dict = {
'_op_type': 'index',
'_index': index_name,
'_type': doc_type,
'_source': source_dict
}
bulk_data.append(data_dict)
es.indices.create(index=index_name, body=settings, ignore=404)
helpers.parallel_bulk(client=es, actions=bulk_data, thread_count=4)
es.indices.refresh()
es.count(index=index_name)
{'_shards': {'failed': 0, 'successful': 5, 'total': 5}, 'count': 0}
Am I missing something? I'm on elasticsearch 2.1.1 with elasticsearch-py 2.1.0.