Hello community,
I was trying to upload a simple CSV file, but unfortunately my mapping is not working. All my fields still come out as "keyword". What am I missing?
import csv
from elasticsearch import Elasticsearch, helpers

def csv_reader(filename, indexname):
    # connect to Elasticsearch; host and port are configurable, these are the defaults
    es = Elasticsearch([{'host': 'localhost', 'port': 9200}])
    # if es is reachable, print "connected", otherwise "Not connected"
    if es.ping():
        print("connected")
    else:
        print("Not connected")
    with open(filename, 'r') as outfile:
        reader = csv.DictReader(outfile)
        Settings = {
            "settings": {
                "numer_of_shards": 1,
                "number_of_replicas": 0
            },
            "mappings": {
                "members": {
                    "dynamic": "strict",
                    "properties": {
                        "_id": {
                            "type": "long"
                        },
                        "name": {
                            "type": "text"
                        },
                        "gefundene_fehler": {
                            "type": "long"
                        },
                        "behobene_fehler": {
                            "type": "long"
                        },
                        "updated_time": {
                            "type": "date"
                        },
                        "time": {
                            "type": "date"
                        },
                        "timestamp": {
                            "type": "date"
                        }
                    }
                }
            }
        }
        # recreate the index if it already exists, then bulk-load the CSV rows
        if es.indices.exists(indexname):
            print("deleting existing index")
            es.indices.delete(index=indexname, ignore=[400, 404])
            print("creating new index1")
            es.indices.create(index=indexname, ignore=400, body=Settings)
            helpers.bulk(es, reader, index=indexname)
        else:
            es.indices.create(index=indexname, ignore=400, body=Settings)
            helpers.bulk(es, reader, index=indexname)
            print("creating new index2")
    print("all lines loaded")
Thank you for your response. But unfortunately it's not working. I just want to upload a CSV file to Elasticsearch. If I try it like this, I get the following errors in PyCharm:
line 399, in bulk
    for ok, item in streaming_bulk(client, actions, *args, **kwargs)
line 320, in streaming_bulk
    for data, (ok, info) in zip(
line 249, in _process_bulk_chunk
    for item in gen:
line 188, in _process_bulk_chunk_success
    raise BulkIndexError("%i document(s) failed to index." % len(errors), errors)
elasticsearch.helpers.errors.BulkIndexError: ('2 document(s) failed to index.', [{'index': {'_index': 'mdfadf', '_type': '_doc', '_id': 'UmY3N4ABaUVkc-44MXpt', 'status': 400, 'error': {'type': 'mapper_parsing_exception', 'reason': "failed to parse field [time] of type [date] in document with id 'UmY3N4ABaUVkc-44MXpt'. Preview of field's value: '2022-01-28 13:03:29'", 'caused_by': {'type': 'illegal_argument_exception', 'reason': 'failed to parse date field [2022-01-28 13:03:29] with format [strict_date_optional_time||epoch_millis]', 'caused_by': {'type': 'date_time_parse_exception', 'reason': 'Failed to parse with all enclosed parsers'}}}, 'data': {'name': 'ProjectR', 'gefunde_fehler': '7', 'behobene_fehler': '9', 'time': '2022-01-28 13:03:29', 'updated_time': '2022-04-17 13:10:01.337150'}}}, {'index': {'_index': 'mdfadf', '_type': '_doc', '_id': 'U2Y3N4ABaUVkc-44MXpt', 'status': 400, 'error': {'type': 'mapper_parsing_exception', 'reason': "failed to parse field [time] of type [date] in document with id 'U2Y3N4ABaUVkc-44MXpt'. Preview of field's value: '2022-02-16 17:00:26'", 'caused_by': {'type': 'illegal_argument_exception', 'reason': 'failed to parse date field [2022-02-16 17:00:26] with format [strict_date_optional_time||epoch_millis]', 'caused_by': {'type': 'date_time_parse_exception', 'reason': 'Failed to parse with all enclosed parsers'}}}, 'data': {'name': 'ProjectB', 'gefunde_fehler': '2', 'behobene_fehler': '8', 'time': '2022-02-16 17:00:26', 'updated_time': '2022-04-17 13:10:01.337150'}}}])
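To inspect each failing document rather than one aggregated BulkIndexError, a minimal sketch using the raise_on_error flag of helpers.bulk (the variable names es, reader, and indexname are reused from the script above):

```python
from elasticsearch import Elasticsearch, helpers

# With raise_on_error=False, bulk() returns (success_count, error_items)
# instead of raising BulkIndexError, so each mapper_parsing_exception
# can be printed and inspected on its own.
success, errors = helpers.bulk(es, reader, index=indexname, raise_on_error=False)
print("indexed:", success)
for item in errors:
    print(item)
```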
For values with fractional seconds like '2022-04-17 13:10:01.337150', use strict_date_optional_time_nanos.
Note that you need to add the 'T' in the values for "updated_time", like this: 2022-04-17T13:10:01.337150, or save them without the nanoseconds.
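For example, a quick way to rewrite the CSV values with the 'T' separator before indexing (a minimal sketch using only Python's standard library; the raw value is taken from the error output above):

```python
from datetime import datetime

# fromisoformat() accepts the space-separated value from the CSV, and
# isoformat() re-serializes it with the 'T' that Elasticsearch expects.
raw = "2022-04-17 13:10:01.337150"
iso = datetime.fromisoformat(raw).isoformat()
print(iso)  # 2022-04-17T13:10:01.337150
```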
If you choose a date without nanoseconds, you can use the same format as the time field.
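Putting that together, a sketch of what the two date mappings could look like (the format strings are assumptions based on the values in the error output, not necessarily the final mapping):

```python
settings = {
    "mappings": {
        "properties": {
            # matches '2022-01-28 13:03:29' (space separator, no fractional seconds)
            "time": {"type": "date", "format": "yyyy-MM-dd HH:mm:ss"},
            # matches '2022-04-17T13:10:01.337150' once the 'T' is added
            "updated_time": {"type": "date", "format": "strict_date_optional_time_nanos"},
        }
    }
}
```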