I'm importing a CSV file into the indices my_index-[0-9] (the 0-9 suffix is the last digit of "owner_id"; I have 10 indices, and the target index is chosen purely by that last digit).
input {
  file {
    path => "/data/logstash-2.3.1/conf/export.csv"
    start_position => "beginning"
    sincedb_path => "/dev/null"
  }
}
filter {
  csv {
    columns => ["order_id", "owner_id", "execution_date", "creation_date", "authorization_date", "description", "payer_name", "payer_account_id", "payee_name", "payee_account_id", "amount", "currency", "status", "type", "sub_type", "svt_id", "payer_iban", "payee_iban"]
    separator => ";"
    skip_empty_columns => true
    convert => {
      "sub_type" => "integer"
    }
  }
  grok {
    match => ["owner_id", "(?<index_number>.$)"]
  }
  mutate {
    add_field => {
      "[@metadata][index_number]" => "%{index_number}"
    }
  }
  mutate {
    remove_field => ["index_number", "@timestamp", "@version"]
  }
}
output {
  elasticsearch {
    hosts => ["localhost:9200"]
    index => "my_index-%{[@metadata][index_number]}"
    document_id => "%{order_id}"
    routing => "%{owner_id}"
    document_type => "order_item"
    flush_size => 4000
  }
}
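The grok pattern is only meant to capture the last character of owner_id. As a sanity check for that part, a throwaway pipeline like the sketch below (feeding owner_id values on stdin and printing with rubydebug) exercises the same capture in isolation:

input { stdin {} }
filter {
  grok {
    # same capture as in the real config, applied to the stdin line
    match => ["message", "(?<index_number>.$)"]
  }
}
output {
  stdout { codec => rubydebug }
}

Typing 0911978367901 into that pipeline yields index_number => "1", so with the real config that document should end up in my_index-1.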
This is what my CSV file looks like:
"20190616000439995800";"0911978367901";"2019-06-16";16.06.2019 12:33:57;16.06.2019 12:33:57;"Description_something 1. b/1. c-06-2019";"JOHN DOE";"3200371523";"CITY NAME";"HH04040405933282134";462;"191";"PRO";"PMT";1;"MobileVersion";"HX3331234594358234";""
I keep getting this error:
"status"=>400, "error"=>{"type"=>"mapper_parsing_exception", "reason"=>"failed to parse [creation_date]", "caused_by"=>{"type"=>"illegal_argument_exception", "reason"=>"Invalid format: "16.06.2019 15:57:37" is malformed at ".06.2019 15:57:37""}}}}, :level=>:warn}
In the rubydebug output I can see that most of the CSV lines are parsed correctly, but plenty of them look like this:
"message" => ""20190616000440074438";"1103963335145";"2019-06-17";16.06.2019 20:19:13;16.06.2019 20:46:55;"Description for 5/2019\r",
"path" => "/data/logstash-2.3.1/conf/export.csv",
"host" => "myhosturl",
"tags" => [
[0] "_csvparsefailure",
[1] "_grokparsefailure"
]
Zero documents end up in Elasticsearch (it's also version 2.3.1, the same as Logstash). Am I missing something in the configuration?
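By "zero" I mean that a count query like the one below (assuming Elasticsearch on its default HTTP port) returns a count of 0 across all the my_index-* indices:

curl -s 'localhost:9200/my_index-*/_count?pretty'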
Here's the mapping for one of the indices:
{
  "my_index-9": {
    "mappings": {
      "order_item": {
        "_routing": {
          "required": true
        },
        "properties": {
          "amount": {
            "type": "double"
          },
          "authorization_date": {
            "type": "date",
            "format": "strict_date_optional_time||epoch_millis"
          },
          "creation_date": {
            "type": "date",
            "format": "strict_date_optional_time||epoch_millis"
          },
          "currency": {
            "type": "string",
            "index": "not_analyzed"
          },
          "description": {
            "type": "string",
            "analyzer": "custom_analyzer"
          },
          "execution_date": {
            "type": "date",
            "format": "strict_date_optional_time||epoch_millis"
          },
          "host": {
            "type": "string",
            "index": "not_analyzed"
          },
          "owner_id": {
            "type": "string",
            "index": "not_analyzed"
          },
          "path": {
            "type": "string",
            "index": "not_analyzed"
          },
          "payee_account_id": {
            "type": "string",
            "index": "not_analyzed"
          },
          "payee_name": {
            "type": "string",
            "analyzer": "custom_analyzer"
          },
          "payer_account_id": {
            "type": "string",
            "index": "not_analyzed"
          },
          "payer_name": {
            "type": "string",
            "analyzer": "custom_analyzer"
          },
          "status": {
            "type": "string",
            "index": "not_analyzed"
          },
          "sub_type": {
            "type": "integer"
          },
          "svt_id": {
            "type": "string",
            "index": "not_analyzed"
          },
          "type": {
            "type": "string",
            "index": "not_analyzed"
          }
        }
      }
    }
  }
}
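Or should the fix be on the mapping side instead, i.e. extending the date format so the fields also accept the CSV's timestamp layout? Something like this for creation_date (and the same for authorization_date), where only the format string changes and everything else stays as above:

"creation_date": {
  "type": "date",
  "format": "strict_date_optional_time||epoch_millis||dd.MM.yyyy HH:mm:ss"
}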