Hi,
I'm having a big problem with the csv filter. I have four .csv files, each with its own .conf file.
When I test each config with stdout output, the data looks fine, and when I run Logstash with only one of them active, the data is imported correctly as well.
However, as soon as two or more .conf files using the csv filter are active, the data gets mangled.
E.g. index1 looks OK, but index2 has some fields from index1, and some column names have become field values. It is very strange.
I also noticed that both of those indexes have the reverse DNS lookup fields from my netflow.conf.
It looks like Logstash is doing something strange somewhere, or something is wrong with the way I have it set up. How can I fix this?
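From what I understand, Logstash concatenates every .conf file in its config directory into a single pipeline, so every filter block runs against every event no matter which input it came from; only my outputs are wrapped in conditionals, not my filters. If that is what is happening, wrapping each filter block in the same type conditional should isolate them. A sketch of what I mean for the data config (assuming my type fields can be relied on):
filter {
  if [type] == "data" {
    csv {
      columns => ["Data","Duration","KBytes","Service","Label","APN","Code","Lat","Lon","CNO","Message","TxKBytes","RxKBytes"]
    }
    # the date and mutate blocks would move inside this same conditional
  }
}
Here are my headers, example documents, and configs.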
Index1 CSV headers
Date Duration KBytes Service Label APN Code Lat Lon CN0 Message TxKBytes RxKBytes
Index2 CSV headers
Date Block Message Code Lat Lon CN0
Index1 JSON
{
"_index": "data-2017.09",
"_type": "data",
"_id": "AV5vIhbX0t3riYLIY8_W",
"_version": 1,
"_score": null,
"_source": {
"TxKBytes": "73876",
"Message": "Power supply was turned off",
"RxKBytes": "487211",
"CNO": "67.8",
"Label": "Default",
"Service": "Standard",
"Data": "2017-09-01 13:05:05",
"Duration": "20:03:46",
"Lon": "lonvalue",
"type": "data",
"Code": "errorcode",
"path": "/home/test/Desktop/test/test2/data/data.csv",
"netflow": {
"ipv4_src_host": "%{[netflow][ipv4_src_addr]}",
"ipv4_dst_host": "%{[netflow][ipv4_dst_addr]}"
},
"@timestamp": "2017-09-11T04:10:58.551Z",
"KBytes": "1",
"@version": "1",
"host": "ELK-test",
"Lat": "latvalue",
"APN": "myapn"
},
"fields": {
"@timestamp": [
1505103058551
]
},
"sort": [
1505103058551
]
}
Index2 JSON
{
"_index": "event-2017.09",
"_type": "event",
"_id": "AV5vIhYL0t3riYLIY89s",
"_version": 1,
"_score": null,
"_source": {
"Label": "latvalue",
"Service": "errorcode",
"Data": "2017-09-10 11:38:23",
"Duration": "ADE",
"type": "event",
"Code": CNOvalue",
"path": "/home/test/Desktop/test/test2/event/event.csv",
"netflow": {
"ipv4_src_host": "%{[netflow][ipv4_src_addr]}",
"ipv4_dst_host": "%{[netflow][ipv4_dst_addr]}"
},
"@timestamp": "2017-09-11T04:10:58.354Z",
"KBytes": "Notice: Status (Signal).",
"@version": "1",
"host": "ELK-test",
"APN": "lonvalue"
},
"fields": {
"@timestamp": [
1505103058354
]
},
"sort": [
1505103058354
]
}
As you can see, the second index has values that (a) should not be there and (b) have ended up in the wrong fields. Both indexes also have the netflow fields, which should not be there either.
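The netflow fields would fit the same explanation: whatever filter in my netflow.conf adds the reverse DNS fields presumably runs on the csv events too, and since those events have no [netflow][ipv4_src_addr] field, the %{...} references come through as literal strings. I haven't posted netflow.conf here, but I assume it needs the same kind of guard, something along these lines (hypothetical, since the type or tag it actually keys on may differ):
filter {
  if [type] == "netflow" {
    # hypothetical guard; my real netflow.conf may use a different type or tag
    mutate {
      add_field => {
        "[netflow][ipv4_src_host]" => "%{[netflow][ipv4_src_addr]}"
        "[netflow][ipv4_dst_host]" => "%{[netflow][ipv4_dst_addr]}"
      }
    }
    dns {
      reverse => [ "[netflow][ipv4_src_host]", "[netflow][ipv4_dst_host]" ]
      action => "replace"
    }
  }
}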
Index1 conf
input {
file {
type => "data"
path => "/home/test/Desktop/test/test2/data/data.csv"
sincedb_path => "/dev/null"
start_position => "beginning"
}
}
filter {
csv {
columns => ["Data","Duration","KBytes","Service","Label","APN","Code","Lat","Lon","CNO","Message","TxKBytes","RxKBytes"]
}
date {
match => [ "Date", "yyyy-MM-dd HH:mm:ss" ]
timezone => "UTC"
target => "@timestamp"
}
mutate {
remove_field => ["message", "Date"]
}
}
output {
if [type] == "data" {
elasticsearch {
hosts => localhost
index => "data-%{+YYYY.MM}"
}
}
}
Index2 conf
input {
file {
type => "event"
path => "/home/test/Desktop/test/test2/event/event.csv"
sincedb_path => "/dev/null"
start_position => "beginning"
}
}
filter {
csv {
columns => ["Date","Block","Message","Code","Lat","Lon","CNO"]
}
date {
match => [ "Date", "yyyy-MM-dd HH:mm:ss" ]
timezone => "UTC"
target => "@timestamp"
}
mutate {
remove_field => ["message", "Date"]
}
}
output {
if [type] == "event" {
elasticsearch {
hosts => localhost
index => "event-%{+YYYY.MM}"
}
}
}
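If the merged-pipeline explanation is right, the event config would need the same guard (same sketch, same assumption about the type field):
filter {
  if [type] == "event" {
    csv {
      columns => ["Date","Block","Message","Code","Lat","Lon","CNO"]
    }
    date {
      match => [ "Date", "yyyy-MM-dd HH:mm:ss" ]
      timezone => "UTC"
      target => "@timestamp"
    }
    mutate {
      remove_field => ["message", "Date"]
    }
  }
}
The only other option I can think of is running each config as its own Logstash instance with -f, which is effectively what my working single-file tests did. Is the conditional approach the right fix?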