Hi dudes!
I just want to remove the header (first line) from my CSV files. The first line contains the column names. After parsing a CSV file, the visualization in Kibana still shows that first line, parsed as if it were an ordinary data row (each column name appears as the value of its own field).
My code:
# Read every CSV file in the monitored directory.
input {
  file {
    path           => "/home/admxxx/xxxx/xxxx/*.csv"
    # Do not persist read offsets: with sincedb pointed at /dev/null the
    # files are read again from the start each time Logstash is restarted.
    sincedb_path   => "/dev/null"
    start_position => "beginning"
  }
}
# Parse each semicolon-separated line into named fields, discard the header
# row, take the event timestamp from the "date" column and derive the target
# index suffix from the source file name.
filter {
  csv {
    # NOTE(review): "stauses" looks like a typo for "statuses". It is left
    # untouched because it is the field name written to Elasticsearch, but if
    # the header line in the files spells it differently, skip_header (below)
    # will never match the header.
    columns => ["id tweet","date","author","text","app","id user","followers","following","stauses","location","urls","geolocation","name","description","url_media","type media","quoted","relation","replied_id","user replied","retweeted_id","user retweeted","quoted_id","user quoted","first HT","lang","created_at","verified","avatar","link"]
    separator => ";" # fields are semicolon-delimited (not tab)
    # Without autodetect_column_names, skip_header only skips a row whose
    # values match `columns` exactly; any difference (renamed column, BOM,
    # trailing whitespace, different column count) lets the header through.
    skip_header => true
    #autodetect_column_names => true
    #autogenerate_column_names => true
  }

  # Fallback for a header line that skip_header did not recognise: a real
  # data row never carries the literal column name in the date column.
  # Assumes the second header cell is exactly "date" -- TODO confirm against
  # the actual files.
  if [date] == "date" {
    drop { }
  }

  date {
    #match => ["date","yyyy-MM-dd HH:mm:ss"]
    match => ["date","dd/MM/yyyy HH:mm","dd/MM/yyyy H:mm","yyyy-MM-dd HH:mm:ss"]
    timezone => "UTC"
    target => "@timestamp"
  }

  # Currently a no-op; both options are disabled.
  mutate {
    #gsub => ["message","\","'"]
    #remove_field => ["message"]
  }

  # Derive the index suffix from the source file name without its .csv
  # extension.
  ruby {
    code => "
      # The file input writes the source path to 'path', or to
      # '[log][file][path]' when ECS compatibility is enabled.
      source_path = event.get('path') || event.get('[log][file][path]')
      unless source_path.nil?
        # Elasticsearch index names must be lower case.
        event.set('index_monitoring_twitter', File.basename(source_path.to_s, '.csv').downcase)
      end
    "
  }
  #grok {
  #  match => [ "path", "/(?<index_monitoring_twitter>[^/]+).csv" ]
  #}
}
# Ship each event to a per-file Elasticsearch index and echo it to the console.
output {
  elasticsearch {
    hosts => ["http://localhost:9200"]
    # The index name is built from the index_monitoring_twitter field set in
    # the filter stage.
    index => "monitorizaciones_%{index_monitoring_twitter}"
    # Fixed-index alternative:
    #index => "monitorizaciones_hoarder"
  }

  # Print every event in rubydebug format, for checking the parsed result.
  stdout {
    codec => rubydebug
  }
}
Kibana: