We're attempting to import historical data in CSV format via Logstash, but it's choking on the date values.
So far, we have the following config:
input {
  stdin {}
  file {
    path => ["/Applications/logstash-6.2.4/data/data.csv"]
    start_position => "beginning"
  }
}
filter {
  csv {
    separator => ","
    columns => [
      "keen.timestamp",
      "keen.created_at",
      "keen.id",
      "keen.location.coordinates",
      "bytes_received",
      "city",
      "client_id",
      "country",
      "day_of_month",
      "day_of_week",
      "end_time",
      "error.bytesParsed",
      "error.code",
      "error.errno",
      "error.expose",
      "error.message",
      "error.path",
      "error.rawPacket.data",
      "error.rawPacket.type",
      "error.status",
      "error.statusCode",
      "error.syscall",
      "file_name",
      "hour_of_day",
      "ip",
      "keen_ip_geo.city",
      "keen_ip_geo.continent",
      "keen_ip_geo.coordinates",
      "keen_ip_geo.country",
      "keen_ip_geo.country_code",
      "keen_ip_geo.postal_code",
      "keen_ip_geo.province",
      "month",
      "path",
      "post_id",
      "referrer",
      "referrer_info.medium",
      "referrer_info.source",
      "referrer_info.term",
      "region",
      "request_duration",
      "request_durations",
      "server_ip",
      "site_id",
      "start_time",
      "status",
      "total_connections",
      "total_request_duration",
      "url",
      "user_agent",
      "user_agent_parsed.browser.family",
      "user_agent_parsed.browser.major",
      "user_agent_parsed.browser.minor",
      "user_agent_parsed.browser.patch",
      "user_agent_parsed.device.family",
      "user_agent_parsed.os.family",
      "user_agent_parsed.os.major",
      "user_agent_parsed.os.minor",
      "user_agent_parsed.os.patch",
      "user_agent_parsed.os.patch_minor",
      "year"
    ]
  }
  mutate { convert => [ "keen.timestamp", "date" ] }
  mutate { convert => [ "keen.created_at", "date" ] }
  mutate { convert => [ "start_time", "date" ] }
}
output {
  elasticsearch {
    hosts => ["localhost:9200"]
    index => "correct_downloads"
    document_type => "correct_downloads"
  }
  stdout {}
}
... but when we run this, we get the following errors:
[2018-09-05T09:21:45,908][ERROR][logstash.pipeline ] Error registering plugin {:pipeline_id=>"main", :plugin=>"#<LogStash::FilterDelegator:0x180ad90d @metric_events_out=org.jruby.proxy.org.logstash.instrument.metrics.counter.LongCounter$Proxy2 - name: out value:0, @metric_events_in=org.jruby.proxy.org.logstash.instrument.metrics.counter.LongCounter$Proxy2 - name: in value:0, @metric_events_time=org.jruby.proxy.org.logstash.instrument.metrics.counter.LongCounter$Proxy2 - name: duration_in_millis value:0, @id="5de7e9f1cd7ddffd32df93a251f985d4a55ef70e3c82d51e4235e61861b5ad57", @klass=LogStash::Filters::Mutate, @metric_events=#<LogStash::Instrument::NamespacedMetric:0x266343bc @metric=#<LogStash::Instrument::Metric:0x52296872 @collector=#<LogStash::Instrument::Collector:0x160c4610 @agent=nil, @metric_store=#<LogStash::Instrument::MetricStore:0x5800276d @store=#<Concurrent:0x00000000000fb0 entries=3 default_proc=nil>, @structured_lookup_mutex=#<Mutex:0x9799a00>, @fast_lookup=#<Concurrent:0x00000000000fb4 entries=76 default_proc=nil>>>>, @namespace_name=[:stats, :pipelines, :main, :plugins, :filters, :"5de7e9f1cd7ddffd32df93a251f985d4a55ef70e3c82d51e4235e61861b5ad57", :events]>, @filter=<LogStash::Filters::Mutate convert=>{"keen.timestamp"=>"date"}, id=>"5de7e9f1cd7ddffd32df93a251f985d4a55ef70e3c82d51e4235e61861b5ad57", enable_metric=>true, periodic_flush=>false>>", :error=>"translation missing: en.logstash.agent.configuration.invalid_plugin_register", :thread=>"#<Thread:0x23a1cd1d run>"}
[2018-09-05T09:21:45,924][ERROR][logstash.pipeline ] Pipeline aborted due to error {:pipeline_id=>"main", :exception=>#<LogStash::ConfigurationError: translation missing: en.logstash.agent.configuration.invalid_plugin_register>, :backtrace=>["/Applications/logstash-6.2.4/vendor/bundle/jruby/2.3.0/gems/logstash-filter-mutate-3.3.1/lib/logstash/filters/mutate.rb:219:in `block in register'", "org/jruby/RubyHash.java:1343:in `each'", "/Applications/logstash-6.2.4/vendor/bundle/jruby/2.3.0/gems/logstash-filter-mutate-3.3.1/lib/logstash/filters/mutate.rb:217:in `register'", "/Applications/logstash-6.2.4/logstash-core/lib/logstash/pipeline.rb:342:in `register_plugin'", "/Applications/logstash-6.2.4/logstash-core/lib/logstash/pipeline.rb:353:in `block in register_plugins'", "org/jruby/RubyArray.java:1734:in `each'", "/Applications/logstash-6.2.4/logstash-core/lib/logstash/pipeline.rb:353:in `register_plugins'", "/Applications/logstash-6.2.4/logstash-core/lib/logstash/pipeline.rb:731:in `maybe_setup_out_plugins'", "/Applications/logstash-6.2.4/logstash-core/lib/logstash/pipeline.rb:363:in `start_workers'", "/Applications/logstash-6.2.4/logstash-core/lib/logstash/pipeline.rb:290:in `run'", "/Applications/logstash-6.2.4/logstash-core/lib/logstash/pipeline.rb:250:in `block in start'"], :thread=>"#<Thread:0x23a1cd1d run>"}
[2018-09-05T09:21:45,959][ERROR][logstash.agent ] Failed to execute action {:id=>:main, :action_type=>LogStash::ConvergeResult::FailedAction, :message=>"Could not execute action: LogStash::PipelineAction::Create/pipeline_id:main, action_result: false", :backtrace=>nil}
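Judging by the backtrace, the registration failure comes from mutate's convert block, so our working theory is that "date" simply isn't a valid conversion target — the documented convert types are scalars such as integer, float, string, and boolean. Something like the following is what convert is actually for (bytes_received used purely as an example):

filter {
  # convert handles scalar coercions only (integer, float, string, boolean);
  # "date" is not among them, which would explain the register error above
  mutate { convert => { "bytes_received" => "integer" } }
}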
... and when we remove the mutate filters, we get another error:
[2018-09-05T08:51:57,497][WARN ][logstash.outputs.elasticsearch] Could not index event to Elasticsearch. {:status=>400, :action=>["index", {:_id=>nil, :_index=>"correct_downloads", :_type=>"correct_downloads", :_routing=>nil}, #], :response=>{"index"=>{"_index"=>"correct_downloads", "_type"=>"correct_downloads", "_id"=>"1Uq3qGUBxmfELKVAaRe2", "status"=>400, "error"=>{"type"=>"mapper_parsing_exception", "reason"=>"failed to parse [keen.timestamp]", "caused_by"=>{"type"=>"illegal_argument_exception", "reason"=>"Invalid format: "keen.timestamp""}}}}}
I'm assuming there's some additional processing step for dates that we're missing?
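What we're considering next is the date filter, roughly like the sketch below — the ISO8601 match pattern is a guess on our part, since it would have to mirror whatever format the CSV actually contains:

filter {
  # parse the raw string into a real timestamp and write it back to the
  # same field; "ISO8601" is an assumed format, not confirmed from the data
  date {
    match => ["keen.timestamp", "ISO8601"]
    target => "keen.timestamp"
  }
}

Would that be the right approach, and would we need one date block per date field?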