Hi, i parse xml file that contains 500 tag named" ReportItem ". I am using split filter to add disctint documents for each ReportItem . Logstash only parse the 3 documents and return file _split_type_failure.
Here is my config file :
input {
file {
path => "/home/doc.xml"
start_position => "beginning"
sincedb_path => "/dev/null"
codec => multiline {
pattern => "^<Data>"
negate => "true"
what => "next"
}
}
}
filter{
xml{
target => "xml_content"
store_xml => "false"
source => "message"
xpath =>
[
"/xmldata/head1/key1/text()", "key1",
"/xmldata/ReportItems/ReportItem", "ReportItem"
]
}
split{
field => "ReportItem"
}
xml{
source => "ReportItem"
store_xml => "false"
xpath => [
"/ReportItem/Name/text()" , "ReportName"
]
}
mutate {
remove_field => ["message"]
remove_field => ["path"]
}
date {
match => [ "date", "dd-MM-yyyy HH:mm:ss" ]
timezone => "Europe/Amsterdam"
}
}
output{
elasticsearch {
hosts => ["localhost:9200"]
}
}
Here the simplified xml file (the file contains 500 tag "ReportItem"):
<xmldata>
<head1>
<key1>Value1</key1>
<key2> Value2 </key2>
<id> 00001 </id>
<date> 01-01-2016 09:00:00 </date>
</head1>
<ReportItems>
<ReportItem>
<Name>SqlInjection</Name>
</ReportItem>
<ReportItem>
<Name>XSS </Name>
</ReportItem>
</ReportItems>
</xmldata>
here is logstash log error :
[2018-08-13T08:33:34,559][INFO ][logstash.outputs.elasticsearch] New Elasticsearch output {:class=>"LogStash::Outputs::ElasticSearch", :hosts=>["//localhost:9200"]}
[2018-08-13T08:33:36,223][INFO ][logstash.pipeline ] Starting pipeline {"id"=>"main", "pipeline.workers"=>1, "pipeline.batch.size"=>125, "pipeline.batch.delay"=>5, "pipeline.max_inflight"=>125}
[2018-08-13T08:33:36,654][INFO ][logstash.pipeline ] Pipeline main started
[2018-08-13T08:33:36,816][INFO ][logstash.agent ] Successfully started Logstash API endpoint {:port=>9600}
[2018-08-13T08:33:37,738][WARN ][logstash.agent ] stopping pipeline {:id=>"main"}
[2018-08-13T08:33:38,476][WARN ][logstash.filters.split ] Only String and Array types are splittable. field:ReportItem is of type = NilClass
[2018-08-13T08:34:05,639][INFO ][logstash.modules.scaffold] Initializing module {:module_name=>"fb_apache", :directory=>"/usr/share/logstash/modules/fb_apache/configuration"}
[2018-08-13T08:34:05,645][INFO ][logstash.modules.scaffold] Initializing module {:module_name=>"netflow", :directory=>"/usr/share/logstash/modules/netflow/configuration"}
[2018-08-13T08:34:07,103][INFO ][logstash.outputs.elasticsearch] Elasticsearch pool URLs updated {:changes=>{:removed=>[], :added=>[http://localhost:9200/]}}
[2018-08-13T08:34:07,114][INFO ][logstash.outputs.elasticsearch] Running health check to see if an Elasticsearch connection is working {:healthcheck_url=>http://localhost:9200/, :path=>"/"}
[2018-08-13T08:34:07,469][WARN ][logstash.outputs.elasticsearch] Restored connection to ES instance {:url=>"http://localhost:9200/"}
[2018-08-13T08:34:07,675][INFO ][logstash.outputs.elasticsearch] Using mapping template from {:path=>nil}
[2018-08-13T08:34:07,677][INFO ][logstash.outputs.elasticsearch] Attempting to install template {:manage_template=>{"template"=>"logstash-*", "version"=>50001, "settings"=>{"index.refresh_interval"=>"5s"}, "mappings"=>{"_default_"=>{"_all"=>{"enabled"=>true, "norms"=>false}, "dynamic_templates"=>[{"message_field"=>{"path_match"=>"message", "match_mapping_type"=>"string", "mapping"=>{"type"=>"text", "norms"=>false}}}, {"string_fields"=>{"match"=>"*", "match_mapping_type"=>"string", "mapping"=>{"type"=>"text", "norms"=>false, "fields"=>{"keyword"=>{"type"=>"keyword", "ignore_above"=>256}}}}}], "properties"=>{"@timestamp"=>{"type"=>"date", "include_in_all"=>false}, "@version"=>{"type"=>"keyword", "include_in_all"=>false}, "geoip"=>{"dynamic"=>true, "properties"=>{"ip"=>{"type"=>"ip"}, "location"=>{"type"=>"geo_point"}, "latitude"=>{"type"=>"half_float"}, "longitude"=>{"type"=>"half_float"}}}}}}}}