Not properly formated XML

Hi All,

I have raw event logs like this

<Event xmlns='http://schemas.microsoft.com/win/2004/08/events/event'><System><Provider Name='Service Control Manager' Guid='{555908d1-a6d7-4695-8e1e-26931d2012f4}' EventSourceName='Service Control Manager'/><EventID Qualifiers='16384'>7036</EventID><Version>0</Version><Level>Information</Level><Task>None</Task><Opcode></\nOpcode><Keywords>Classic</Keywords><TimeCreated SystemTime='2020-02-01T00:55:45.040458400Z'/><EventRecordID>296447</EventRecordID><Correlation/><Execution ProcessID='712' ThreadID='6072'/><Channel>System</Channel><Computer>TermServ-p.local</Computer><Security/></System><EventData>The WinHTTP Web Proxy Auto-Discovery Service service entered the running state.</EventData></Event>

My conf file is here

input {
file {
     path => "/etc/logstash/conf.d/abc.log"
     start_position => beginning
     sincedb_path => "/dev/null"
     codec => multiline {
       pattern => "<Event\s+xmlns=.+?>"
       negate => true
       what => "previous"
     }
  }
}

filter {
  xml {
    source => "message"
    target => "xml_parsed"
    remove_namespaces => "true"
    force_array => "false"
    xpath => ["//Provider/@Name", "[event][provider]", "//Provider/@Guid", "[process][entity_id]", "//Provider/@EventSourceName", "[event][dataset]", "//EventID/@Qualifiers", "[tl_custom][event_id][qualifier]", "//EventID/text()", "[event][code]", "//Level/text()", "[log][level]", "//Task/text()", "[event][action]", "//Keywords/text()", "[tl_custom][keyword]", "//TimeCreated/@SystemTime", "[event][created]", "//EventRecordID/text()", "[winlog][record_id]", "//Execution/@ProcessID", "[process][pid]", "//Execution/@ThreadID", "[process][thread][id]", "//Channel/text()", "[event][type]", "//Computer/text()", "[winlog][computer_name]", "//Security/@UserID", "[user][name]", "//EventData/text()", "[event][description]" ]
}
  mutate {
    remove_field => [ "xml_parsed"]
    copy => { "[winlog][computer_name]" => "[most][name]"}
    lowercase => ["[most][name]"]
    remove_field => ["[tl_custom][event_id][qualifier]" , "[tl_custom][keyword]"]
  }
}

output {
  stdout { codec => rubydebug}
}

I'm getting error like this

[WARN ] 2020-11-29 10:37:24.283 [[main]>worker0] xml - Error parsing xml with XmlSimple {:source=>"message", :value=>"<Event xmlns='http://schemas.microsoft.com/win/2004/08/events/event'><System><Provider Name='Service Control Manager' Guid='{555908d1-a6d7-4695-8e1e-26931d2012f4}' EventSourceName='Service Control Manager'/><EventID Qualifiers='16384'>7036</EventID><Version>0</Version><Level>Information</Level><Task>None</Task><Opcode></\\nOpcode><Keywords>Classic</Keywords><TimeCreated SystemTime='2020-02-01T00:55:45.040458400Z'/><EventRecordID>296447</EventRecordID><Correlation/><Execution ProcessID='712' ThreadID='6072'/><Channel>System</Channel><Computer>TermServ-PHX.corp.local</Computer><Security/></System><EventData>The WinHTTP Web Proxy Auto-Discovery Service service entered the running state.</EventData></Event>", :exception=>#<REXML::ParseException: #<NoMethodError: undefined method `[]' for nil:NilClass>
uri:classloader:/META-INF/jruby.home/lib/ruby/stdlib/rexml/parsers/baseparser.rb:343:in `pull_event'
uri:classloader:/META-INF/jruby.home/lib/ruby/stdlib/rexml/parsers/baseparser.rb:185:in `pull'
uri:classloader:/META-INF/jruby.home/lib/ruby/stdlib/rexml/parsers/treeparser.rb:23:in `parse'
uri:classloader:/META-INF/jruby.home/lib/ruby/stdlib/rexml/document.rb:288:in `build'
uri:classloader:/META-INF/jruby.home/lib/ruby/stdlib/rexml/document.rb:45:in `initialize'
/usr/share/logstash/vendor/bundle/jruby/2.5.0/gems/xml-simple-1.1.5/lib/xmlsimple.rb:971:in `parse'
/usr/share/logstash/vendor/bundle/jruby/2.5.0/gems/xml-simple-1.1.5/lib/xmlsimple.rb:164:in `xml_in'
/usr/share/logstash/vendor/bundle/jruby/2.5.0/gems/xml-simple-1.1.5/lib/xmlsimple.rb:203:in `xml_in'
/usr/share/logstash/vendor/bundle/jruby/2.5.0/gems/logstash-filter-xml-4.1.1/lib/logstash/filters/xml.rb:195:in `filter'
/usr/share/logstash/logstash-core/lib/logstash/filters/base.rb:159:in `do_filter'
/usr/share/logstash/logstash-core/lib/logstash/filters/base.rb:178:in `block in multi_filter'
org/jruby/RubyArray.java:1809:in `each'
/usr/share/logstash/logstash-core/lib/logstash/filters/base.rb:175:in `multi_filter'
org/logstash/config/ir/compiler/AbstractFilterDelegatorExt.java:134:in `multi_filter'
/usr/share/logstash/logstash-core/lib/logstash/java_pipeline.rb:295:in `block in start_workers'
...
Exception parsing
Line: 1
Position: 711
Last 80 unconsumed characters:
</\nOpcode><Keywords>Classic</Keywords><TimeCreated SystemTime='2020-02-01T00:55:>, :backtrace=>["uri:classloader:/META-INF/jruby.home/lib/ruby/stdlib/rexml/parsers/baseparser.rb:442:in `pull_event'", "uri:classloader:/META-INF/jruby.home/lib/ruby/stdlib/rexml/parsers/baseparser.rb:185:in `pull'", "uri:classloader:/META-INF/jruby.home/lib/ruby/stdlib/rexml/parsers/treeparser.rb:23:in `parse'", "uri:classloader:/META-INF/jruby.home/lib/ruby/stdlib/rexml/document.rb:288:in `build'", "uri:classloader:/META-INF/jruby.home/lib/ruby/stdlib/rexml/document.rb:45:in `initialize'", "/usr/share/logstash/vendor/bundle/jruby/2.5.0/gems/xml-simple-1.1.5/lib/xmlsimple.rb:971:in `parse'", "/usr/share/logstash/vendor/bundle/jruby/2.5.0/gems/xml-simple-1.1.5/lib/xmlsimple.rb:164:in `xml_in'", "/usr/share/logstash/vendor/bundle/jruby/2.5.0/gems/xml-simple-1.1.5/lib/xmlsimple.rb:203:in `xml_in'", "/usr/share/logstash/vendor/bundle/jruby/2.5.0/gems/logstash-filter-xml-4.1.1/lib/logstash/filters/xml.rb:195:in `filter'", "/usr/share/logstash/logstash-core/lib/logstash/filters/base.rb:159:in `do_filter'", "/usr/share/logstash/logstash-core/lib/logstash/filters/base.rb:178:in `block in multi_filter'", "org/jruby/RubyArray.java:1809:in `each'", "/usr/share/logstash/logstash-core/lib/logstash/filters/base.rb:175:in `multi_filter'", "org/logstash/config/ir/compiler/AbstractFilterDelegatorExt.java:134:in `multi_filter'", "/usr/share/logstash/logstash-core/lib/logstash/java_pipeline.rb:295:in `block in start_workers'"]}

As per my understanding, the raw log contains \n in one XML <Opcode></\nOpcode> that is not being parsed through XML filter plugin. Raw log is not in my control but if I remove this tag <Opcode></\nOpcode> and run my config file, it parse all fields properly without any error. Now can I remove this XML tag <Opcode></\nOpcode> before filter it through XML filter plugin? or can you please suggest any other way to get the desired results.

Thank you very much

This topic was automatically closed 28 days after the last reply. New replies are no longer allowed.