How to split xml arrays?

Hi Folks,

I'm pretty new to the ELK stack. I'm trying to parse XML and send all the elements of an array to Elasticsearch.
So I wrote this filter which works fine:

filter {
  # Extract the text of each LogEntry child element directly from the raw
  # message. store_xml is off, so only the xpath destination fields are
  # populated; each of them holds one value per matching LogEntry.
  xml {
    source => "message"
    #target => "xmldata"
    store_xml => "false"
    xpath => {
      "/OMA/ESMLog//LogEntry/Index/text()" => "index"
      "/OMA/ESMLog//LogEntry/Status/text()" => "status"
      "/OMA/ESMLog//LogEntry/TimeStampRaw/text()" => "timestampraw"
      "/OMA/ESMLog//LogEntry/Description/text()" => "description"
    }
  }

  # Drop the raw document and the scratch fields.
  mutate {
    remove_field => ["message", "inxml", "xmldata"]
  }

  # Collapse every extracted array to its first element only.
  mutate {
    replace => {
      "index" => "%{[index][0]}"
      "status" => "%{[status][0]}"
      "timestampraw" => "%{[timestampraw][0]}"
      "description" => "%{[description][0]}"
    }
  }

  # Interpret timestampraw as a UNIX epoch value for the event date.
  date {
    match => ["timestampraw", "UNIX"]
  }
}
The only problem is that the filter picks only the first array element for each field, but I want to send every element to Elasticsearch as a separate event.
I've tried to understand the split filter, but I have no clue how it can help in my scenario.
Here is one of my xml doc
<?xml version="1.0" encoding="UTF-8"?> <OMA> <ESMLog> <LogEntry> <Index>0</Index> <Status>2</Status> <TimeStamp>Tue Nov 3 07:22:57 2015</TimeStamp> <TimeStampRaw>1446535377</TimeStampRaw> <Description>The system board Mem2 temperature is within range.</Description> </LogEntry> <LogEntry> <Index>1</Index> <Status>3</Status> <TimeStamp>System Boot</TimeStamp> <TimeStampRaw>1446535378</TimeStampRaw> <Description>The system board Mem2 temperature is less than the lower warning threshold.</Description> </LogEntry> <LogEntry> <Index>2</Index> <Status>2</Status> <TimeStamp>Mon Nov 2 14:17:09 2015</TimeStamp> <TimeStampRaw>1446473829</TimeStampRaw> <Description>Drive 0 is installed in disk drive bay 1.</Description> </LogEntry> <LogEntry> <Index>3</Index> <Status>4</Status> <TimeStamp>Mon Nov 2 14:17:04 2015</TimeStamp> <TimeStampRaw>1446473824</TimeStampRaw> <Description>Drive 0 is removed from disk drive bay 1.</Description> </LogEntry> <LogEntry> <Index>4</Index> <Status>2</Status> <TimeStamp>Mon Nov 2 14:15:54 2015</TimeStamp> <TimeStampRaw>1446473754</TimeStampRaw> <Description>Drive 0 is installed in disk drive bay 1.</Description> </LogEntry> <LogEntry> <Index>5</Index> <Status>4</Status> <TimeStamp>Mon Nov 2 13:58:54 2015</TimeStamp> <TimeStampRaw>1446472734</TimeStampRaw> <Description>Drive 0 is removed from disk drive bay 1.</Description> </LogEntry> <NumRecords>11</NumRecords> </ESMLog> <ObjStatus>2</ObjStatus> <SMStatus>0</SMStatus> </OMA>
So I want every new index as a new event from the "LogEntry" sections. I hope it makes sense.
Thanks,

How about parsing the whole document and running the split on the resulting ESMLog field? That should leave you with a number of events that each contain a LogEntry field with various subfields. Clean those up and you should be ready to go.

1 Like

Thanks Magnus,

Finally I found this solution later yesterday :slight_smile:
Here is my developed filter:

filter {
  # Step 1: collect every LogEntry node of the document into the "logentry"
  # field. store_xml is off, so only the xpath destination is written.
  xml {
    source => "message"
    target => "xmldata"
    store_xml => "false"
    xpath => {
      "/OMA/ESMLog//LogEntry" => "logentry"
    }
  }

  # The raw document is no longer needed once the entries are extracted.
  mutate {
    remove_field => ["message", "inxml", "xmldata"]
  }

  # Step 2: emit one event per element of "logentry".
  split {
    field => "[logentry]"
  }

  # Step 3: each event now carries a single LogEntry fragment; parse it again
  # to pull out the individual values.
  xml {
    source => "logentry"
    store_xml => "false"
    xpath => {
      "/LogEntry/Index/text()" => "index"
      "/LogEntry/Status/text()" => "status"
      "/LogEntry/TimeStampRaw/text()" => "timestampraw"
      "/LogEntry/Description/text()" => "description"
    }
  }

  # Replace each extracted array with its first element.
  mutate {
    replace => {
      "index" => "%{[index][0]}"
      "status" => "%{[status][0]}"
      "timestampraw" => "%{[timestampraw][0]}"
      "description" => "%{[description][0]}"
    }
  }

  # Interpret timestampraw as a UNIX epoch value for the event date.
  date {
    match => ["timestampraw", "UNIX"]
  }

  # Remove the intermediate fields after the date has been parsed.
  mutate {
    remove_field => ["logentry", "timestampraw"]
  }
}

Hi Everyone

I followed Peter's example to try this, and then I got the error message below.

Only string and Array types are splittable. field: [LogEntry] is of type = NilClass

Which part should I fix? Thanks for your help.
P.S Windows 2008, Logstash 2.4.0

# Pipeline as posted: read an XML file through the multiline codec, run
# xml -> split -> xml -> mutate filters, then print events to stdout and
# write them to save.log.
input
{
        file
        {
                path => ["E:\logstash.log\6.xml"]
                start_position => "beginning"
                codec => multiline {
                        # negate + "previous": every line that does NOT match the
                        # pattern is appended to the preceding event.
                        # NOTE(review): the pattern looks for a literal "<?OMA ";
                        # the sample document in this thread starts with "<?xml"
                        # followed by "<OMA>" - confirm this pattern ever matches.
                        pattern => "^<\?OMA .*>"
                        negate => "true"
                        what => "previous"
                }
        }
}
 
filter {
        xml {
                source => "message"
                target => "xmldata"
                store_xml => "false"
                # NOTE(review): the working example earlier in the thread passes
                # an [XPath expression, destination field] pair here; this is a
                # single string that looks like a field reference. It appears to
                # have been swapped with the split "field" value below.
                xpath => "[logentry]"
        }
 
        mutate {
            remove_field => [ "message", "inxml", "xmldata" ]
        }

         split {
                # NOTE(review): this value is an XPath expression, whereas the
                # working example earlier in the thread gives split the name of
                # an event field ("[logentry]"). This matches the reported
                # "is of type = NilClass" error.
                field => "/OMA/ESMLog//LogEntry"
         }
 
        xml {
                # NOTE(review): no earlier filter in this config visibly writes a
                # "logentry" field - verify it exists before this stage.
                source => "logentry"
                store_xml => "false"
                xpath => ["/LogEntry/Index/text()","index"]
                xpath => ["/LogEntry/Status/text()","status"]
                xpath => ["/LogEntry/TimeStampRaw/text()","timestampraw"]
                xpath => ["/LogEntry/Description/text()","description"]
        }
        mutate {
                replace => {
                # NOTE(review): index and status are referenced without [0],
                # unlike timestampraw and description - confirm this is intended.
                "index" => "%{[index]}"
                "status" => "%{[status]}"
                "timestampraw" => "%{[timestampraw][0]}"
                "description" => "%{[description][0]}"
 
                }
        }
        mutate {
                # timestampraw is dropped here; this config contains no date filter
                # that consumes it first.
                remove_field => [ "logentry" , "timestampraw" ]
        }
}
 
 
output {
    stdout{ codec => rubydebug }
    file {
      path => "save.log"
      # NOTE(review): "message" is removed by the first mutate filter above, so
      # %{message} presumably has nothing to expand to here - verify.
      message_format => "%{message}"     
    }
}
        field => "/OMA/ESMLog//LogEntry"

Not sure what this is supposed to mean. Surely you don't have a field with this name? The split filter doesn't support XPath syntax, so you're asking it to split a field named, literally, "/OMA/ESMLog//LogEntry".

Hi Magnus
I have changed it to match pazarr's (Peter's) example, as shown below, but I still get the same error message.

split { field => "[logentry]" }

Have you verified that you have a logentry field?

Hi Magnus
Yes, I have verified it.
If you don't mind, here is my sample code:
https://drive.google.com/drive/folders/0B5awSIsVj3s8ck5JMWZkaEJFSVk

Many Thanks

Nothing in those files proved that the logentry field exists. Please show the output from a stdout { codec => rubydebug } output.

E:\Logstash\bin>logstash -f conf\conf.file.xml.3.conf
Settings: Default pipeline workers: 2
Pipeline main started
 
 
 
 
 
 
 
[31mException in pipelineworker, the pipeline stopped processing new events, please check your filter configuration and restart Logstash. {"exception"=>#, "backtrace"=>["E:/Logstash/vendor/bundle/jruby/1.9/g
ems/logstash-filter-split-2.0.5/lib/logstash/filters/split.rb:49:in `filter'", "E:/Logstash/vendor/bundle/jruby/1.9/gems/logstash-core-2.4.0-java/lib/logstash/f
ilters/base.rb:151:in `multi_filter'", "org/jruby/RubyArray.java:1613:in `each'", "E:/Logstash/vendor/bundle/jruby/1.9/gems/logstash-core-2.4.0-java/lib/logstas
h/filters/base.rb:148:in `multi_filter'", "(eval):197:in `filter_func'", "E:/Logstash/vendor/bundle/jruby/1.9/gems/logstash-core-2.4.0-java/lib/logstash/pipelin
e.rb:267:in `filter_batch'", "org/jruby/RubyArray.java:1613:in `each'", "org/jruby/RubyEnumerable.java:852:in `inject'", "E:/Logstash/vendor/bundle/jruby/1.9/ge
ms/logstash-core-2.4.0-java/lib/logstash/pipeline.rb:265:in `filter_batch'", "E:/Logstash/vendor/bundle/jruby/1.9/gems/logstash-core-2.4.0-java/lib/logstash/pip
eline.rb:223:in `worker_loop'", "E:/Logstash/vendor/bundle/jruby/1.9/gems/logstash-core-2.4.0-java/lib/logstash/pipeline.rb:201:in `start_workers'"], :level=>:e
rror}[0mLogStash::ConfigurationError: Only String and Array types are splittable. field:logentry is of type = NilClass
         filter at E:/Logstash/vendor/bundle/jruby/1.9/gems/logstash-filter-split-2.0.5/lib/logstash/filters/split.rb:49
   multi_filter at E:/Logstash/vendor/bundle/jruby/1.9/gems/logstash-core-2.4.0-java/lib/logstash/filters/base.rb:151
           each at org/jruby/RubyArray.java:1613
   multi_filter at E:/Logstash/vendor/bundle/jruby/1.9/gems/logstash-core-2.4.0-java/lib/logstash/filters/base.rb:148
    filter_func at (eval):197
   filter_batch at E:/Logstash/vendor/bundle/jruby/1.9/gems/logstash-core-2.4.0-java/lib/logstash/pipeline.rb:267
           each at org/jruby/RubyArray.java:1613
         inject at org/jruby/RubyEnumerable.java:852
   filter_batch at E:/Logstash/vendor/bundle/jruby/1.9/gems/logstash-core-2.4.0-java/lib/logstash/pipeline.rb:265
    worker_loop at E:/Logstash/vendor/bundle/jruby/1.9/gems/logstash-core-2.4.0-java/lib/logstash/pipeline.rb:223
  start_workers at E:/Logstash/vendor/bundle/jruby/1.9/gems/logstash-core-2.4.0-java/lib/logstash/pipeline.rb:201

I decided to write a new sample:

# Read the XML file through the multiline codec, parse each event's message
# into [parsed], split on [parsed][station], and print the result.
input {
  file {
    path => ["D:\ELK\logstash.log\2.xml"]
    codec => multiline {
      pattern => ""
      negate => "true"
      what => "previous"
    }
  }
}

filter {
  # Parse the whole message and store the resulting structure under "parsed".
  xml {
    source => "message"
    target => "parsed"
  }

  # Emit one event per element of [parsed][station] and copy the station's
  # name into a top-level field.
  split {
    field => "[parsed][station]"
    add_field => {
      stationName => "%{[parsed][station][name]}"
    }
  }
}

output {
  stdout {
    codec => rubydebug
  }
}

  
    1
    Eads St & 15th St S
    31000
    1475808264572
    38.858971
    -77.05323
    true
    false
    0
    
    false
    true
    9
    6
    1475805732039
  
  
    2
    Eads St & 15th St S
    31000
    1475808264572
    38.858971
    -77.05323
    true
    false
    0
    
    false
    true
    9
    6
    1475805732039
  


Pipeline main started
^C要終止批次工作嗎 (Y/N)? SIGINT received. Shutting down the agent. {:level=>:warn}
stopping pipeline {:id=>"main"}
Error parsing xml with XmlSimple {:source=>"message", :value=>"\r\n    3\r\n    Eads St & 15th St S\r\n    31000\r\n    1475808264572\r\n    38.858971\r\n    -77.05323\r\n    true\r\n    false\r\n    0\r\n    \r\n    false\r\n    true\r\n    9\r\n    6\r\n    1475805732039\r\n  \r", :exception=>#<REXML::ParseException: #
D:/ELK/logstash/vendor/jruby/lib/ruby/1.9/rexml/document.rb:94:in `add'
D:/ELK/logstash/vendor/jruby/lib/ruby/1.9/rexml/element.rb:882:in `add'
D:/ELK/logstash/vendor/jruby/lib/ruby/1.9/rexml/child.rb:21:in `initialize'
D:/ELK/logstash/vendor/jruby/lib/ruby/1.9/rexml/parent.rb:13:in `initialize'
D:/ELK/logstash/vendor/jruby/lib/ruby/1.9/rexml/element.rb:59:in `initialize'
D:/ELK/logstash/vendor/jruby/lib/ruby/1.9/rexml/element.rb:880:in `add'
D:/ELK/logstash/vendor/jruby/lib/ruby/1.9/rexml/element.rb:297:in `add_element'
D:/ELK/logstash/vendor/jruby/lib/ruby/1.9/rexml/document.rb:101:in `add_element'
D:/ELK/logstash/vendor/jruby/lib/ruby/1.9/rexml/parsers/treeparser.rb:33:in `parse'
D:/ELK/logstash/vendor/jruby/lib/ruby/1.9/rexml/document.rb:249:in `build'
D:/ELK/logstash/vendor/jruby/lib/ruby/1.9/rexml/document.rb:43:in `initialize'
D:/ELK/logstash/vendor/bundle/jruby/1.9/gems/xml-simple-1.1.5/lib/xmlsimple.rb:971:in `parse'
D:/ELK/logstash/vendor/bundle/jruby/1.9/gems/xml-simple-1.1.5/lib/xmlsimple.rb:164:in `xml_in'
D:/ELK/logstash/vendor/bundle/jruby/1.9/gems/xml-simple-1.1.5/lib/xmlsimple.rb:203:in `xml_in'
D:/ELK/logstash/vendor/bundle/jruby/1.9/gems/logstash-filter-xml-2.2.0/lib/logstash/filters/xml.rb:186:in `filter'
D:/ELK/logstash/vendor/bundle/jruby/1.9/gems/logstash-core-2.4.0-java/lib/logstash/filters/base.rb:151:in `multi_filter'
org/jruby/RubyArray.java:1613:in `each'
D:/ELK/logstash/vendor/bundle/jruby/1.9/gems/logstash-core-2.4.0-java/lib/logstash/filters/base.rb:148:in `multi_filter'
(eval):67:in `filter_func'
D:/ELK/logstash/vendor/bundle/jruby/1.9/gems/logstash-core-2.4.0-java/lib/logstash/pipeline.rb:267:in `filter_batch'
org/jruby/RubyArray.java:1613:in `each'
org/jruby/RubyEnumerable.java:852:in `inject'
D:/ELK/logstash/vendor/bundle/jruby/1.9/gems/logstash-core-2.4.0-java/lib/logstash/pipeline.rb:265:in `filter_batch'
D:/ELK/logstash/vendor/bundle/jruby/1.9/gems/logstash-core-2.4.0-java/lib/logstash/pipeline.rb:223:in `worker_loop'
D:/ELK/logstash/vendor/bundle/jruby/1.9/gems/logstash-core-2.4.0-java/lib/logstash/pipeline.rb:201:in `start_workers'
...
attempted adding second root element to document
Line: 3
Position: 60
Last 80 unconsumed characters:
>, :backtrace=>["D:/ELK/logstash/vendor/jruby/lib/ruby/1.9/rexml/parsers/treeparser.rb:95:in `parse'", "D:/ELK/logstash/vendor/jruby/lib/ruby/1.9/rexml/document.rb:249:in `build'", "D:/ELK/logstash/vendor/jruby/lib/ruby/1.9/rexml/document.rb:43:in `initialize'", "D:/ELK/logstash/vendor/bundle/jruby/1.9/gems/xml-simple-1.1.5/lib/xmlsimple.rb:971:in `parse'", "D:/ELK/logstash/vendor/bundle/jruby/1.9/gems/xml-simple-1.1.5/lib/xmlsimple.rb:164:in `xml_in'", "D:/ELK/logstash/vendor/bundle/jruby/1.9/gems/xml-simple-1.1.5/lib/xmlsimple.rb:203:in `xml_in'", "D:/ELK/logstash/vendor/bundle/jruby/1.9/gems/logstash-filter-xml-2.2.0/lib/logstash/filters/xml.rb:186:in `filter'", "D:/ELK/logstash/vendor/bundle/jruby/1.9/gems/logstash-core-2.4.0-java/lib/logstash/filters/base.rb:151:in `multi_filter'", "org/jruby/RubyArray.java:1613:in `each'", "D:/ELK/logstash/vendor/bundle/jruby/1.9/gems/logstash-core-2.4.0-java/lib/logstash/filters/base.rb:148:in `multi_filter'", "(eval):67:in `filter_func'", "D:/ELK/logstash/vendor/bundle/jruby/1.9/gems/logstash-core-2.4.0-java/lib/logstash/pipeline.rb:267:in `filter_batch'", "org/jruby/RubyArray.java:1613:in `each'", "org/jruby/RubyEnumerable.java:852:in `inject'", "D:/ELK/logstash/vendor/bundle/jruby/1.9/gems/logstash-core-2.4.0-java/lib/logstash/pipeline.rb:265:in `filter_batch'", "D:/ELK/logstash/vendor/bundle/jruby/1.9/gems/logstash-core-2.4.0-java/lib/logstash/pipeline.rb:223:in `worker_loop'", "D:/ELK/logstash/vendor/bundle/jruby/1.9/gems/logstash-core-2.4.0-java/lib/logstash/pipeline.rb:201:in `start_workers'"], :level=>:warn}
Exception in pipelineworker, the pipeline stopped processing new events, please check your filter configuration and restart Logstash. {"exception"=>#, "backtrace"=>["D:/ELK/logstash/vendor/bundle/jruby/1.9/gems/logstash-filter-split-2.0.5/lib/logstash/filters/split.rb:49:in `filter'", "D:/ELK/logstash/vendor/bundle/jruby/1.9/gems/logstash-core-2.4.0-java/lib/logstash/filters/base.rb:151:in `multi_filter'", "org/jruby/RubyArray.java:1613:in `each'", "D:/ELK/logstash/vendor/bundle/jruby/1.9/gems/logstash-core-2.4.0-java/lib/logstash/filters/base.rb:148:in `multi_filter'", "(eval):68:in `filter_func'", "D:/ELK/logstash/vendor/bundle/jruby/1.9/gems/logstash-core-2.4.0-java/lib/logstash/pipeline.rb:267:in `filter_batch'", "org/jruby/RubyArray.java:1613:in `each'", "org/jruby/RubyEnumerable.java:852:in `inject'", "D:/ELK/logstash/vendor/bundle/jruby/1.9/gems/logstash-core-2.4.0-java/lib/logstash/pipeline.rb:265:in `filter_batch'", "D:/ELK/logstash/vendor/bundle/jruby/1.9/gems/logstash-core-2.4.0-java/lib/logstash/pipeline.rb:223:in `worker_loop'", "D:/ELK/logstash/vendor/bundle/jruby/1.9/gems/logstash-core-2.4.0-java/lib/logstash/pipeline.rb:201:in `start_workers'"], :level=>:error}
LogStash::ConfigurationError: Only String and Array types are splittable. field:[parsed][station] is of type = NilClass
         filter at D:/ELK/logstash/vendor/bundle/jruby/1.9/gems/logstash-filter-split-2.0.5/lib/logstash/filters/split.rb:49
   multi_filter at D:/ELK/logstash/vendor/bundle/jruby/1.9/gems/logstash-core-2.4.0-java/lib/logstash/filters/base.rb:151
           each at org/jruby/RubyArray.java:1613
   multi_filter at D:/ELK/logstash/vendor/bundle/jruby/1.9/gems/logstash-core-2.4.0-java/lib/logstash/filters/base.rb:148
    filter_func at (eval):68
   filter_batch at D:/ELK/logstash/vendor/bundle/jruby/1.9/gems/logstash-core-2.4.0-java/lib/logstash/pipeline.rb:267
           each at org/jruby/RubyArray.java:1613
         inject at org/jruby/RubyEnumerable.java:852
   filter_batch at D:/ELK/logstash/vendor/bundle/jruby/1.9/gems/logstash-core-2.4.0-java/lib/logstash/pipeline.rb:265
    worker_loop at D:/ELK/logstash/vendor/bundle/jruby/1.9/gems/logstash-core-2.4.0-java/lib/logstash/pipeline.rb:223
  start_workers at D:/ELK/logstash/vendor/bundle/jruby/1.9/gems/logstash-core-2.4.0-java/lib/logstash/pipeline.rb:201

I found a new issue related to the XML content.

# Read XML from standard input, split it into one event per <logline>, and
# extract each logline's description text.
input {
  stdin { }
}

filter {
  # Collect every /result/logline node into "loglines"; the raw message and
  # host fields are dropped once this filter succeeds.
  xml {
    source => "message"
    store_xml => "false"
    xpath => {
      "/result/logline" => "loglines"
    }
    remove_field => ["message", "host"]
  }

  # One event per collected logline.
  split {
    field => "loglines"
  }

  # Parse the single logline fragment carried by each event, then discard it.
  xml {
    source => "loglines"
    store_xml => "false"
    xpath => {
      "/logline/description/text()" => "description"
    }
    remove_field => ["loglines"]
  }
}

output {
  stdout {
    codec => rubydebug
  }
}

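If I input this content on a single line, it works:

<result><logline><description>item 1</description></logline><logline><description>item 2</description></logline></result>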

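But it does not work when the same content is spread over several lines, like below:

<result>
	<logline>
		<description>item 1</description>
	</logline>
	<logline>
		<description>item 2</description>
	</logline>
</result>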
D:\ELK\logstash\bin>logstash -f conf.file.xml.1.conf -v
e[32mstarting agent {:level=>:info}e[0m
e[32mstarting pipeline {:id=>"main", :level=>:info}e[0m
Settings: Default pipeline workers: 4
e[32mStarting pipeline {:id=>"main", :pipeline_workers=>4, :batch_size=>125, :batch_delay=>5, :max_inflight=>500, :level=>:info}e[0m
Pipeline main started


item 1


item 2

e[31mException in pipelineworker, the pipeline stopped processing new events, please check your filter configuration and restart Logstash. {"exception"=>#, "backtrace"=>["D:/ELK/logstash/vendor/bundle/jruby/1.9/gems/logstash-filter-split-2.0.5/lib/logstash/filters/split.rb:49:in `filter'", "D:/ELK/logstash/vendor/bundle/jruby/1.9/gems/logstash-core-2.4.0-java/lib/logstash/filters/base.rb:151:in `multi_filter'", "org/jruby/RubyArray.java:1613:in `each'", "D:/ELK/logstash/vendor/bundle/jruby/1.9/gems/logstash-core-2.4.0-java/lib/logstash/filters/base.rb:148:in `multi_filter'", "(eval):95:in `filter_func'", "D:/ELK/logstash/vendor/bundle/jruby/1.9/gems/logstash-core-2.4.0-java/lib/logstash/pipeline.rb:267:in `filter_batch'", "org/jruby/RubyArray.java:1613:in `each'", "org/jruby/RubyEnumerable.java:852:in `inject'", "D:/ELK/logstash/vendor/bundle/jruby/1.9/gems/logstash-core-2.4.0-java/lib/logstash/pipeline.rb:265:in `filter_batch'", "D:/ELK/logstash/vendor/bundle/jruby/1.9/gems/logstash-core-2.4.0-java/lib/logstash/pipeline.rb:223:in `worker_loop'", "D:/ELK/logstash/vendor/bundle/jruby/1.9/gems/logstash-core-2.4.0-java/lib/logstash/pipeline.rb:201:in `start_workers'"], :level=>:error}e[0m
LogStash::ConfigurationError: Only String and Array types are splittable. field:loglines is of type = NilClass
         filter at D:/ELK/logstash/vendor/bundle/jruby/1.9/gems/logstash-filter-split-2.0.5/lib/logstash/filters/split.rb:49
   multi_filter at D:/ELK/logstash/vendor/bundle/jruby/1.9/gems/logstash-core-2.4.0-java/lib/logstash/filters/base.rb:151
           each at org/jruby/RubyArray.java:1613
   multi_filter at D:/ELK/logstash/vendor/bundle/jruby/1.9/gems/logstash-core-2.4.0-java/lib/logstash/filters/base.rb:148
    filter_func at (eval):95
   filter_batch at D:/ELK/logstash/vendor/bundle/jruby/1.9/gems/logstash-core-2.4.0-java/lib/logstash/pipeline.rb:267
           each at org/jruby/RubyArray.java:1613
         inject at org/jruby/RubyEnumerable.java:852
   filter_batch at D:/ELK/logstash/vendor/bundle/jruby/1.9/gems/logstash-core-2.4.0-java/lib/logstash/pipeline.rb:265
    worker_loop at D:/ELK/logstash/vendor/bundle/jruby/1.9/gems/logstash-core-2.4.0-java/lib/logstash/pipeline.rb:223
  start_workers at D:/ELK/logstash/vendor/bundle/jruby/1.9/gems/logstash-core-2.4.0-java/lib/logstash/pipeline.rb:201

@platstar you may either write your XML on a single line or experiment with the multiline codec.