Hi,
I'm trying to parse a very complex XML with nested arrays into logstash, but for some reason XML filter only parses right 2 objects, discarding many of them.
Here below my configuration for Logstash:
input {
file {
path => "C:/ELK/results.xml"
start_position => "beginning"
sincedb_path => "nul"
type => "xml"
codec => multiline {
pattern => "<CxXMLResults"
negate => true
what => "previous"
}
}
}
filter {
xml {
source => "message"
store_xml => false
xpath => ["CxXMLResults/@InitiatorName", "initiator_name"]
xpath => ["CxXMLResults/@Owner", "owner"]
xpath => ["CxXMLResults/@ScanId", "scan_id"]
xpath => ["CxXMLResults/@ProjectId", "project_id"]
xpath => ["CxXMLResults/@ProjectName", "project_name"]
xpath => ["CxXMLResults/@TeamFullPathOnReportDate", "team_full_path"]
xpath => ["CxXMLResults/@DeepLink", "scan_link"]
xpath => ["CxXMLResults/@ScanStart", "scan_start"]
xpath => ["CxXMLResults/@Preset", "preset"]
xpath => ["CxXMLResults/@ScanTime", "scan_time"]
xpath => ["CxXMLResults/@LinesOfCodeScanned", "loc"]
xpath => ["CxXMLResults/@FilesScanned", "files_scanned"]
xpath => ["CxXMLResults/@ReportCreationTime", "report_creation_date"]
xpath => ["CxXMLResults/@Team", "team"]
xpath => ["CxXMLResults/@CheckmarxVersion", "cx_version"]
xpath => ["CxXMLResults/@ScanComments", "scan_comments"]
xpath => ["CxXMLResults/@ScanType", "scan_type"]
xpath => ["CxXMLResults/@SourceOrigin", "source_origin"]
xpath => ["CxXMLResults/@Visibility", "visibility"]
xpath => ["CxXMLResults/Query", "queries"]
}
split {
field => "queries"
}
xml {
source => "queries"
store_xml => false
xpath => ["Query/@id", "query_id"]
xpath => ["Query/@Categories", "query_categories"]
xpath => ["Query/@cweId", "query_cwe_id"]
xpath => ["Query/@name", "query_name"]
xpath => ["Query/@group", "query_group"]
xpath => ["Query/@Severity", "query_severity"]
xpath => ["Query/@Language", "query_language"]
xpath => ["Query/@LanguageHash", "query_language_hash"]
xpath => ["Query/@LanguageChangeDate", "query_language_change_date"]
xpath => ["Query/@SeverityIndex", "query_severity_index"]
xpath => ["Query/@QueryPath", "query_path"]
xpath => ["Query/@QueryVersionCode", "query_version_code"]
xpath => ["Query/Result", "results"]
}
split {
field => "results"
}
xml {
source => "results"
store_xml => false
xpath => ["Result/@NodeId", "result_node_id"]
xpath => ["Result/@FileName", "result_filename"]
xpath => ["Result/@Status", "result_status"]
xpath => ["Result/@Line", "result_line"]
xpath => ["Result/@Column", "result_column"]
xpath => ["Result/@FalsePositive", "result_false_positive"]
xpath => ["Result/@Severity", "result_severity"]
xpath => ["Result/@AssignToUser", "result_assigned_user"]
xpath => ["Result/@state", "result_state"]
xpath => ["Result/@Remark", "result_remark"]
xpath => ["Result/@DeepLink", "result_link"]
xpath => ["Result/@SeverityIndex", "result_severity_index"]
xpath => ["Result/Path/@ResultId", "result_id"]
xpath => ["Result/Path/@PathId", "result_path_id"]
xpath => ["Result/Path/@SimilarityId", "result_similarity_id"]
xpath => ["Result/Path/PathNode[1]/FileName/text()", "result_source_filename"]
xpath => ["Result/Path/PathNode[1]/Line/text()", "result_source_line"]
xpath => ["Result/Path/PathNode[1]/Column/text()", "result_source_column"]
xpath => ["Result/Path/PathNode[1]/NodeId/text()", "result_source_node_id"]
xpath => ["Result/Path/PathNode[1]/Name/text()", "result_source_name"]
xpath => ["Result/Path/PathNode[1]/Type/text()", "result_source_type"]
xpath => ["Result/Path/PathNode[1]/Length/text()", "result_source_length"]
xpath => ["Result/Path/PathNode[1]/Snippet/Line/Number/text()", "result_source_snippet_line_number"]
xpath => ["Result/Path/PathNode[1]/Snippet/Line/Code/text()", "result_source_snippet_line_code"]
xpath => ["Result/Path/PathNode[last()]/FileName/text()", "result_dest_filename"]
xpath => ["Result/Path/PathNode[last()]/Line/text()", "result_dest_line"]
xpath => ["Result/Path/PathNode[last()]/Column/text()", "result_dest_column"]
xpath => ["Result/Path/PathNode[last()]/NodeId/text()", "result_dest_node_id"]
xpath => ["Result/Path/PathNode[last()]/Name/text()", "result_dest_name"]
xpath => ["Result/Path/PathNode[last()]/Type/text()", "result_dest_type"]
xpath => ["Result/Path/PathNode[last()]/Length/text()", "result_dest_length"]
xpath => ["Result/Path/PathNode[last()]/Snippet/Line/Number/text()", "result_dest_snippet_line_number"]
xpath => ["Result/Path/PathNode[last()]/Snippet/Line/Code/text()", "result_dest_snippet_line_code"]
}
mutate {
remove_field => [ "message", "queries", "results" ]
}
if "_split_type_failure" in [tags] {
drop {}
}
}
output {
stdout {
codec=>rubydebug
}
file {
path => "C:/ELK/ResultsXML.json"
}
}
Why xml filter only parses right 2 objects, giving the error for the other ones "Only String and Array types are splittable. field:queries is of type = NilClass"?
The expected number of entries in logstash are 162, but right now I can only see in Kibana that only 2 went there, in fact. The main goal is to present every "Result" from the XML with information from the parents ("Query", "CxXMLResults") and children ("Path", "PathNode")
How can I fix this ?
Thanks!