Hi,
I'm trying to parse a very complex XML with nested arrays into logstash, but for some reason XML filter only parses right 2 objects, discarding many of them.
Here below my configuration for Logstash:
   input {
       file {
          path => "C:/ELK/results.xml"
          start_position => "beginning"
          sincedb_path => "nul"
          type => "xml"
          codec => multiline {
            pattern => "<CxXMLResults"
            negate => true
            what => "previous"
          }
       }
    }
    filter {
      xml {
        source => "message"
        store_xml => false
        xpath => ["CxXMLResults/@InitiatorName", "initiator_name"]
        xpath => ["CxXMLResults/@Owner", "owner"]
        xpath => ["CxXMLResults/@ScanId", "scan_id"]
        xpath => ["CxXMLResults/@ProjectId", "project_id"]
        xpath => ["CxXMLResults/@ProjectName", "project_name"]
        xpath => ["CxXMLResults/@TeamFullPathOnReportDate", "team_full_path"]
        xpath => ["CxXMLResults/@DeepLink", "scan_link"]
        xpath => ["CxXMLResults/@ScanStart", "scan_start"]
        xpath => ["CxXMLResults/@Preset", "preset"]
        xpath => ["CxXMLResults/@ScanTime", "scan_time"]
        xpath => ["CxXMLResults/@LinesOfCodeScanned", "loc"]
        xpath => ["CxXMLResults/@FilesScanned", "files_scanned"]
        xpath => ["CxXMLResults/@ReportCreationTime", "report_creation_date"]
        xpath => ["CxXMLResults/@Team", "team"]
        xpath => ["CxXMLResults/@CheckmarxVersion", "cx_version"]
        xpath => ["CxXMLResults/@ScanComments", "scan_comments"]
        xpath => ["CxXMLResults/@ScanType", "scan_type"]
        xpath => ["CxXMLResults/@SourceOrigin", "source_origin"]
        xpath => ["CxXMLResults/@Visibility", "visibility"]
        xpath => ["CxXMLResults/Query", "queries"]
      }
      split { 
        field => "queries"
      }
      xml {
        source => "queries"
        store_xml => false
        xpath => ["Query/@id", "query_id"]
        xpath => ["Query/@Categories", "query_categories"]
        xpath => ["Query/@cweId", "query_cwe_id"]
        xpath => ["Query/@name", "query_name"]
        xpath => ["Query/@group", "query_group"]
        xpath => ["Query/@Severity", "query_severity"]
        xpath => ["Query/@Language", "query_language"]
        xpath => ["Query/@LanguageHash", "query_language_hash"]
        xpath => ["Query/@LanguageChangeDate", "query_language_change_date"]
        xpath => ["Query/@SeverityIndex", "query_severity_index"]
        xpath => ["Query/@QueryPath", "query_path"]
        xpath => ["Query/@QueryVersionCode", "query_version_code"]
        xpath => ["Query/Result", "results"]
      }
      split { 
        field => "results"
      }
      xml {
        source => "results"
        store_xml => false
        xpath => ["Result/@NodeId", "result_node_id"]
        xpath => ["Result/@FileName", "result_filename"]
        xpath => ["Result/@Status", "result_status"]
        xpath => ["Result/@Line", "result_line"]
        xpath => ["Result/@Column", "result_column"]
        xpath => ["Result/@FalsePositive", "result_false_positive"]
        xpath => ["Result/@Severity", "result_severity"]
        xpath => ["Result/@AssignToUser", "result_assigned_user"]
        xpath => ["Result/@state", "result_state"]
        xpath => ["Result/@Remark", "result_remark"]
        xpath => ["Result/@DeepLink", "result_link"]
        xpath => ["Result/@SeverityIndex", "result_severity_index"]
        xpath => ["Result/Path/@ResultId", "result_id"]
        xpath => ["Result/Path/@PathId", "result_path_id"]
        xpath => ["Result/Path/@SimilarityId", "result_similarity_id"]
        xpath => ["Result/Path/PathNode[1]/FileName/text()", "result_source_filename"]
        xpath => ["Result/Path/PathNode[1]/Line/text()", "result_source_line"]
        xpath => ["Result/Path/PathNode[1]/Column/text()", "result_source_column"]
        xpath => ["Result/Path/PathNode[1]/NodeId/text()", "result_source_node_id"]
        xpath => ["Result/Path/PathNode[1]/Name/text()", "result_source_name"]
        xpath => ["Result/Path/PathNode[1]/Type/text()", "result_source_type"]
        xpath => ["Result/Path/PathNode[1]/Length/text()", "result_source_length"]
        xpath => ["Result/Path/PathNode[1]/Snippet/Line/Number/text()", "result_source_snippet_line_number"]
        xpath => ["Result/Path/PathNode[1]/Snippet/Line/Code/text()", "result_source_snippet_line_code"]
        xpath => ["Result/Path/PathNode[last()]/FileName/text()", "result_dest_filename"]
        xpath => ["Result/Path/PathNode[last()]/Line/text()", "result_dest_line"]
        xpath => ["Result/Path/PathNode[last()]/Column/text()", "result_dest_column"]
        xpath => ["Result/Path/PathNode[last()]/NodeId/text()", "result_dest_node_id"]
        xpath => ["Result/Path/PathNode[last()]/Name/text()", "result_dest_name"]
        xpath => ["Result/Path/PathNode[last()]/Type/text()", "result_dest_type"]
        xpath => ["Result/Path/PathNode[last()]/Length/text()", "result_dest_length"]
        xpath => ["Result/Path/PathNode[last()]/Snippet/Line/Number/text()", "result_dest_snippet_line_number"]
        xpath => ["Result/Path/PathNode[last()]/Snippet/Line/Code/text()", "result_dest_snippet_line_code"]
      }
      mutate {
        remove_field => [ "message", "queries", "results" ]
      }
      if "_split_type_failure" in [tags] {
        drop {}
      }
    }
    output {
  stdout {
    codec=>rubydebug
  }
  file {
    path => "C:/ELK/ResultsXML.json"
  }
    }
Why xml filter only parses right 2 objects, giving the error for the other ones "Only String and Array types are splittable. field:queries is of type = NilClass"?
The expected number of entries in logstash are 162, but right now I can only see in Kibana that only 2 went there, in fact. The main goal is to present every "Result" from the XML with information from the parents ("Query", "CxXMLResults") and children ("Path", "PathNode")
How can I fix this ?
Thanks!