Extract multiple XML blocks from a multiline log

Hello.

I have a log file with a structure similar to this:

05 Feb 2021 14:00:00,213 [AAA-11] INFO  - Line A:[0]
05 Feb 2021 14:00:00,231 [AAA-11] INFO  - Line2:[0]
05 Feb 2021 14:00:00,231 [AAA-11] INFO  - Line3[: 0]
05 Feb 2021 14:00:01,213 [AAA-11] INFO  - Line A:[0]
05 Feb 2021 14:00:01,231 [AAA-11] INFO  - Line2:[0]
05 Feb 2021 14:00:01,231 [AAA-11] INFO  - Line3[: 0]
05 Feb 2021 14:00:02,213 [AAA-11] INFO  - Init Log
05 Feb 2021 14:00:02,213 [AAA-11] INFO  - select name1
05 Feb 2021 14:00:02,213 [AAA-11] INFO  - Include important info1
05 Feb 2021 14:00:02,231 [AAA-11] INFO  - message
<xml1>
    <tag1>Value 1</tag1>
    <tag2>Value 2</tag2>
</xml1>
05 Feb 2021 14:00:03,131 [AAA-11] INFO  - Include other important info: <xml2><tag1>Value 1</tag1><tag2>Value 2</tag2></xml2>
05 Feb 2021 14:00:03,131 [AAA-11] INFO  - end log
05 Feb 2021 14:00:03,213 [AAA-11] INFO  - Line A:[0]
05 Feb 2021 14:00:03,231 [AAA-11] INFO  - Line2:[0]
05 Feb 2021 14:00:03,231 [AAA-11] INFO  - Line3[: 0]
05 Feb 2021 14:00:04,213 [AAA-11] INFO  - Line A:[0]
05 Feb 2021 14:00:04,231 [AAA-11] INFO  - Line2:[0]
05 Feb 2021 14:00:04,231 [AAA-11] INFO  - Line3[: 0]
05 Feb 2021 14:00:05,213 [AAA-11] INFO  - Line A:[0]
05 Feb 2021 14:00:05,231 [AAA-11] INFO  - Line2:[0]
05 Feb 2021 14:00:05,231 [AAA-11] INFO  - Line3[: 0]
05 Feb 2021 14:01:00,213 [AAA-11] INFO  - Line A:[0]
05 Feb 2021 14:01:00,231 [AAA-11] INFO  - Line2:[0]
05 Feb 2021 14:01:00,231 [AAA-11] INFO  - Line3[: 0]
05 Feb 2021 14:01:01,213 [AAA-11] INFO  - Line A:[0]
05 Feb 2021 14:01:01,231 [AAA-11] INFO  - Line2:[0]
05 Feb 2021 14:01:01,231 [AAA-11] INFO  - Line3[: 0]
05 Feb 2021 14:01:02,213 [AAA-11] INFO  - Init Log
05 Feb 2021 14:01:02,213 [AAA-11] INFO  - select name2
05 Feb 2021 14:01:02,213 [AAA-11] INFO  - Include inportant info2
05 Feb 2021 14:01:02,231 [AAA-11] INFO  - message
<xml1>
    <tag1>Value 1</tag1>
    <tag2>Value 2</tag2>
</xml1>
05 Feb 2021 14:01:03,131 [AAA-11] INFO  - Include other important info: <xml2><tag1>Value 1</tag1><tag2>Value 2</tag2></xml2>
05 Feb 2021 14:01:03,131 [AAA-11] INFO  - end log
05 Feb 2021 14:01:03,213 [AAA-11] INFO  - Line A:[0]
05 Feb 2021 14:01:03,231 [AAA-11] INFO  - Line2:[0]
05 Feb 2021 14:01:03,231 [AAA-11] INFO  - Line3[: 0]
05 Feb 2021 14:01:04,213 [AAA-11] INFO  - Line A:[0]
05 Feb 2021 14:01:04,231 [AAA-11] INFO  - Line2:[0]
05 Feb 2021 14:01:04,231 [AAA-11] INFO  - Line3[: 0]
05 Feb 2021 14:01:05,213 [AAA-11] INFO  - Line A:[0]
05 Feb 2021 14:01:05,231 [AAA-11] INFO  - Line2:[0]
05 Feb 2021 14:01:05,231 [AAA-11] INFO  - Line3[: 0]

I need to get something like this:

{
    "timestamp": "05 Feb 2021 14:00:02,213",
    "select": "name1",
    "Include": "important info1",
    "message": {
        "xml1": {
            "tag1": "Value 1",
            "tag2": "Value 2"
        }
    },
    "info": {
        "xml2": {
            "tag1": "Value 1",
            "tag2": "Value 2"
        }
    }
}

I currently use the multiline codec to split the log into blocks.

Are you consuming all of the log entries you posted as a single event? Do you need help with the multiline codec or help parsing out the data you show?

I use the multiline codec, so that an individual event contains:

    05 Feb 2021 14:00:02,213 [AAA-11] INFO  - Init Log
    05 Feb 2021 14:00:02,213 [AAA-11] INFO  - select name1
    05 Feb 2021 14:00:02,213 [AAA-11] INFO  - Include important info1
    05 Feb 2021 14:00:02,231 [AAA-11] INFO  - message
    <xml1>
        <tag1>Value 1</tag1>
        <tag2>Value 2</tag2>
    </xml1>
    05 Feb 2021 14:00:03,131 [AAA-11] INFO  - Include other important info: <xml2><tag1>Value 1</tag1><tag2>Value 2</tag2></xml2>
    05 Feb 2021 14:00:03,131 [AAA-11] INFO  - end log

I need to insert something like this into Elasticsearch:

{
    "timestamp": "05 Feb 2021 14:00:02,213",
    "select": "name1",
    "Include": "important info1",
    "message": {
        "xml1": {
            "tag1": "Value 1",
            "tag2": "Value 2"
        }
    },
    "info": {
        "xml2": {
            "tag1": "Value 1",
            "tag2": "Value 2"
        }
    }
}

Here is how I would do it...

    # Filter chain for one multiline event (the "Init Log" ... "end log" block).
    # The order of the filters matters: each step consumes fields created by the
    # previous one.

    # Grab the first timestamp in the event text.
    # TIMESTAMP is a custom pattern for values such as "05 Feb 2021 14:00:02,213".
    # The capture is stored under [@metadata], so it does not show up as a regular
    # field in the final event.
    grok {
        pattern_definitions => { "TIMESTAMP" => "%{MONTHDAY} %{MONTH} %{YEAR} %{TIME}" }
        match => { "message" => "%{TIMESTAMP:[@metadata][ts]}" }
    }
    # Parse the captured text into @timestamp. No timezone option is given, so the
    # input is interpreted in the Logstash host's timezone (in the sample output
    # 14:00:02 became 19:00:02Z) -- set `timezone` explicitly if that is not wanted.
    date { match => [ "[@metadata][ts]", "dd MMM yyyy HH:mm:ss,SSS" ] }

    # Split into array using (and removing) the timestamps.
    # The regex matches "dd MMM yyyy" followed by a 12-character run of digits,
    # ':' and ',' (e.g. "14:00:02,213"). Each log line becomes one array entry,
    # preceded by a first entry holding whatever text came before the first timestamp.
    # The original "message" string is dropped afterwards; "message" is re-created
    # as an object by the xml filters below.
    ruby {
        code => '
            msg = event.get("message").split(/\d{2} \w{3} \d{4} [\d:,]{12}/)
            event.set("msgArray", msg)
        '
        remove_field => [ "message" ]
    }

    # Match every pattern against every entry in the array.
    # break_on_match => false keeps grok going after the first successful pattern,
    # so all four captures (select, Include, xml1, xml2) can be filled from one pass.
    # "Include" uses [^\n]* to stop at the end of its own line; the ".*" in the xml
    # patterns evidently spans newlines, since the multi-line <xml1> block is
    # captured whole in the sample output.
    # msgArray is only a scratch field and is removed once grok succeeds.
    grok {
        break_on_match => false
        match => {
            "msgArray" => [
                "select %{WORD:select}",
                "Include (?<Include>[^\n]*)",
                "(?<xml1><xml1>.*</xml1>)",
                "(?<xml2><xml2>.*</xml2>)"
            ]
        }
        remove_field => [ "msgArray" ]
    }

    # Include matches twice ("Include important info1" and "Include other
    # important info: ..."), which makes the field an array, so just get the
    # first one and store it back as a plain string.
    mutate { replace => { "Include" => "%{[Include][0]}" } }

    # Parse the captured XML strings into nested fields and drop the raw strings.
    # force_array => false yields plain values ("tag1" => "Value 1") instead of
    # single-element arrays.
    # NOTE(review): the question asks for xml2 under "info"; this config puts both
    # documents under "message". Change the second target to "[info][xml2]" if the
    # requested layout is needed.
    xml { source => "xml1" target => "[message][xml1]" force_array => false remove_field => [ "xml1" ] }
    xml { source => "xml2" target => "[message][xml2]" force_array => false remove_field => [ "xml2" ] }

The array looks like

 "msgArray" => [
    [0] "    ",
    [1] " [AAA-11] INFO  - Init Log\n    ",
    [2] " [AAA-11] INFO  - select name1\n    ",
    [3] " [AAA-11] INFO  - Include important info1\n    ",
    [4] " [AAA-11] INFO  - message\n    <xml1>\n        <tag1>Value 1</tag1>\n        <tag2>Value 2</tag2>\n    </xml1>\n    ",
    [5] " [AAA-11] INFO  - Include other important info: <xml2><tag1>Value 1</tag1><tag2>Value 2</tag2></xml2>\n    ",
    [6] " [AAA-11] INFO  - end log"
],

The final event looks like

   "message" => {
    "xml2" => {
        "tag2" => "Value 2",
        "tag1" => "Value 1"
    },
    "xml1" => {
        "tag2" => "Value 2",
        "tag1" => "Value 1"
    }
},
   "Include" => "important info1",
    "select" => "name1",
"@timestamp" => 2021-02-05T19:00:02.213Z

You may need to tweak the filters, but it should give you some ideas about how to do it.

This topic was automatically closed 28 days after the last reply. New replies are no longer allowed.