Extract multiple XML documents from a multiline log

Hello.

I have a log file with a structure similar to this:

05 Feb 2021 14:00:00,213 [AAA-11] INFO  - Line A:[0]
05 Feb 2021 14:00:00,231 [AAA-11] INFO  - Line2:[0]
05 Feb 2021 14:00:00,231 [AAA-11] INFO  - Line3[: 0]
05 Feb 2021 14:00:01,213 [AAA-11] INFO  - Line A:[0]
05 Feb 2021 14:00:01,231 [AAA-11] INFO  - Line2:[0]
05 Feb 2021 14:00:01,231 [AAA-11] INFO  - Line3[: 0]
05 Feb 2021 14:00:02,213 [AAA-11] INFO  - Init Log
05 Feb 2021 14:00:02,213 [AAA-11] INFO  - select name1
05 Feb 2021 14:00:02,213 [AAA-11] INFO  - Include important info1
05 Feb 2021 14:00:02,231 [AAA-11] INFO  - message
<xml1>
    <tag1>Value 1</tag1>
    <tag2>Value 2</tag2>
</xml1>
05 Feb 2021 14:00:03,131 [AAA-11] INFO  - Include other important info: <xml2><tag1>Value 1</tag1><tag2>Value 2</tag2></xml2>
05 Feb 2021 14:00:03,131 [AAA-11] INFO  - end log
05 Feb 2021 14:00:03,213 [AAA-11] INFO  - Line A:[0]
05 Feb 2021 14:00:03,231 [AAA-11] INFO  - Line2:[0]
05 Feb 2021 14:00:03,231 [AAA-11] INFO  - Line3[: 0]
05 Feb 2021 14:00:04,213 [AAA-11] INFO  - Line A:[0]
05 Feb 2021 14:00:04,231 [AAA-11] INFO  - Line2:[0]
05 Feb 2021 14:00:04,231 [AAA-11] INFO  - Line3[: 0]
05 Feb 2021 14:00:05,213 [AAA-11] INFO  - Line A:[0]
05 Feb 2021 14:00:05,231 [AAA-11] INFO  - Line2:[0]
05 Feb 2021 14:00:05,231 [AAA-11] INFO  - Line3[: 0]
05 Feb 2021 14:01:00,213 [AAA-11] INFO  - Line A:[0]
05 Feb 2021 14:01:00,231 [AAA-11] INFO  - Line2:[0]
05 Feb 2021 14:01:00,231 [AAA-11] INFO  - Line3[: 0]
05 Feb 2021 14:01:01,213 [AAA-11] INFO  - Line A:[0]
05 Feb 2021 14:01:01,231 [AAA-11] INFO  - Line2:[0]
05 Feb 2021 14:01:01,231 [AAA-11] INFO  - Line3[: 0]
05 Feb 2021 14:01:02,213 [AAA-11] INFO  - Init Log
05 Feb 2021 14:01:02,213 [AAA-11] INFO  - select name2
05 Feb 2021 14:01:02,213 [AAA-11] INFO  - Include inportant info2
05 Feb 2021 14:01:02,231 [AAA-11] INFO  - message
<xml1>
    <tag1>Value 1</tag1>
    <tag2>Value 2</tag2>
</xml1>
05 Feb 2021 14:01:03,131 [AAA-11] INFO  - Include other important info: <xml2><tag1>Value 1</tag1><tag2>Value 2</tag2></xml2>
05 Feb 2021 14:01:03,131 [AAA-11] INFO  - end log
05 Feb 2021 14:01:03,213 [AAA-11] INFO  - Line A:[0]
05 Feb 2021 14:01:03,231 [AAA-11] INFO  - Line2:[0]
05 Feb 2021 14:01:03,231 [AAA-11] INFO  - Line3[: 0]
05 Feb 2021 14:01:04,213 [AAA-11] INFO  - Line A:[0]
05 Feb 2021 14:01:04,231 [AAA-11] INFO  - Line2:[0]
05 Feb 2021 14:01:04,231 [AAA-11] INFO  - Line3[: 0]
05 Feb 2021 14:01:05,213 [AAA-11] INFO  - Line A:[0]
05 Feb 2021 14:01:05,231 [AAA-11] INFO  - Line2:[0]
05 Feb 2021 14:01:05,231 [AAA-11] INFO  - Line3[: 0]

I need to get something like this:

{
    "timestamp": "05 Feb 2021 14:00:02,213",
    "select": "name1",
    "Include": "important info1",
    "message": {
        "xml1": {
            "tag1": "Value 1",
            "tag2": "Value 2"
        }
    },
    "info": {
        "xml2": {
            "tag1": "Value 1",
            "tag2": "Value 2"
        }
    }
}

I currently use the multiline codec to split the log into blocks.

Are you consuming all of the log entries you posted as a single event? Do you need help with the multiline codec or help parsing out the data you show?

I am using the multiline codec, so that an individual event contains:

    05 Feb 2021 14:00:02,213 [AAA-11] INFO  - Init Log
    05 Feb 2021 14:00:02,213 [AAA-11] INFO  - select name1
    05 Feb 2021 14:00:02,213 [AAA-11] INFO  - Include important info1
    05 Feb 2021 14:00:02,231 [AAA-11] INFO  - message
    <xml1>
        <tag1>Value 1</tag1>
        <tag2>Value 2</tag2>
    </xml1>
    05 Feb 2021 14:00:03,131 [AAA-11] INFO  - Include other important info: <xml2><tag1>Value 1</tag1><tag2>Value 2</tag2></xml2>
    05 Feb 2021 14:00:03,131 [AAA-11] INFO  - end log

I need to insert something like this into Elasticsearch:

{
    "timestamp": "05 Feb 2021 14:00:02,213",
    "select": "name1",
    "Include": "important info1",
    "message": {
        "xml1": {
            "tag1": "Value 1",
            "tag2": "Value 2"
        }
    },
    "info": {
        "xml2": {
            "tag1": "Value 1",
            "tag2": "Value 2"
        }
    }
}

Here is how I would do it...

    # Overall flow: take the event time from the first log line, split the
    # multiline event into one array entry per log line, pull the interesting
    # fields out of those entries with grok, then parse the two XML fragments.

    # Grab the first timestamp
    # The pattern is not anchored, so grok captures the first date/time it
    # finds in the multiline message, e.g. "05 Feb 2021 14:00:02,213".
    # The capture is stored in [@metadata][ts] and is only used by the date
    # filter that follows.
    grok {
        pattern_definitions => { "TIMESTAMP" => "%{MONTHDAY} %{MONTH} %{YEAR} %{TIME}" }
        match => { "message" => "%{TIMESTAMP:[@metadata][ts]}" }
    }
    # Parse the captured text; no target is given, so the parsed value is
    # written to the event's @timestamp.
    # NOTE(review): no timezone is specified, so the log time is presumably
    # interpreted in the Logstash host's local zone - confirm that is intended.
    date { match => [ "[@metadata][ts]", "dd MMM yyyy HH:mm:ss,SSS" ] }

    # Split into array using (and removing) the timestamps
    # The regex matches "dd Mon yyyy " followed by exactly 12 characters drawn
    # from digits, ':' and ',' - i.e. a time such as "14:00:02,213".
    # Each array entry is the text between two timestamps; entry 0 is whatever
    # precedes the first timestamp (e.g. leading indentation).
    # The original "message" field is removed once the array has been stored.
    ruby {
        code => '
            msg = event.get("message").split(/\d{2} \w{3} \d{4} [\d:,]{12}/)
            event.set("msgArray", msg)
        '
        remove_field => [ "message" ]
    }

    # Match every pattern against every entry in the array
    # break_on_match => false keeps grok going after the first successful
    # pattern, so all four patterns get a chance to capture their field.
    #   select  - the single word following "select "
    #   Include - everything after "Include " up to the end of that line
    #   xml1    - the whole <xml1>...</xml1> element
    #   xml2    - the whole <xml2>...</xml2> element
    grok {
        break_on_match => false
        match => {
            "msgArray" => [
                "select %{WORD:select}",
                "Include (?<Include>[^\n]*)",
                "(?<xml1><xml1>.*</xml1>)",
                "(?<xml2><xml2>.*</xml2>)"
            ]
        }
        remove_field => [ "msgArray" ]
    }

    # Include matches twice, so just get the first one
    # Both the "Include important info1" line and the
    # "Include other important info: <xml2>..." line match, which makes
    # [Include] an array; replace it with its first element.
    mutate { replace => { "Include" => "%{[Include][0]}" } }

    # Parse each captured XML string into a nested object under [message] and
    # drop the raw string afterwards. force_array => false stores single
    # elements as plain values instead of one-element arrays.
    # NOTE(review): the requested document nests xml2 under "info" rather than
    # "message"; change the second target to "[info][xml2]" if that layout is
    # required.
    xml { source => "xml1" target => "[message][xml1]" force_array => false remove_field => [ "xml1" ] }
    xml { source => "xml2" target => "[message][xml2]" force_array => false remove_field => [ "xml2" ] }

After the ruby filter, the intermediate array looks like this:

 "msgArray" => [
    [0] "    ",
    [1] " [AAA-11] INFO  - Init Log\n    ",
    [2] " [AAA-11] INFO  - select name1\n    ",
    [3] " [AAA-11] INFO  - Include important info1\n    ",
    [4] " [AAA-11] INFO  - message\n    <xml1>\n        <tag1>Value 1</tag1>\n        <tag2>Value 2</tag2>\n    </xml1>\n    ",
    [5] " [AAA-11] INFO  - Include other important info: <xml2><tag1>Value 1</tag1><tag2>Value 2</tag2></xml2>\n    ",
    [6] " [AAA-11] INFO  - end log"
],

The final event looks like this:

   "message" => {
    "xml2" => {
        "tag2" => "Value 2",
        "tag1" => "Value 1"
    },
    "xml1" => {
        "tag2" => "Value 2",
        "tag1" => "Value 1"
    }
},
   "Include" => "important info1",
    "select" => "name1",
"@timestamp" => 2021-02-05T19:00:02.213Z

You may need to tweak the filters, but it should give you some ideas about how to do it.