Logstash input file codec multiline not working as expected

I am trying to process multiline messages in a log using the multiline
input codec. I am getting stuck, and sometimes it produces a grok parse
(_grokparsefailure) error. Any recommendations?

--Log lines start --

 2016-06-24 12:21:38||INFO||<soapenv:Envelope xmlns:soapenv="http://schemas.xmlsoap.org/soap/envelope/"><soapenv:Body>></soapenv:Body></soapenv:Envelope>
2016-06-27 15:19:03||INFO||<soapenv:Envelope xmlns:soapenv="http://schemas.xmlsoap.org/soap/envelope/"><soapenv:Body>
  <WorkFulfillmentPowerMedResponse>
    <CompletedWorkItem xmlns:ns1="commonj.sdo" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"></CompletedWorkItem>
    <Result xmlns:ns1="commonj.sdo" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"><ns55:appCode>0</ns55:appCode><ns56:errorMessage/></Result>
  </WorkFulfillmentPowerMedResponse>
</xci0:updatedCompletedWorkItemResponse>
</soapenv:Body></soapenv:Envelope>
2016-06-28 11:08:32||INFO||<soapenv:Envelope xmlns:soapenv="http://schemas.xmlsoap.org/soap/envelope/"><soapenv:Body></soapenv:Body></soapenv:Envelope>

--Log lines end --

logstash config file

# The # character at the beginning of a line indicates a comment. Use
# comments to describe your configuration.
input {
    file{
        path => "/Logs/PowerApp/PowerApp.log"
        type => "powerapp"
        sincedb_path => "/dev/null"
        start_position => "beginning"
        ignore_older => 0
        codec => multiline {
           #pattern => "201\d-(0[1-9]|1[012])-(0[1-9]|[12]\d|3[01])\s(0[1-9]|1\d|2[0-3]):([0-5]\d):([0-5]\d)||"
           pattern => "201\d-(0[1-9]|1[012])-(0[1-9]|[12]\d|3[01])"
           negate => true
           what => previous
        }
    }


}
# The filter part of this file is commented out to indicate that it is
# optional.
filter {

   if[type] == "powerapp"{
#       multiline {
#           pattern => "201\d-(0[1-9]|1[012])-(0[1-9]|[12]\d|3[01])"
#           negate => "true"
#           what => "previous"
#       }
       grok {
           match => ["message", "(?<date>(([0-9]+)-*)+ ([0-9]+:*)+)\|\|%{WORD:ll}\|\|(?<msg>(.|\r|\n)*)"]
           add_field => ["application_name", "powerapp"]
           add_field => ["log_type", "powerapp_log"]
       }
       date {
           match => ["date","yyyy-MM-dd HH:mm:ss"]
           target => "@timestamp"
       }
#       metrics {
#           flush_interval => 10
#           meter => "powerapp_events"
#           add_tag => ["powerapp_metric", "generated"]
#       }
   }
}

output {
    elasticsearch {
        hosts => ["127.0.0.1:9200"]
        index => "logstash-walgreens-%{+YYYY.MM.dd}"
    }

    stdout { codec => rubydebug }

    if "powerapp_metric" in [tags] {
        stdout {
                codec => line{
                    format => "powerapp metric : count %{[powerapp_events][count]} - rate: %{[powerapp_events][rate_1m]} per minute"
                }
        }
    }


    if "_grokparsefailure" in [tags] {

        if[type] == "powerapp"{
            file{
                path => "/Logs/PowerApp-ProcessinError.txt"
            }
        }

        stdout { 
            codec => rubydebug 
        }
    }
}

Please give an example of the kind of message that the grok message can't match. Both the raw input and the output of a stdout { codec => rubydebug } output, please.

Please go through the link below; it discusses the same problem.

It is a long-existing problem. As a workaround, people used to use the multiline filter, and logs were getting parsed correctly. BUT now the multiline filter has been deprecated in favor of the multiline codec.
Does anybody have any other workaround? The last line of the logs is still not getting processed.

For me setting auto_flush_interval => 5 does solve the problem.

1 Like