Multiline pattern for logstash rsyslog

UDP guarantees neither delivery nor ordering, so I do not see the point in trying to use a multiline codec with it: you will always run the risk of garbled input. If you care about correctly assembling multiline messages, use TCP instead.

Thank you. When I use TCP, I get this (Logstash log):

[WARN ] 2018-10-02 09:00:01.881 [nioEventLoopGroup-2-2] multiline - Received an event that has a different character encoding than you configured. {:text=>"H<\x9B\xA7\xAB\xC8\xCFɘ\xDCH\xB3<\t/#\u0012\x90x6OW\x91\x9F\x931\xB9\u0015\xAD\xB2\xF8\u0010\x9B\ei\x96\x93\xE0n\xE5Oܻ\xB7!\xEA\u0011E\u0019\xFD\t&\xCA\nt\xBD#\xA2J\\\x98\x86>8H\xC6\xF5\xE2\xB0\u0002\xCDW\xE3YU\u001C{EoDT\xFC\xDE\xC4\xF6]\xF5y\xDD\xF2\\M{A\xBD\f\xE3d\xAA[\xA6\xE3No\xE4\xF9r\xAA\xBB\xC6İ'\xDED\xF7\u0002\xC7rmO\x8D\x938\x9F,\xD6a\xA0\xBB\xBE\xFA\xF8\xB5\xB5\xA6\x99\x91\x9A\xC4\v2&Y\xCAT5\xE5\xEE\u001F\u0014\xE5\x91\xD7и[\xAE\xD2l\u0019\xCDk\xE5\xCB\u0014\xD9x[\xCDU\xFD\xD1\xFF\x94\x84\x97E\u0015\xEC\xC0%\xAD\xCA۲\xACn\u0018\xF4\xE4\u0018\xF4t\xF5\xD9k\x94\xC1\xF0\xEC,N.\xD4PM\xA2\x97\xD5t\xF6b4\xCF\xD5<U\xF3\e\x91\xBA\x8A\x96a\xBCR_ZG\xEB(P\x9F\x8E\x97\xD1\"N\"5\xBA\u001D\xCD\xD7y\x9C&\xEAy\u0018/\xA2\xB3@}b\x9Dl템\xA7\xF3uv=[\xA6\xF9y\xBAz9\\\x9D]\x9F\xA7\xEBe\x9A,\xC3;\x97Q\x92_\xBF\f\x97\u05C8\u001A'\xEA2^F\x8B8\x89\xD4\xE6\xFFs#L\xCE\u0016\xD15\u0012\xA8\x93i<\xBB:O//\xD3d\x9A\x85\xC9\xD9,\xBD=\xD9H\xD2\xD4\u0002\xF5\x99Ux\u0016\xA9I\x9A\xAB\xE7\xE9:9#\xE3z\xAF\xA2\x8A\xFCGE\u0019]\xA0)\xB2\xE4&\xAE\xEB\x8AR\x9F\xE5\xE1\xFC\xA6\x9A\xAF\xC2y4\v\xE77\u0003֕\rT\xF9\u0014\xF0\u007FR\x94\xD1\xEF\xD1\u0000\xCFR\xA6#\u001D\xB0ᙚ(\ee\r.\xB7\"\x9E\xC6;s4\xDDÆ{e\u001E.\u0016\xD7\xCF\xD7ɦ@\xAE\u0010\x98\xA38Y\xAE7\u001E\x84\xDD\\\xFF\u0019\x95\u001DȠ\u0019\u0010p\xF8v\xF1+\xCD.=\xAE\xCE\u0017\xFCr\xF1\r\xAF[\xB9\xB8v\x9D\\v\xB2\xBA\xB2\xAD\x9D\u007FQ\x94џ\xD1j\x87\xC1\xC0f\xC2m\xD7\u001E\v\x86\xEA\xFE\u0004?\u0003\xA6f8\xDD2\xA0\xEB\xFA!\u0005\x9B\xBE8\x87\xAB\\\xD7\xFCͯE`=S\xE3\xFBWE\u0019\xA5h\xE4]\xD2\xDB\t\u00

If you share your config, it will probably be easier for someone to help you. However, I have not dealt much with character encoding problems, so I will probably not be able to help.

input {
    # Listen for syslog traffic on TCP port 5000 and tag each event with type "syslog".
    tcp {
        port  => 5000
        type  => syslog

        # Reassemble multi-line messages: any line that does NOT match `pattern`
        # (negate => true) is appended to the previous event (what => "previous").
        # A new event therefore starts only on a line that begins with a syslog
        # header followed by an ISO8601 timestamp.
        codec => multiline {
            pattern => "^%{SYSLOGBASE} %{TIMESTAMP_ISO8601}"
            negate  => true
            what    => "previous"
        }
    }
}

filter {
    # Parse the syslog header plus the application log line that follows it
    # (the capture name `timestamp_tarantool` suggests Tarantool output -- confirm).
    # The three patterns are tried in order and cover:
    #   1. <fiber>/<id>/<method> <level>> <text>
    #   2. <fiber>/<id>/<method>/<ip>:<port> <level>> <text>
    #   3. <fiber>/<id>/<method>/<ip>:<port> <file>.<ext>:<line> <level>> <text>
    # Each pattern must stay on a single line: a line break inside the string
    # splits "%{...}" into "%" + newline + "{...}", which grok no longer treats
    # as a pattern reference, so the expression would never match.
    grok {
        match => {
            "message" => [
                "%{SYSLOGBASE} %{TIMESTAMP_ISO8601:timestamp_tarantool} \[%{NUMBER:id}\] %{WORD}/%{NUMBER}/%{WORD:method} %{WORD:log_level}> %{GREEDYDATA:msg}",
                "%{SYSLOGBASE} %{TIMESTAMP_ISO8601:timestamp_tarantool} \[%{NUMBER:id}\] %{WORD}/%{NUMBER}/%{WORD:method}/%{IP}:%{NUMBER} %{WORD:log_level}> %{GREEDYDATA:msg}",
                "%{SYSLOGBASE} %{TIMESTAMP_ISO8601:timestamp_tarantool} \[%{NUMBER:id}\] %{WORD}/%{NUMBER}/%{WORD:method}/%{IP}:%{NUMBER} %{WORD}.%{WORD}:%{NUMBER} %{WORD:log_level}> %{GREEDYDATA:msg}"
            ]
        }
    }

    # Discard unparsed events first, so the mutate below never rewrites
    # "message" to the literal text "%{msg}" for an event that has no `msg` field.
    if "_grokparsefailure" in [tags] { drop {} }

    # Keep only the free-text part of the log line as the event message.
    mutate {
        replace      => { "message" => "%{msg}" }
        remove_field => [ "msg" ]
    }

    # Copy messages that mention "queue" into a dedicated field.
    if [message] =~ "queue" {
        mutate {
            add_field => { "repair_queue" => "%{message}" }
        }
    }
}

output {
    # Index every surviving event into Elasticsearch, one index per day
    # (the date in the index name is formatted from the event timestamp).
    elasticsearch {
        hosts  => "ibmetrics2:9200"
        action => "index"
        index  => "logstash-%{+YYYY.MM.dd}"
    }
}

Have you tried specifying an appropriate charset for the multiline codec?

But I have LANG=en_US.UTF-8 set on the source server.

It is working now with version 6.1.1 and UDP.

input {
    # Listen for syslog datagrams on UDP port 5000 and tag each event with type "syslog".
    udp {
        port  => 5000
        type  => syslog

        # Reassemble multi-line messages: a new event starts only on a line that
        # begins with "<number>" followed by a syslog header and an ISO8601 timestamp.
        # Every other line (negate => true) is appended to the previous event.
        codec => multiline {
            pattern => "^<%{NUMBER}>%{SYSLOGBASE} %{TIMESTAMP_ISO8601} "
            negate  => true
            what    => "previous"
        }
    }
}

filter {
    # Parse the syslog header plus the application log line that follows it.
    # The patterns are tried in order; they differ only in the optional
    # "/<ip>:<port>" and "<file>.<ext>:<line>" parts before the log level.
    grok {
        match => {
            "message" => [
                "%{SYSLOGBASE} %{TIMESTAMP_ISO8601:timestamp_tarantool} \[%{NUMBER:id}\] %{WORD}/%{NUMBER}/%{WORD:method} %{WORD:log_level}> %{GREEDYDATA:msg}",
                "%{SYSLOGBASE} %{TIMESTAMP_ISO8601:timestamp_tarantool} \[%{NUMBER:id}\] %{WORD}/%{NUMBER}/%{WORD:method}/%{IP}:%{NUMBER} %{WORD:log_level}> %{GREEDYDATA:msg}",
                "%{SYSLOGBASE} %{TIMESTAMP_ISO8601:timestamp_tarantool} \[%{NUMBER:id}\] %{WORD}/%{NUMBER}/%{WORD:method}/%{IP}:%{NUMBER} %{WORD}.%{WORD}:%{NUMBER} %{WORD:log_level}> %{GREEDYDATA:msg}"
            ]
        }
    }

    # Keep only the free-text part of the log line as the event message.
    mutate {
        replace      => { "message" => "%{msg}" }
        remove_field => [ "msg" ]
    }

    # Discard anything grok could not parse.
    if "_grokparsefailure" in [tags] { drop {} }

    # Copy messages that mention "queue" into a dedicated field.
    if [message] =~ "queue" {
        mutate {
            add_field => { "repair_queue" => "%{message}" }
        }
    }

    # Discard events whose message contains "nil".
    if "nil" in [message] { drop {} }
}

output {
    # Index every surviving event into Elasticsearch, one index per day
    # (the date in the index name is formatted from the event timestamp).
    elasticsearch {
        hosts  => "ibmetrics2:9200"
        action => "index"
        index  => "logstash-%{+YYYY.MM.dd}"
    }
}

This topic was automatically closed 28 days after the last reply. New replies are no longer allowed.