[SOLVED] Logstash not parsing UTM logs correctly


#1

I've got some UTM logs I need to process with logstash, but it only parsing half of it. The logs are divided into four parts, so to speak, namely:

  1. ulogd
  2. aua
  3. reverseproxy
  4. wlan

I've built a config file, which uses conditionals for processing the logs, like so:

filter {
  if [type] == "utm" {
    if "reverseproxy" in [message]{
      grok {
      }
   }
   if "ulogd" in [message] {
     grok {
     }
   }
   if "wlan" in [message]{
     grok {
     }
   }
   if "aua" in [message] {
     grok {
     }
   }

You get the idea. The problem is, that "reverseproxy" and "aua" are not processed at all. "wlan" and "ulogd" work perfectly, while the other two are just being ignored, it seems. I tried to mutate the not-working ones and added tags ("aua" and "reverseproxy" respectively) which are showing up, but logstash seems to ignore the grok filters I built (which are working fine in the grok debugger by the way).

Is there any way to find out what the problem is here? :-/

Thanks in advance!

Cheers
Christoph


(Magnus B├Ąck) #2

Please provide a more complete example (including an example of the logs that you want to parse).


#3

Okay, sorry for that.

I'm using my logstash instance for collecting logs from our firewall, these are categorized based on the messages in them:

Example for reverseproxy log:

2015:05:20-16:45:16 hostname reverseproxy: id="0299" srcip="x.x.x.x" localip="y.y.y.y" size="434" user="-" host="z.z.z.z" method="POST" statuscode="502" reason="-" extra="-" exceptions="-" time="300105178" url="/url/" server="webmail.example.com" referer="-" cookie="-" set-cookie="-" 

Example for aua log:

2015:05:20-00:01:59 hostname aua[1615]: id="3006" severity="info" sys="System" sub="auth" name="Trying x.x.x.x (adirectory)"

My filter for these logs looks like this:

filter {
  if [type] == "utm" {
    if "reverseproxy" in [message]{
      grok {
      break_on_match => false
        match => [
        "message", "<%{INT:priority}>(?<timestamp>%{YEAR}:%{MONTHNUM}:%{MONTHDAY}-%{HOUR}:%{MINUTE}:%{SECOND}) %{HOSTNAME:logsource} %{WORD:interface}: id=\"%{INT:id}\" srcip=\"%{IP:source_ip}\" localip=\"%{IP:local_ip}\" size=\"%{INT:size}\" user=\"%{USER:user}\" host=\"%{HOSTNAME:host}\" method=\"%{WORD:method}\" statuscode=\"%{INT:statuscode}\" reason=\"%{DATA:reason}\" extra=\"%{DATA:extra}\" exceptions=\"%{DATA:exceptions}\" time=\"%{INT:time}\" url=\"%{DATA:url}\" server=\"%{DATA:server}\" referer=\"%{DATA:referer}\" cookie=\"%{GREEDYDATA:cookie}\" set-cookie=\"%{DATA:set_cookie}\" ",
        "message", "<%{INT:priority}>(?<timestamp>%{YEAR}:%{MONTHNUM}:%{MONTHDAY}-%{HOUR}:%{MINUTE}:%{SECOND}) %{HOSTNAME:logsource} %{WORD:interface}: id=\"%{INT:id}\" srcip=\"%{IP:source_ip}\" localip=\"%{IP:local_ip}\" size=\"%{INT:size}\" user=\"%{USER:user}\" host=\"%{HOSTNAME:host}\" method=\"%{WORD:method}\" statuscode=\"%{INT:statuscode}\" reason=\"%{DATA:reason}\" extra=\"%{DATA:extra}\" exceptions=\"%{DATA:exceptions}\" time=\"%{INT:time}\" url=\"%{DATA:url}\" server=\"%{DATA:server}\" referer=\"%{DATA:referer}\"; cookie=\"%{GREEDYDATA:cookie}\" set-cookie=\"%{DATA:set_cookie}\" "]
      }
      mutate {
        add_tag => "reverseproxy"
        remove_tag => "_grokparsefailure"
      }
      geoip {
        source => "source_ip"
      }
    }
    if "wlan" in [message]{
      grok {
        break_on_match => false
        match => [
          "message", "<%{INT:priority}>(?<timestamp>%{YEAR}:%{MONTHNUM}:%{MONTHDAY}-%{HOUR}:%{MINUTE}:%{SECOND}) %{IP:ip} %{WORD:interface}: %{DATA:wlan}: STA %{MAC:mac} WPA: %{GREEDYDATA:wifi_message}",
          "message", "<%{INT:priority}>(?<timestamp>%{YEAR}:%{MONTHNUM}:%{MONTHDAY}-%{HOUR}:%{MINUTE}:%{SECOND}) %{IP:ip} %{WORD:interface}: %{DATA:wlan}: STA %{MAC:mac} %{DATA:protocol}: %{GREEDYDATA:wifi_message}"]
      }
      mutate {
        remove_tag => "_grokparsefailure"
        add_tag => "wlan" }
      geoip {
        source => "source_ip"
      }
    }
    if "ulogd" in [message]{
      grok {
        break_on_match => false
        match => [
          "message", "<%{INT:fw_id}>(?<timestamp>%{YEAR}:%{MONTHNUM}:%{MONTHDAY}-%{HOUR}:%{MINUTE}:%{SECOND}) %{HOST:logsource} %{WORD:program}\[%{INT:pid}\]: id=\"%{INT:id}\" severity=\"%{WORD:severity}\" sys=\"%{WORD:sys}\" sub=\"%{WORD:sub}\" name=\"%{DATA:name}\" action=\"%{WORD:action}\" fwrule=\"%{INT:fwrule}\" outitf=\"%{NOTSPACE:outitf}\" mark=\"%{DATA:mark}\" app=\"%{DATA:app}\" srcmac=\"%{MAC:srcmac}\" srcip=\"%{IPV4:source_ip}\" dstip=\"%{IPV4:destination_ip}\" proto=\"%{WORD:protocol}\" length=\"%{INT:length}\" tos=\"%{DATA:tos}\" prec=\"%{DATA:prec}\" ttl=\"%{INT:ttl}\" srcport=\"%{INT:srcport}\" dstport=\"%{INT:dstport}\" ",
          "message", "<%{INT:fw_id}>(?<timestamp>%{YEAR}:%{MONTHNUM}:%{MONTHDAY}-%{HOUR}:%{MINUTE}:%{SECOND}) %{HOST:logsource} %{WORD:program}\[%{INT:pid}\]: id=\"%{INT:id}\" severity=\"%{WORD:severity}\" sys=\"%{WORD:sys}\" sub=\"%{WORD:sub}\" name=\"%{DATA:name}\" action=\"%{WORD:action}\" fwrule=\"%{INT:fwrule}\" initf=\"%{NOTSPACE:initf}\" outitf=\"%{NOTSPACE:outitf}\" srcmac=\"%{MAC:srcmac}\" dstmac=\"%{MAC:dstmac}\" srcip=\"%{IPV4:source_ip}\" dstip=\"%{IPV4:destination_ip}\" proto=\"%{WORD:protocol}\" length=\"%{INT:length}\" tos=\"%{DATA:tos}\" prec=\"%{DATA:prec}\" ttl=\"%{INT:ttl}\" srcport=\"%{INT:srcport}\" dstport=\"%{INT:dstport}\" ",
          "message", "<%{INT:fw_id}>(?<timestamp>%{YEAR}:%{MONTHNUM}:%{MONTHDAY}-%{HOUR}:%{MINUTE}:%{SECOND}) %{HOST:logsource} %{WORD:program}\[%{INT:pid}\]: id=\"%{INT:id}\" severity=\"%{WORD:severity}\" sys=\"%{WORD:sys}\" sub=\"%{WORD:sub}\" name=\"%{DATA:name}\" action=\"%{WORD:action}\" fwrule=\"%{INT:fwrule}\" initf=\"%{NOTSPACE:initf}\" outitf=\"%{NOTSPACE:outitf}\" srcmac=\"%{MAC:srcmac}\" srcip=\"%{IPV4:source_ip}\" dstip=\"%{IPV4:destination_ip}\" proto=\"%{WORD:protocol}\" length=\"%{INT:length}\" tos=\"%{DATA:tos}\" prec=\"%{DATA:prec}\" ttl=\"%{INT:ttl}\" srcport=\"%{INT:srcport}\" dstport=\"%{INT:dstport}\" ",
          "message", "<%{INT:fw_id}>(?<timestamp>%{YEAR}:%{MONTHNUM}:%{MONTHDAY}-%{HOUR}:%{MINUTE}:%{SECOND}) %{HOST:logsource} %{WORD:program}\[%{INT:pid}\]: id=\"%{INT:id}\" severity=\"%{WORD:severity}\" sys=\"%{WORD:sys}\" sub=\"%{WORD:sub}\" name=\"%{DATA:name}\" action=\"%{WORD:action}\" fwrule=\"%{INT:fwrule}\" outitf=\"%{NOTSPACE:outitf}\" srcmac=\"%{MAC:srcmac}\" srcip=\"%{IPV4:source_ip}\" dstip=\"%{IPV4:destination_ip}\" proto=\"%{WORD:protocol}\" length=\"%{INT:length}\" tos=\"%{DATA:tos}\" prec=\"%{DATA:prec}\" ttl=\"%{INT:ttl}\" srcport=\"%{INT:srcport}\" dstport=\"%{INT:dstport}\" ",
          "message", "<%{INT:fw_id}>(?<timestamp>%{YEAR}:%{MONTHNUM}:%{MONTHDAY}-%{HOUR}:%{MINUTE}:%{SECOND}) %{HOST:logsource} %{WORD:program}\[%{INT:pid}\]: id=\"%{INT:id}\" severity=\"%{WORD:severity}\" sys=\"%{WORD:sys}\" sub=\"%{WORD:sub}\" name=\"%{DATA:name}\" action=\"%{WORD:action}\" fwrule=\"%{INT:fwrule}\" initf=\"%{NOTSPACE:initf}\" outitf=\"%{NOTSPACE:outitf}\" srcmac=\"%{MAC:srcmac}\" dstmac=\"%{MAC:dstmac}\" srcip=\"%{IPV4:source_ip}\" dstip=\"%{IPV4:destination_ip}\" proto=\"%{WORD:protocol}\" length=\"%{INT:length}\" tos=\"%{DATA:tos}\" prec=\"%{DATA:prec}\" ttl=\"%{INT:ttl}\" srcport=\"%{INT:srcport}\" dstport=\"%{INT:dstport}\" ",
          "message", "<%{INT:fw_id}>(?<timestamp>%{YEAR}:%{MONTHNUM}:%{MONTHDAY}-%{HOUR}:%{MINUTE}:%{SECOND}) %{HOST:logsource} %{WORD:program}\[%{INT:pid}\]: id=\"%{INT:id}\" severity=\"%{WORD:severity}\" sys=\"%{WORD:sys}\" sub=\"%{WORD:sub}\" name=\"%{DATA:name}\" action=\"%{WORD:action}\" fwrule=\"%{INT:fwrule}\" initf=\"%{NOTSPACE:initf}\" outitf=\"%{NOTSPACE:outitf}\" srcmac=\"%{MAC:srcmac}\" dstmac=\"%{MAC:dstmac}\" srcip=\"%{IPV4:source_ip}\" dstip=\"%{IPV4:destination_ip}\" proto=\"%{WORD:protocol}\" length=\"%{INT:length}\" tos=\"%{DATA:tos}\" prec=\"%{DATA:prec}\" ttl=\"%{INT:ttl}\" srcport=\"%{INT:srcport}\" dstport=\"%{INT:dstport}\" tcpflags=\"%{DATA:tcpflags}\" ",
          "message", "<%{INT:fw_id}>(?<timestamp>%{YEAR}:%{MONTHNUM}:%{MONTHDAY}-%{HOUR}:%{MINUTE}:%{SECOND}) %{HOST:logsource} %{WORD:program}\[%{INT:pid}\]: id=\"%{INT:id}\" severity=\"%{WORD:severity}\" sys=\"%{WORD:sys}\" sub=\"%{WORD:sub}\" name=\"%{DATA:name}\" action=\"%{WORD:action}\" fwrule=\"%{INT:fwrule}\" initf=\"%{NOTSPACE:initf}\" srcmac=\"%{MAC:srcmac}\" dstmac=\"%{MAC:dstmac}\" srcip=\"%{IPV4:source_ip}\" dstip=\"%{IPV4:destination_ip}\" proto=\"%{WORD:protocol}\" length=\"%{INT:length}\" tos=\"%{DATA:tos}\" prec=\"%{DATA:prec}\" ttl=\"%{INT:ttl}\" srcport=\"%{INT:srcport}\" dstport=\"%{INT:dstport}\" "]
      }
      mutate {
        remove_tag => "_grokparsefailure"
        add_tag => "ulogd" }
      geoip {
        source => "source_ip"
      }
    }
    if "aua" in [message]{
      grok {
        break_on_match => false
        match => [
          "message", "<%{INT:fw_id}>(?<timestamp>%{YEAR}:%{MONTHNUM}:%{MONTHDAY}-%{HOUR}:%{MINUTE}:%{SECOND}) %{HOST:logsource} %{WORD:program}\[%{INT:pid}\]: id=\"%{INT:id}\" severity=\"%{WORD:severity}\" sys=\"%{WORD:sys}\" sub=\"%{WORD:sub}\" name=\"%{DATA:name}\" srcip=\"%{IP:source_ip}\" host=\"%{DATA:external_host}\" user=\"%{DATA:user}\" caller=\"%{WORD:caller}\" reason=\"%{WORD:reason}\" ",
          "message", "<%{INT:fw_id}>(?<timestamp>%{YEAR}:%{MONTHNUM}:%{MONTHDAY}-%{HOUR}:%{MINUTE}:%{SECOND}) %{HOST:logsource} %{WORD:program}\[%{INT:pid}\]: id=\"%{INT:id}\" severity=\"%{WORD:severity}\" sys=\"%{WORD:sys}\" sub=\"%{WORD:sub}\" name=\"Trying %{IP:ip_intern} %{DATA}\" "]
      }
      mutate {
        remove_tag => "_grokparsefailure"
        add_tag => "aua" }
      geoip {
        source => "source_ip"
      }
    }
  }
}</pre>

Note: two of the four are working perfectly (ulogd and wlan), but I cannot figure out why.


#4

Okay, I was finally able to solve this:

Instead of escaping all the double quotes, I used single quotes at the beginning and end of the "message" part and did not escape the double quotes. Works like a charm!


(system) #5