Triple Logstash Field Output

Hi there,
I have a problem with my current Logstash configuration.
I am trying to write a config for Sophos UTM logs. The fields get extracted the way I want, but every time
the same value appears three times in each of the extracted fields.
Any ideas why?

My configuration:

filter {
...
if "ulogd" in [message]{
grok {
break_on_match => false
match => [
"message", "<%{INT:fw_id}>(?%{YEAR}:%{MONTHNUM}:%{MONTHDAY}-%{HOUR}:%{MINUTE}:%{SECOND}) %{HOSTNAME:logsource} %{WORD:program}[%{INT:pid}]: id="%{INT:id}" severity="%{WORD:severity}" sys="%{WORD:sys}" sub="%{WORD:sub}" name="%{DATA:name}" action="%{WORD:action}" fwrule="%{INT:fwrule}" outitf="%{NOTSPACE:outitf}" mark="%{DATA:mark}" app="%{DATA:app}" srcmac="%{MAC:srcmac}" srcip="%{IPV4:source_ip}" dstip="%{IPV4:destination_ip}" proto="%{WORD:protocol}" length="%{INT:length}" tos="%{DATA:tos}" prec="%{DATA:prec}" ttl="%{INT:ttl}" srcport="%{INT:srcport}" dstport="%{INT:dstport}" ", "message", "<%{INT:fw_id}>(?%{YEAR}:%{MONTHNUM}:%{MONTHDAY}-%{HOUR}:%{MINUTE}:%{SECOND}) %{HOSTNAME:logsource} %{WORD:program}[%{INT:pid}]: id="%{INT:id}" severity="%{WORD:severity}" sys="%{WORD:sys}" sub="%{WORD:sub}" name="%{DATA:name}" action="%{WORD:action}" fwrule="%{INT:fwrule}" initf="%{NOTSPACE:initf}" outitf="%{NOTSPACE:outitf}" srcmac="%{MAC:srcmac}" dstmac="%{MAC:dstmac}" srcip="%{IPV4:source_ip}" dstip="%{IPV4:destination_ip}" proto="%{WORD:protocol}" length="%{INT:length}" tos="%{DATA:tos}" prec="%{DATA:prec}" ttl="%{INT:ttl}" srcport="%{INT:srcport}" dstport="%{INT:dstport}" ",
"message", "<%{INT:fw_id}>(?%{YEAR}:%{MONTHNUM}:%{MONTHDAY}-%{HOUR}:%{MINUTE}:%{SECOND}) %{HOSTNAME:logsource} %{WORD:program}[%{INT:pid}]: id="%{INT:id}" severity="%{WORD:severity}" sys="%{WORD:sys}" sub="%{WORD:sub}" name="%{DATA:name}" action="%{WORD:action}" fwrule="%{INT:fwrule}" initf="%{NOTSPACE:initf}" outitf="%{NOTSPACE:outitf}" srcmac="%{MAC:srcmac}" srcip="%{IPV4:source_ip}" dstip="%{IPV4:destination_ip}" proto="%{WORD:protocol}" length="%{INT:length}" tos="%{DATA:tos}" prec="%{DATA:prec}" ttl="%{INT:ttl}" srcport="%{INT:srcport}" dstport="%{INT:dstport}" ","message", "<%{INT:fw_id}>(?%{YEAR}:%{MONTHNUM}:%{MONTHDAY}-%{HOUR}:%{MINUTE}:%{SECOND}) %{HOSTNAME:logsource} %{WORD:program}[%{INT:pid}]: id="%{INT:id}" severity="%{WORD:severity}" sys="%{WORD:sys}" sub="%{WORD:sub}" name="%{DATA:name}" action="%{WORD:action}" fwrule="%{INT:fwrule}" outitf="%{NOTSPACE:outitf}" srcmac="%{MAC:srcmac}" srcip="%{IPV4:source_ip}" dstip="%{IPV4:destination_ip}" proto="%{WORD:protocol}" length="%{INT:length}" tos="%{DATA:tos}" prec="%{DATA:prec}" ttl="%{INT:ttl}" srcport="%{INT:srcport}" dstport="%{INT:dstport}" ","message", "<%{INT:fw_id}>(?%{YEAR}:%{MONTHNUM}:%{MONTHDAY}-%{HOUR}:%{MINUTE}:%{SECOND}) %{HOSTNAME:logsource} %{WORD:program}[%{INT:pid}]: id="%{INT:id}" severity="%{WORD:severity}" sys="%{WORD:sys}" sub="%{WORD:sub}" name="%{DATA:name}" action="%{WORD:action}" fwrule="%{INT:fwrule}" initf="%{NOTSPACE:initf}" outitf="%{NOTSPACE:outitf}" srcmac="%{MAC:srcmac}" dstmac="%{MAC:dstmac}" srcip="%{IPV4:source_ip}" dstip="%{IPV4:destination_ip}" proto="%{WORD:protocol}" length="%{INT:length}" tos="%{DATA:tos}" prec="%{DATA:prec}" ttl="%{INT:ttl}" srcport="%{INT:srcport}" dstport="%{INT:dstport}" ","message", "<%{INT:fw_id}>(?%{YEAR}:%{MONTHNUM}:%{MONTHDAY}-%{HOUR}:%{MINUTE}:%{SECOND}) %{HOSTNAME:logsource} %{WORD:program}[%{INT:pid}]: id="%{INT:id}" severity="%{WORD:severity}" sys="%{WORD:sys}" sub="%{WORD:sub}" name="%{DATA:name}" action="%{WORD:action}" 
fwrule="%{INT:fwrule}" initf="%{NOTSPACE:initf}" outitf="%{NOTSPACE:outitf}" srcmac="%{MAC:srcmac}" dstmac="%{MAC:dstmac}" srcip="%{IPV4:source_ip}" dstip="%{IPV4:destination_ip}" proto="%{WORD:protocol}" length="%{INT:length}" tos="%{DATA:tos}" prec="%{DATA:prec}" ttl="%{INT:ttl}" srcport="%{INT:srcport}" dstport="%{INT:dstport}" tcpflags="%{DATA:tcpflags}" ","message", "<%{INT:fw_id}>(?%{YEAR}:%{MONTHNUM}:%{MONTHDAY}-%{HOUR}:%{MINUTE}:%{SECOND}) %{HOSTNAME:logsource} %{WORD:program}[%{INT:pid}]: id="%{INT:id}" severity="%{WORD:severity}" sys="%{WORD:sys}" sub="%{WORD:sub}" name="%{DATA:name}" action="%{WORD:action}" fwrule="%{INT:fwrule}" initf="%{NOTSPACE:initf}" srcmac="%{MAC:srcmac}" dstmac="%{MAC:dstmac}" srcip="%{IPV4:source_ip}" dstip="%{IPV4:destination_ip}" proto="%{WORD:protocol}" length="%{INT:length}" tos="%{DATA:tos}" prec="%{DATA:prec}" ttl="%{INT:ttl}" srcport="%{INT:srcport}" dstport="%{INT:dstport}" "]
}
mutate {
remove_tag => "_grokparsefailure"
add_tag => "ulogd" }
geoip {
source => "destination_ip"
target => "geoip"
database => "/etc/logstash/GeoLite2-City.mmdb"
add_field => [ "[geoip][coordinates]", "%{[geoip][longitude]}" ]
add_field => [ "[geoip][coordinates]", "%{[geoip][latitude]}" ]
}
mutate {
convert => [ "[geoip][coordinates]", "float"]
}
}
...
}

And an example of a processed log:

t action accept, accept, accept
t destination_ip 2.16.184.210, 2.16.184.210, 2.16.184.210
t dstmac 00:50:56:a4:85:48, 00:50:56:a4:85:48, 00:50:56:a4:85:48
t dstport 443, 443, 443
t fw_id 30, 30, 30
t fwrule 1, 1, 1
...
t id 2002, 2002, 2002
t initf eth0, eth0, eth0
t length 211, 211, 211
t logsource utm, utm, utm
t name Packet accepted, Packet accepted, Packet accepted
t outitf eth0, eth0, eth0
t pid 4561, 4561, 4561
t prec 0x00, 0x00, 0x00
t program ulogd, ulogd, ulogd
t protocol 6, 6, 6
t severity info, info, info
t source_ip 10.10.70.105, 10.10.70.105, 10.10.70.105
t srcmac 00:50:56:a4:3e:87, 00:50:56:a4:3e:87, 00:50:56:a4:3e:87
t srcport 38948, 38948, 38948
t sub packetfilter, packetfilter, packetfilter
t sys SecureNet, SecureNet, SecureNet
t timestamp 2017:01:24-09:18:35, 2017:01:24-09:18:35, 2017:01:24-09:18:35
t tos 0x00, 0x00, 0x00
t ttl 63, 63, 63

You have set break_on_match to false, which causes grok to evaluate every expression instead of stopping at the first match, and each expression that matches extracts its fields. Because several of your patterns match the same line and write to the same field names, each field ends up holding one copy of the value per matching pattern, which is three in your example. However, it looks like a large part of your log line is a key-value list. Why not use grok to capture the full key-value list into a single field and then apply the kv filter to it to extract the fields? This should better handle the case where parameters show up in a different order, and it may well be more efficient too.

1 Like

This topic was automatically closed 28 days after the last reply. New replies are no longer allowed.