Hi,
There is definitely something wrong with how the grok filter handles break_on_match when several match settings are present.
Sample code:
input {
generator { count => 1 lines => [ 'Aug 18 01:02:03 asd ecelerity[12345]: 1534642187|0a8018c9-ab9ff700000058d7-22-5b78c80bd67f|ACCEPT|10.0.0.1:12345', 'Aug 18 01:02:03 asd bmserver[12345]: 1534642187|0a8018c9-ab9ff700000058d7-22-5b78c80bd67f|EHLO|mx1.asd.dd' ] }
}
filter {
grok {
break_on_match => true
pattern_definitions => {
"UID" => "[0-9a-f]{8}-[0-9a-f]{16}-[0-9a-f]{2}-[0-9a-f]{12}"
"ACCEPT" => "ACCEPT"
"ACCEPTIPPORT" => "%{IP:accept_ip}:%{NUMBER:accept_port}"
"ACCEPTIPPORT_OBJ" => "%{IP:[accept][accept_ip]}:%{NUMBER:[accept][accept_port]}"
"ACCEPT_OBJ_PTRN" => "%{NUMBER:[accept][date]}\|%{UID:uid}\|%{ACCEPT:action}\|%{ACCEPTIPPORT_OBJ:[accept][accept_ip_port]}"
"EHLO" => "EHLO"
"EHLO_OBJ_PTRN" => "%{NUMBER:[ehlo][date]}\|%{UID:uid}\|%{EHLO:action}\|%{GREEDYDATA:[ehlo][ehlo_host]}"
}
match => { "message" => "%{ACCEPT_OBJ_PTRN}"}
#match => { "message" => "%{EHLO_OBJ_PTRN}"}
#match => { "message" => "%{NUMBER:date}\|%{UID:uid}\|%{GREEDYDATA:_else}"}
}
if [_else] {
# drop {}
mutate {add_field => { "action" => "PARSE_ERROR" }}
}
}
output {
stdout { codec => rubydebug }
}
With only the first match enabled (as shown above), this gives:
{
"@version" => "1",
"action" => "ACCEPT",
"sequence" => 0,
"accept" => {
"accept_ip_port" => "10.0.0.1:12345",
"accept_port" => "12345",
"date" => "1534642187",
"accept_ip" => "10.0.0.1"
},
"uid" => "0a8018c9-ab9ff700000058d7-22-5b78c80bd67f",
"@timestamp" => 2019-04-21T04:09:18.452Z,
"host" => "feeder.acme.com",
"message" => "Aug 18 01:02:03 asd ecelerity[12345]: 1534642187|0a8018c9-ab9ff700000058d7-22-5b78c80bd67f|ACCEPT|10.0.0.1:12345"
}
{
"@version" => "1",
"sequence" => 0,
"@timestamp" => 2019-04-21T04:09:18.452Z,
"tags" => [
[0] "_grokparsefailure"
],
"host" => "feeder.acme.com",
"message" => "Aug 18 01:02:03 asd bmserver[12345]: 1534642187|0a8018c9-ab9ff700000058d7-22-5b78c80bd67f|EHLO|mx1.asd.dd"
}
This time the _grokparsefailure on the second event is expected, because no enabled pattern matches the EHLO line. But if we uncomment the second match, Logstash generates:
{
"@version" => "1",
"sequence" => 0,
"@timestamp" => 2019-04-21T04:10:24.371Z,
"tags" => [
[0] "_grokparsefailure"
],
"host" => "feeder.acme.com",
"message" => "Aug 18 01:02:03 asd ecelerity[12345]: 1534642187|0a8018c9-ab9ff700000058d7-22-5b78c80bd67f|ACCEPT|10.0.0.1:12345"
}
{
"@version" => "1",
"action" => "EHLO",
"sequence" => 0,
"uid" => "0a8018c9-ab9ff700000058d7-22-5b78c80bd67f",
"ehlo" => {
"ehlo_host" => "mx1.asd.dd",
"date" => "1534642187"
},
"@timestamp" => 2019-04-21T04:10:24.371Z,
"host" => "feeder.acme.com",
"message" => "Aug 18 01:02:03 asd bmserver[12345]: 1534642187|0a8018c9-ab9ff700000058d7-22-5b78c80bd67f|EHLO|mx1.asd.dd"
}
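The _grokparsefailure on the first event (the ACCEPT line) is not expected, because break_on_match is set and the first pattern matched this same line in the previous run.
If we uncomment the third match line, Logstash generates: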
{
"@version" => "1",
"action" => "PARSE_ERROR",
"sequence" => 0,
"uid" => "0a8018c9-ab9ff700000058d7-22-5b78c80bd67f",
"_else" => "EHLO|mx1.asd.dd",
"@timestamp" => 2019-04-21T04:14:15.872Z,
"host" => "feeder.acme.com",
"message" => "Aug 18 01:02:03 asd bmserver[12345]: 1534642187|0a8018c9-ab9ff700000058d7-22-5b78c80bd67f|EHLO|mx1.asd.dd",
"date" => "1534642187"
}
{
"@version" => "1",
"action" => "PARSE_ERROR",
"sequence" => 0,
"uid" => "0a8018c9-ab9ff700000058d7-22-5b78c80bd67f",
"_else" => "ACCEPT|10.0.0.1:12345",
"@timestamp" => 2019-04-21T04:14:15.872Z,
"host" => "feeder.acme.com",
"message" => "Aug 18 01:02:03 asd ecelerity[12345]: 1534642187|0a8018c9-ab9ff700000058d7-22-5b78c80bd67f|ACCEPT|10.0.0.1:12345",
"date" => "1534642187"
}
According to the documentation, Logstash should stop at the first matching pattern in this grok filter, but instead it goes further and applies only the last match.
This code used to work in Logstash 6.x. I didn't find anything relevant in the breaking changes, so maybe I'm missing something... or it's a bug indeed.
Cheers