Hey all! I'm having a weird issue where my grok patterns seem to be matching twice and adding duplicate fields to events in Logstash.
TL;DR:
It looks as though after grok successfully matches one pattern, it then ALSO tries the NEXT pattern. I thought grok was supposed to break after the first match it found! Am I mistaken?
Here is an example of the data I am receiving (a single event, copied from Kibana):
{
  "_index": "labhub-2019.02.01",
  "_source": {
    "logger": [
      "o.e.c.l.o.TransientLabResultsProvider.searchTest",
      "o.e.c.l.o.TransientLabResultsProvider.searchTest"
    ],
    "time_taken": "1",
    "log": {
      "file": {
        "path": "/var/lib/docker/containers/b6997b5bda80b44627c90ad3264287205758d983b39bdec16b1e79f3a66d6140/b6997b5bda80b44627c90ad3264287205758d983b39bdec16b1e79f3a66d6140-json.log"
      }
    },
    "host": {
      "name": "whatever.org",
      "containerized": true,
      "id": "109d94327fc14970809522c34407f39f"
    },
    "prospector": {
      "type": "docker"
    },
    "timestamp": [
      "19-02-01 00:06:31,378",
      "19-02-01 00:06:31,378"
    ],
    "offset": 264850,
    "source": "/var/lib/docker/containers/b6997b5bda80b44627c90ad3264287205758d983b39bdec16b1e79f3a66d6140/b6997b5bda80b44627c90ad3264287205758d983b39bdec16b1e79f3a66d6140-json.log",
    "@timestamp": "2019-02-01T00:06:31.382Z",
    "stream": "stdout",
    "thread": [
      "http-nio-8081-exec-1",
      "http-nio-8081-exec-1"
    ],
    "severity": [
      "INFO",
      "INFO"
    ],
    "tags": [
      "beats_input_codec_plain_applied",
      "_grokparsefailure"
    ],
    "applicationVersion": [
      "1.0.0",
      "1.0.0"
    ],
    "user": [
      "medly_dummy",
      "medly_dummy"
    ],
    "application": [
      "LabHub",
      "LabHub"
    ],
    "message": "Query Stats: returning:[1], time taken:[1]ms",
    "resultCount": "1",
    "fields": {
      "myEnv": "CST"
    }
  },
  "fields": {
    "@timestamp": [
      "2019-02-01T00:06:31.382Z"
    ],
    "timestamp": [
      "0019-02-01T00:06:31.378Z",
      "0019-02-01T00:06:31.378Z"
    ]
  },
  "highlight": {
    "fields.env.keyword": [
      "@kibana-highlighted-field@CST@/kibana-highlighted-field@"
    ]
  },
  "sort": [
    1548979591382
  ]
}
You will notice that many of the fields grok extracts (logger, timestamp, thread, severity, application, applicationVersion, user) are duplicated, each ending up as an array containing the same value twice.
Here is the filter section of my Logstash config:
filter {
  grok {
    patterns_dir => ["/usr/share/logstash/pipeline/patterns"]
    match => { "message" => "%{CCAUDITEVENT}" }
  }
  grok {
    patterns_dir => ["/usr/share/logstash/pipeline/patterns"]
    match => { "message" => "%{LABHUBAUDIT}" }
  }
  grok {
    patterns_dir => ["/usr/share/logstash/pipeline/patterns"]
    match => { "message" => "%{LABHUBLOG}" }
    overwrite => ["message"]
  }
  grok {
    patterns_dir => ["/usr/share/logstash/pipeline/patterns"]
    match => { "message" => "%{CCLOGMESSAGE}" }
    overwrite => ["message"]
  }
  # Since we use the host for index routing, it must be lowercase
  mutate {
    lowercase => ["host"]
  }
  # Audit parsing
  json {
    source => "audit_event"
  }
  if [type] in ["AUDIT", "ANALYTICS"] {
    geoip {
      source => "clientIp"
    }
  }
}
And here is my patterns file:
CCUSER %{EMAILADDRESS}|%{USER}
CCMETADATA %{DATESTAMP:timestamp_new}\s+%{LOGLEVEL:severity}\s+\[%{DATA:thread}?\]\s+\[(%{CCUSER:username})?\] %{JAVACLASS:logger}
CCLOGMESSAGE %{CCMETADATA}\s+%{GREEDYDATA:message}
CCAUDITEVENT %{CCMETADATA}\s+AUDIT:%{GREEDYDATA:audit_event}
LABHUBMETADATA %{DATESTAMP:timestamp}\s+\[%{WORD:application}?\|%{DATA:applicationVersion}?\|%{CCUSER:user}?\]\s+\[%{DATA:thread}\]\s+%{LOGLEVEL:severity}\s+%{JAVACLASS:logger}
LABHUBLOG %{LABHUBMETADATA} -\s+%{GREEDYDATA:message}
LABHUBAUDIT %{LABHUBMETADATA} -\s+Query Stats: returning:\[%{NUMBER:resultCount}\], time taken:\[%{NUMBER:time_taken}\]ms
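For reference, the raw log line behind the event above should look roughly like this (reconstructed from the parsed fields, so the exact spacing may differ); as far as I can tell, both LABHUBAUDIT and LABHUBLOG would match a line like this, since LABHUBLOG's GREEDYDATA also swallows the "Query Stats" tail:

19-02-01 00:06:31,378 [LabHub|1.0.0|medly_dummy] [http-nio-8081-exec-1] INFO o.e.c.l.o.TransientLabResultsProvider.searchTest - Query Stats: returning:[1], time taken:[1]ms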
It looks as though after grok successfully matches the LABHUBAUDIT pattern, it then ALSO tries the LABHUBLOG pattern. I thought grok was supposed to break after the first match it found! Am I mistaken?
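Out of curiosity, would collapsing the four grok blocks into a single one, with all the patterns in one match list, behave any differently? Something like this rough sketch (with break_on_match left at its default of true):

grok {
  patterns_dir => ["/usr/share/logstash/pipeline/patterns"]
  match => { "message" => ["%{CCAUDITEVENT}", "%{LABHUBAUDIT}", "%{LABHUBLOG}", "%{CCLOGMESSAGE}"] }
  overwrite => ["message"]
  # break_on_match defaults to true, so only the first pattern that matches should apply
}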
Cheers,