[SOLVED] Grok - multiple match lines - set tag based on which grok match was used

I have a grok match statement with multiple matches:

# Parse a raw syslog line held in "message" into syslog_* / cisco_* fields.
# Several candidate patterns are supplied as one array. With grok's default
# break_on_match behaviour they are tried top to bottom and the first pattern
# that matches is the one applied, so the order below matters: specific
# formats come first and the catch-all comes last.
# Note: a single multi-pattern grok does not record WHICH pattern matched;
# every successful event gets the same "SYSLOG" tag.
grok {
  match => { "message" => [
  # --- Cisco-style lines: "<pri>timestamp host eventnum: uptime: %FACILITY-LEVEL-MNEMONIC: ..." ---
  # Literal %SYS facility.
  "<%{NONNEGINT:syslog_pri}>%{SYSLOGTIMESTAMP:syslog_timestamp} %{SYSLOGHOST:syslog_hostname} %{INT:cisco_eventnum:int}: %{DATA:cisco_uptime}: \%SYS-%{INT:cisco_event_lvl}-%{DATA:cisco_event_st}: %{GREEDYDATA:syslog_message}",
  # Literal %PHY facility.
  "<%{NONNEGINT:syslog_pri}>%{SYSLOGTIMESTAMP:syslog_timestamp} %{SYSLOGHOST:syslog_hostname} %{INT:cisco_eventnum:int}: %{DATA:cisco_uptime}: \%PHY-%{INT:cisco_event_lvl}-%{DATA:cisco_event_st}: %{GREEDYDATA:syslog_message}",
  # Any facility, "Interface <name>, changed state to <state>".
  "<%{NONNEGINT:syslog_pri}>%{SYSLOGTIMESTAMP:syslog_timestamp} %{SYSLOGHOST:syslog_hostname} %{INT:cisco_eventnum:int}: %{DATA:cisco_uptime}: \%%{DATA:cisco_event}-%{INT:cisco_event_lvl}-%{DATA:cisco_event_st}: Interface %{DATA:cisco_iface}, changed state to %{WORD:cisco_iface_state}",
  # Any facility, "Line protocol on Interface <name>, changed state to <state>".
  "<%{NONNEGINT:syslog_pri}>%{SYSLOGTIMESTAMP:syslog_timestamp} %{SYSLOGHOST:syslog_hostname} %{INT:cisco_eventnum:int}: %{DATA:cisco_uptime}: \%%{DATA:cisco_event}-%{INT:cisco_event_lvl}-%{DATA:cisco_event_st}: Line protocol on Interface %{DATA:cisco_iface}, changed state to %{WORD:cisco_iface_state}",
  # Fallback for lines that start with an integer event number followed by ": ".
  "<%{NONNEGINT:syslog_pri}>%{SYSLOGTIMESTAMP:syslog_timestamp} %{SYSLOGHOST:syslog_hostname} %{INT:cisco_eventnum:int}: %{GREEDYDATA:syslog_message}",
  # --- Lines carrying a literal "invld>" marker followed by a second (uncaptured) timestamp and host ---
  "<%{NONNEGINT:syslog_pri}>%{SYSLOGTIMESTAMP:syslog_timestamp} %{SYSLOGHOST:syslog_hostname} invld>%{SYSLOGTIMESTAMP} %{SYSLOGHOST} %{DATA:syslog_program}(?:\[%{POSINT:syslog_pid}\])?: %{GREEDYDATA:syslog_message}",
  # --- Lines with a parenthesised, double-quoted token before the program name ---
  "<%{NONNEGINT:syslog_pri}>%{SYSLOGTIMESTAMP:syslog_timestamp} %{SYSLOGHOST:syslog_hostname} \(\"%{DATA}\"\) %{WORD:syslog_program}: %{GREEDYDATA:syslog_message}",
  # --- Lines populating the syslog_win* fields (presumably forwarded Windows events; verify against samples) ---
  # The "#012" inside the pattern is literal text to match, not a comment.
  "<%{NONNEGINT:syslog_pri}>%{SYSLOGTIMESTAMP:syslog_timestamp} %{SYSLOGHOST:syslog_hostname} %{DATA:syslog_program}\[\s*%{BASE10NUM:syslog_pid}\]: \(%{DATA:syslog_wincommitid}:%{DATA:syslog_winthreadid}:%{DATA:syslog_winchannelid}\) \[%{DATA:syslog_wineventid}:%{DATA:syslog_wintaskid}\.%{DATA:syslog_winopcode}\.%{DATA:syslog_winloglevel}\] %{DATA:syslog_wincommand}\.#012%n%n%{GREEDYDATA:syslog_winmessage}",
  # --- "program:[pid] message" variant (colon before the bracketed pid) ---
  "<%{NONNEGINT:syslog_pri}>%{SYSLOGTIMESTAMP:syslog_timestamp} %{SYSLOGHOST:syslog_hostname} %{DATA:syslog_program}:\[\s*%{BASE10NUM:syslog_pid}\] %{GREEDYDATA:syslog_message}",
  # --- Conventional "program[pid]: message" with the pid optional ---
  "<%{NONNEGINT:syslog_pri}>%{SYSLOGTIMESTAMP:syslog_timestamp} %{SYSLOGHOST:syslog_hostname} %{DATA:syslog_program}(?:\[%{POSINT:syslog_pid}\])?: %{GREEDYDATA:syslog_message}",
  # --- run-parts(/etc/cron.*) lines; note the pattern expects a space, not "]", after the pid ---
  "<%{NONNEGINT:syslog_pri}>%{SYSLOGTIMESTAMP:syslog_timestamp} %{SYSLOGHOST:syslog_hostname} run-parts\(/etc/cron\..*\)\[%{NUMBER:syslog_pid} %{GREEDYDATA:syslog_message}",
  # --- Catch-all: anything with a priority, timestamp and host; the rest goes to syslog_message ---
  "<%{NONNEGINT:syslog_pri}>%{SYSLOGTIMESTAMP:syslog_timestamp} %{SYSLOGHOST:syslog_hostname} %{GREEDYDATA:syslog_message}"
 ] }
  # The options below are applied only when one of the patterns matched.
  # Copy the event's @timestamp and host into dedicated fields.
  add_field => [ "received_at", "%{@timestamp}" ]
  add_field => [ "received_from", "%{host}" ]
  # Same tag regardless of which pattern matched.
  add_tag => [ "SYSLOG" ]
}

I'd like to set a tag based on which match was used.

Is this possible?

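I found this thread, https://discuss.elastic.co/t/is-there-a-way-to-tag-for-different-grok-matches/52785, and tried its approach: split the patterns into separate grok filters, each adding its own tag, and run each later filter only if the event still carries the _grokparsefailure tag. It seems to work so far. A bonus is that after a while I can see which matches are used most and reorder them in the conf file to reduce processing.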

Here's a snippet as an example for others to use:

filter {
  if [document_type] == "syslog" {
    # First attempt: a single-pattern grok for lines containing the literal
    # "invld>" marker. On a match the event is tagged "SYSLOG" plus the
    # pattern-specific "SYSLOGGROK1", which identifies this pattern as the one
    # that parsed the event. On a miss grok adds "_grokparsefailure" (its
    # default failure tag), which the following conditionals test for.
    grok {
      match => { "message" =>
      "<%{NONNEGINT:syslog_pri}>%{SYSLOGTIMESTAMP:syslog_timestamp} %{SYSLOGHOST:syslog_hostname} invld>%{SYSLOGTIMESTAMP} %{SYSLOGHOST} %{DATA:syslog_program}(?:\[%{POSINT:syslog_pid}\])?: %{GREEDYDATA:syslog_message}"
      }
    # The options below take effect only when the pattern matched.
    add_field => [ "received_at", "%{@timestamp}" ]
    add_field => [ "received_from", "%{host}" ]
    # Clears a failure tag already present on the event, if any.
    remove_tag => ["_grokparsefailure"]
    add_tag => [ "SYSLOG","SYSLOGGROK1" ]
    }
    # Fallback: runs only while the event still carries "_grokparsefailure",
    # i.e. no earlier grok in this chain has matched it.
    if "_grokparsefailure" in [tags] {
      # Pattern for lines with a parenthesised, double-quoted token before the
      # program name. A match tags the event "SYSLOGGROK2".
      grok {
        match => { "message" =>
        "<%{NONNEGINT:syslog_pri}>%{SYSLOGTIMESTAMP:syslog_timestamp} %{SYSLOGHOST:syslog_hostname} \(\"%{DATA}\"\) %{WORD:syslog_program}: %{GREEDYDATA:syslog_message}"
        }
      # The options below take effect only when the pattern matched.
      add_field => [ "received_at", "%{@timestamp}" ]
      add_field => [ "received_from", "%{host}" ]
      # Removing the failure tag on success stops later fallback blocks from running.
      remove_tag => ["_grokparsefailure"]
      add_tag => [ "SYSLOG","SYSLOGGROK2" ]
      }
    }
    # Fallback: runs only while the event still carries "_grokparsefailure",
    # i.e. no earlier grok in this chain has matched it.
    if "_grokparsefailure" in [tags] {
      # Pattern that fills the syslog_win* fields (presumably forwarded Windows
      # events; verify against sample data). A match tags the event "SYSLOGGROK3".
      # The "#012" inside the pattern is literal text to match, not a comment.
      grok {
        match => { "message" =>
        "<%{NONNEGINT:syslog_pri}>%{SYSLOGTIMESTAMP:syslog_timestamp} %{SYSLOGHOST:syslog_hostname} %{DATA:syslog_program}\[\s*%{BASE10NUM:syslog_pid}\]: \(%{DATA:syslog_wincommitid}:%{DATA:syslog_winthreadid}:%{DATA:syslog_winchannelid}\) \[%{DATA:syslog_wineventid}:%{DATA:syslog_wintaskid}\.%{DATA:syslog_winopcode}\.%{DATA:syslog_winloglevel}\] %{DATA:syslog_wincommand}\.#012%n%n%{GREEDYDATA:syslog_winmessage}"
        }
      # The options below take effect only when the pattern matched.
      add_field => [ "received_at", "%{@timestamp}" ]
      add_field => [ "received_from", "%{host}" ]
      # Removing the failure tag on success stops later fallback blocks from running.
      remove_tag => ["_grokparsefailure"]
      add_tag => [ "SYSLOG","SYSLOGGROK3" ]
      }
    }
...

This topic was automatically closed 28 days after the last reply. New replies are no longer allowed.