NGINX log parsing issue

Hello community,

I am trying to parse my custom NGINX logs. In the Kibana Grok Debugger I can see the parsing is correct, but when I apply the pattern in the pipeline I am seeing _grokparsefailure alongside the parsed log. I am not sure why it is failing or where.

Here is the example log:

`10.9.4.255 - - 26/Oct/2018:12:57:15 -0500 \"/mercurycard/selectize/match.tpl.html\" 200 338 \"https://www.mercurycards.com/mercurycard/\" \"Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/69.0.3497.100 Safari/537.36\" \"70.235.249.232, 198.143.57.57, 10.9.4.255\"`

Grok pattern:

`%{IPORHOST:[nginx][access][local_ip]} - %{DATA:[nginx][access][user_name]} %{HTTPDATE:[nginx][access][time]} \"%{GREEDYDATA:[nginx][access][url]}\" %{NUMBER:[nginx][access][response_code]} %{NUMBER:[nginx][access][body_sent][bytes]} \"%{DATA:[nginx][access][referrer]}\" \"%{DATA:[nginx][access][agent]}\" \"%{IPORHOST:[nginx][access][remote_ip]}, %{IPORHOST:[nginx][access][elb_ip]}, %{IPORHOST:[nginx][access][local_ip1]}\"`
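
To take the rest of the pipeline out of the picture, the pattern can also be exercised on its own with a minimal config (a sketch; the stdin input and the file name are only for testing):

`# test.conf (illustrative name) -- run with: bin/logstash -f test.conf
# and paste a raw log line on stdin.
input { stdin { } }

filter {
  grok {
    match => { "message" => ["%{IPORHOST:[nginx][access][local_ip]} - %{DATA:[nginx][access][user_name]} %{HTTPDATE:[nginx][access][time]} \"%{GREEDYDATA:[nginx][access][url]}\" %{NUMBER:[nginx][access][response_code]} %{NUMBER:[nginx][access][body_sent][bytes]} \"%{DATA:[nginx][access][referrer]}\" \"%{DATA:[nginx][access][agent]}\" \"%{IPORHOST:[nginx][access][remote_ip]}, %{IPORHOST:[nginx][access][elb_ip]}, %{IPORHOST:[nginx][access][local_ip1]}\""] }
  }
}

output { stdout { codec => rubydebug } }`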

JSON result from the full pipeline:

`{
logstash    |           "@version" => "1",
logstash    |               "tags" => [
logstash    |         [0] "webapp-prod",
logstash    |         [1] "_grokparsefailure"
logstash    |     ],
logstash    |            "fileset" => {
logstash    |         "module" => "nginx",
logstash    |           "name" => "access"
logstash    |     },
logstash    |            "message" => "10.9.4.255 - - 26/Oct/2018:12:57:15 -0500 \"/mercurycard/selectize/match.tpl.html\" 200 338 \"https://www.mercurycards.com/mercurycard/\" \"Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/69.0.3497.100 Safari/537.36\" \"70.235.249.232, 198.143.57.57, 10.9.4.255\"",
logstash    |               "beat" => {
logstash    |         "hostname" => "ip-10-9-0-36",
logstash    |               "ip" => "10.9.0.36",
logstash    |          "version" => "6.4.2",
logstash    |             "name" => "ip-10-9-0-36"
logstash    |     },
logstash    |             "source" => "/var/log/nginx/access.log",
logstash    |              "input" => {
logstash    |         "type" => "log"
logstash    |     },
logstash    |              "nginx" => {
logstash    |         "access" => {
logstash    |                 "remote_ip" => "70.235.249.232",
logstash    |                 "body_sent" => {
logstash    |                 "bytes" => "338"
logstash    |             },
logstash    |             "response_code" => "200",
logstash    |                 "user_name" => "-",
logstash    |                       "url" => "/mercurycard/selectize/match.tpl.html",
logstash    |                "user_agent" => {
logstash    |                   "minor" => "0",
logstash    |                   "build" => "",
logstash    |                 "os_name" => "Windows 10",
logstash    |                   "patch" => "3497",
logstash    |                   "major" => "69",
logstash    |                    "name" => "Chrome",
logstash    |                      "os" => "Windows 10",
logstash    |                  "device" => "Other"
logstash    |             },
logstash    |                     "geoip" => {
logstash    |                             "ip" => "70.235.249.232",
logstash    |                  "country_code2" => "US",
logstash    |                       "location" => {
logstash    |                     "lat" => 37.751,
logstash    |                     "lon" => -97.822
logstash    |                 },
logstash    |                       "latitude" => 37.751,
logstash    |                   "country_name" => "United States",
logstash    |                      "longitude" => -97.822,
logstash    |                 "continent_code" => "NA",
logstash    |                  "country_code3" => "US"
logstash    |             },
logstash    |                    "elb_ip" => "198.143.57.57",
logstash    |                  "local_ip" => "10.9.4.255",
logstash    |                 "local_ip1" => "10.9.4.255",
logstash    |                  "referrer" => "https://www.mercurycards.com/mercurycard/"
logstash    |         }
logstash    |     },
logstash    |     "read_timestamp" => "2018-10-26T17:57:15.130Z",
logstash    |             "offset" => 38642780,
logstash    |         "@timestamp" => 2018-10-26T17:57:15.000Z,
logstash    |         "prospector" => {
logstash    |         "type" => "log"
logstash    |     }
logstash    | }`

I am using the default Filebeat template.

It seems to have parsed out the data correctly. Do you have any other config files in the directory or other grok expressions in your config file?

Hi @Christian_Dahlqvist, I do have other config files, but how would they affect these events when my grok is inside an if condition based on the fileset module?

Here is the complete if condition:

`if [fileset][module] == "nginx" {
  if [fileset][name] == "access" {
    grok {
      match => { "message" => ["%{IPORHOST:[nginx][access][local_ip]} - %{DATA:[nginx][access][user_name]} %{HTTPDATE:[nginx][access][time]} \"%{GREEDYDATA:[nginx][access][url]}\" %{NUMBER:[nginx][access][response_code]} %{NUMBER:[nginx][access][body_sent][bytes]} \"%{DATA:[nginx][access][referrer]}\" \"%{DATA:[nginx][access][agent]}\" \"%{IPORHOST:[nginx][access][remote_ip]}, %{IPORHOST:[nginx][access][elb_ip]}, %{IPORHOST:[nginx][access][local_ip1]}\""] }
    }
    mutate {
      add_field => { "read_timestamp" => "%{@timestamp}" }
    }
    date {
      match => [ "[nginx][access][time]", "dd/MMM/YYYY:H:m:s Z" ]
      remove_field => "[nginx][access][time]"
    }
    useragent {
      source => "[nginx][access][agent]"
      target => "[nginx][access][user_agent]"
      remove_field => "[nginx][access][agent]"
    }
    geoip {
      source => "[nginx][access][remote_ip]"
      target => "[nginx][access][geoip]"
    }
  }
}`

All config files in the directory are concatenated, so if you have another file with a grok expression that is not guarded by conditionals, this could be applied and result in the _grokparsefailure that you are seeing. The one in the config you have shown seems to work fine.
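
For example (a sketch with a hypothetical file name), a second file such as `20-other.conf` in the same directory is merged into the one pipeline, so an unguarded grok in it runs against every event, NGINX access lines included:

`# 20-other.conf (hypothetical) -- concatenated with every other file
# in the config directory into a single pipeline.
filter {
  grok {
    # No surrounding conditional: this also runs on NGINX access lines,
    # fails to match them, and adds the _grokparsefailure tag.
    match => { "message" => "%{TIMESTAMP_ISO8601:timestamp}%{SPACE}%{WORD:Severity}%{SPACE}%{GREEDYDATA:message1}" }
  }
}`

Wrapping it in the same kind of `[fileset][module]` conditional keeps it away from events that belong to other modules.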

Huh, it seems that's true, but this is confusing for the end user, because it makes it look as if the NGINX logs were not parsed correctly. I am surprised the files are concatenated outside of the NGINX conditional; it means a parse failure in any of my logs can tag the NGINX (or other module) events with _grokparsefailure.

Also, I would like to know the impact of _grokparsefailure on system performance.

@Christian_Dahlqvist I am still facing the same issue. As soon as I remove the conditional filter for the other logs I no longer see the grok parse failures, but my grok for the other logs is pretty simple and Logstash reports no errors.

Here is the complete filter section:

`filter {
  if "beats_input_codec_plain_applied" in [tags] {
    mutate {
      remove_tag => ["beats_input_codec_plain_applied"]
    }
  }

  mutate {
    remove_field => [ "host" ]
  }

  mutate {
    copy => { "[@metadata][ip_address]" => "[beat][ip]" }
  }

  if [fileset][module] == "nginx" {
    if [fileset][name] == "access" {
      grok {
        match => { "message" => ["%{IPORHOST:[nginx][access][local_ip]} - %{DATA:[nginx][access][user_name]} %{HTTPDATE:[nginx][access][time]} \"%{GREEDYDATA:[nginx][access][url]}\" %{NUMBER:[nginx][access][response_code]} %{NUMBER:[nginx][access][body_sent][bytes]} \"%{DATA:[nginx][access][referrer]}\" \"%{DATA:[nginx][access][agent]}\" \"%{IPORHOST:[nginx][access][remote_ip]}, %{IPORHOST:[nginx][access][elb_ip]}, %{IPORHOST:[nginx][access][local_ip1]}\""] }
      }
      mutate {
        add_field => { "read_timestamp" => "%{@timestamp}" }
      }
      date {
        match => [ "[nginx][access][time]", "dd/MMM/YYYY:H:m:s Z" ]
        remove_field => "[nginx][access][time]"
      }
      useragent {
        source => "[nginx][access][agent]"
        target => "[nginx][access][user_agent]"
        remove_field => "[nginx][access][agent]"
      }
      geoip {
        source => "[nginx][access][remote_ip]"
        target => "[nginx][access][geoip]"
      }
    }
    mutate {
      add_tag => [ "NGINX" ]
    }
  }

  if [fileset][module] != "nginx" or [fileset][module] != "system" {
    grok {
      match => { "message" => "%{TIMESTAMP_ISO8601:timestamp}%{SPACE}%{WORD:Severity}%{SPACE}%{GREEDYDATA:message1}" }
      remove_field => [ "message1" ]
    }
  }
}`
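
One detail worth noting in the filter above: the condition `[fileset][module] != "nginx" or [fileset][module] != "system"` is always true, because any value differs from at least one of the two strings, so the fallback grok also runs on the NGINX events and fails on them. A sketch of the same guard with `and`, which skips events from both modules:

`# Fallback grok for everything that is neither the nginx nor the system module.
# "and" makes the block skip both modules; with "or" it never skips anything.
if [fileset][module] != "nginx" and [fileset][module] != "system" {
  grok {
    match => { "message" => "%{TIMESTAMP_ISO8601:timestamp}%{SPACE}%{WORD:Severity}%{SPACE}%{GREEDYDATA:message1}" }
    remove_field => [ "message1" ]
  }
}`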

Here is my other log that I want to parse, just to get the severity level:

`2018-11-14 11:17:07,926 INFO  [http-nio-8080-exec-10] TokenService - User: user@email.com is an administrator. Skipping ip address validation`
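
With the fallback pattern above, a line like this should come out with `Severity => "INFO"`. If the leading timestamp should also drive `@timestamp`, a date filter with the comma-millisecond format can be added (a sketch, assuming the log always uses this layout):

`date {
  # Matches "2018-11-14 11:17:07,926" -- note the comma before the milliseconds.
  match => [ "timestamp", "yyyy-MM-dd HH:mm:ss,SSS" ]
  remove_field => [ "timestamp" ]
}`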
