NGINX log parse issue


(Ramesh Pendela) #1

Hello community,

I am trying to parse my custom NGINX logs. In the Kibana Grok Debugger the parsing looks correct, but when I apply the pattern in the pipeline I see `_grokparsefailure` on the parsed log. I am not sure why it's failing or where it is failing.

Here is the example log

`10.9.4.255 - - 26/Oct/2018:12:57:15 -0500 \"/mercurycard/selectize/match.tpl.html\" 200 338 \"https://www.mercurycards.com/mercurycard/\" \"Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/69.0.3497.100 Safari/537.36\" \"70.235.249.232, 198.143.57.57, 10.9.4.255\"`

Grok Pattern

`%{IPORHOST:[nginx][access][local_ip]} - %{DATA:[nginx][access][user_name]} %{HTTPDATE:[nginx][access][time]} \"%{GREEDYDATA:[nginx][access][url]}\" %{NUMBER:[nginx][access][response_code]} %{NUMBER:[nginx][access][body_sent][bytes]} \"%{DATA:[nginx][access][referrer]}\" \"%{DATA:[nginx][access][agent]}\" \"%{IPORHOST:[nginx][access][remote_ip]}, %{IPORHOST:[nginx][access][elb_ip]}, %{IPORHOST:[nginx][access][local_ip1]}\"`

JSON Result:

`{
          "@version" => "1",
              "tags" => [
        [0] "webapp-prod",
        [1] "_grokparsefailure"
    ],
           "fileset" => {
        "module" => "nginx",
          "name" => "access"
    },
           "message" => "10.9.4.255 - - 26/Oct/2018:12:57:15 -0500 \"/mercurycard/selectize/match.tpl.html\" 200 338 \"https://www.mercurycards.com/mercurycard/\" \"Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/69.0.3497.100 Safari/537.36\" \"70.235.249.232, 198.143.57.57, 10.9.4.255\"",
              "beat" => {
        "hostname" => "ip-10-9-0-36",
              "ip" => "10.9.0.36",
         "version" => "6.4.2",
            "name" => "ip-10-9-0-36"
    },
            "source" => "/var/log/nginx/access.log",
             "input" => {
        "type" => "log"
    },
             "nginx" => {
        "access" => {
                "remote_ip" => "70.235.249.232",
                "body_sent" => {
                "bytes" => "338"
            },
            "response_code" => "200",
                "user_name" => "-",
                      "url" => "/mercurycard/selectize/match.tpl.html",
               "user_agent" => {
                  "minor" => "0",
                  "build" => "",
                "os_name" => "Windows 10",
                  "patch" => "3497",
                  "major" => "69",
                   "name" => "Chrome",
                     "os" => "Windows 10",
                 "device" => "Other"
            },
                    "geoip" => {
                            "ip" => "70.235.249.232",
                 "country_code2" => "US",
                      "location" => {
                    "lat" => 37.751,
                    "lon" => -97.822
                },
                      "latitude" => 37.751,
                  "country_name" => "United States",
                     "longitude" => -97.822,
                "continent_code" => "NA",
                 "country_code3" => "US"
            },
                   "elb_ip" => "198.143.57.57",
                 "local_ip" => "10.9.4.255",
                "local_ip1" => "10.9.4.255",
                 "referrer" => "https://www.mercurycards.com/mercurycard/"
        }
    },
    "read_timestamp" => "2018-10-26T17:57:15.130Z",
            "offset" => 38642780,
        "@timestamp" => 2018-10-26T17:57:15.000Z,
        "prospector" => {
        "type" => "log"
    }
}`

I am using the default Filebeat template.


(Christian Dahlqvist) #2

It seems to have parsed out the data correctly. Do you have any other config files in the directory or other grok expressions in your config file?
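You can also rule out the pipeline itself by feeding the sample line through a minimal pipeline. A rough sketch (the file name is up to you, the grok expression is the one from your post):

`input { stdin { } }

filter {
  grok {
    # same expression as in the original post
    match => { "message" => "%{IPORHOST:[nginx][access][local_ip]} - %{DATA:[nginx][access][user_name]} %{HTTPDATE:[nginx][access][time]} \"%{GREEDYDATA:[nginx][access][url]}\" %{NUMBER:[nginx][access][response_code]} %{NUMBER:[nginx][access][body_sent][bytes]} \"%{DATA:[nginx][access][referrer]}\" \"%{DATA:[nginx][access][agent]}\" \"%{IPORHOST:[nginx][access][remote_ip]}, %{IPORHOST:[nginx][access][elb_ip]}, %{IPORHOST:[nginx][access][local_ip1]}\"" }
  }
}

output { stdout { codec => rubydebug } }`

Run it with `bin/logstash -f test.conf`, paste the sample line on stdin, and check whether the event comes out without the `_grokparsefailure` tag. If it does, the expression is fine and something else in your pipeline is adding the tag.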


(Ramesh Pendela) #3

Hi @Christian_Dahlqvist, I do have other config files, but how is that related to my log file when the grok is wrapped in an `if` condition based on the fileset module?

Here is the complete `if` condition:

`if [fileset][module] == "nginx" {
    if [fileset][name] == "access" {
        grok {
            match => { "message" => ["%{IPORHOST:[nginx][access][local_ip]} - %{DATA:[nginx][access][user_name]} %{HTTPDATE:[nginx][access][time]} \"%{GREEDYDATA:[nginx][access][url]}\" %{NUMBER:[nginx][access][response_code]} %{NUMBER:[nginx][access][body_sent][bytes]} \"%{DATA:[nginx][access][referrer]}\" \"%{DATA:[nginx][access][agent]}\" \"%{IPORHOST:[nginx][access][remote_ip]}, %{IPORHOST:[nginx][access][elb_ip]}, %{IPORHOST:[nginx][access][local_ip1]}\""] }
        }
        mutate {
            add_field => { "read_timestamp" => "%{@timestamp}" }
        }
        date {
            match => [ "[nginx][access][time]", "dd/MMM/YYYY:H:m:s Z" ]
            remove_field => "[nginx][access][time]"
        }
        useragent {
            source => "[nginx][access][agent]"
            target => "[nginx][access][user_agent]"
            remove_field => "[nginx][access][agent]"
        }
        geoip {
            source => "[nginx][access][remote_ip]"
            target => "[nginx][access][geoip]"
        }
    }
}`

(Christian Dahlqvist) #4

All config files in the directory are concatenated, so if you have another file with a grok expression that is not guarded by conditionals, this could be applied and result in the _grokparsefailure that you are seeing. The one in the config you have shown seems to work fine.
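For example, if the same directory contains a leftover file along these lines (the file name and pattern here are made up for illustration), its grok runs against every event, nginx included, and tags each miss:

`# /etc/logstash/conf.d/20-other.conf -- hypothetical example
filter {
  # no conditional around it, so it is applied to ALL events
  grok {
    match => { "message" => "%{TIMESTAMP_ISO8601:timestamp} %{WORD:severity} %{GREEDYDATA:rest}" }
  }
}`

Wrapping such filters in a conditional on `[fileset][module]`, or pointing `path.config` at a single file, keeps them away from the nginx events.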


(Ramesh Pendela) #5

Huh, that does seem to be the case, but it is confusing for the end user, because it looks as if the NGINX logs are not being parsed correctly. I am surprised the configs are concatenated beyond the NGINX parsing block; it means a parse failure in any of my other filters can tag the NGINX (or any other module's) events with `_grokparsefailure`.

Also, I would like to know the impact of `_grokparsefailure` on system performance.


(Ramesh Pendela) #6

@Christian_Dahlqvist I am still facing the same issue. As soon as I remove the conditional filter for the other logs, the grok parse failures disappear, yet the grok for those other logs is pretty simple and Logstash reports no errors.

Here is the complete filter section:

`filter {
    if "beats_input_codec_plain_applied" in [tags] {
        mutate {
            remove_tag => ["beats_input_codec_plain_applied"]
        }
    }

    mutate {
        remove_field => [ "host" ]
    }

    mutate {
        copy => { "[@metadata][ip_address]" => "[beat][ip]" }
    }

    if [fileset][module] == "nginx" {
        if [fileset][name] == "access" {
            grok {
                match => { "message" => ["%{IPORHOST:[nginx][access][local_ip]} - %{DATA:[nginx][access][user_name]} %{HTTPDATE:[nginx][access][time]} \"%{GREEDYDATA:[nginx][access][url]}\" %{NUMBER:[nginx][access][response_code]} %{NUMBER:[nginx][access][body_sent][bytes]} \"%{DATA:[nginx][access][referrer]}\" \"%{DATA:[nginx][access][agent]}\" \"%{IPORHOST:[nginx][access][remote_ip]}, %{IPORHOST:[nginx][access][elb_ip]}, %{IPORHOST:[nginx][access][local_ip1]}\""] }
            }
            mutate {
                add_field => { "read_timestamp" => "%{@timestamp}" }
            }
            date {
                match => [ "[nginx][access][time]", "dd/MMM/YYYY:H:m:s Z" ]
                remove_field => "[nginx][access][time]"
            }
            useragent {
                source => "[nginx][access][agent]"
                target => "[nginx][access][user_agent]"
                remove_field => "[nginx][access][agent]"
            }
            geoip {
                source => "[nginx][access][remote_ip]"
                target => "[nginx][access][geoip]"
            }
        }
        mutate {
            add_tag => [ "NGINX" ]
        }
    }

    if [fileset][module] != "nginx" or [fileset][module] != "system" {
        grok {
            match => { "message" => "%{TIMESTAMP_ISO8601:timestamp}%{SPACE}%{WORD:Severity}%{SPACE}%{GREEDYDATA:message1}" }
            remove_field => [ "message1" ]
        }
    }
}`

Here is the other log I want to parse, just to get the severity level:

`2018-11-14 11:17:07,926 INFO  [http-nio-8080-exec-10] TokenService - User: user@email.com is an administrator. Skipping ip address validation`
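
Edit: looking at the last conditional again, `[fileset][module] != "nginx" or [fileset][module] != "system"` is always true, since every value is unequal to at least one of the two strings. That means this grok also runs on the nginx access lines and tags them with `_grokparsefailure`, which would explain why removing the block makes the failures disappear. Changing `or` to `and` should restrict it to the other logs:

`if [fileset][module] != "nginx" and [fileset][module] != "system" {
    grok {
        match => { "message" => "%{TIMESTAMP_ISO8601:timestamp}%{SPACE}%{WORD:Severity}%{SPACE}%{GREEDYDATA:message1}" }
        remove_field => [ "message1" ]
    }
}`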

(system) #7

This topic was automatically closed 28 days after the last reply. New replies are no longer allowed.