Clientip field is not identified by logstash (Solved)

Hi all,

I configured the Logstash filter as follows:

# Logstash filter for events of type "apache-error": grok-parses the log line,
# assembles a time_stamp field from the captured parts, runs a GeoIP lookup on
# clientip, and parses time_stamp with the date filter.
filter {
          if [type] == "apache-error" {
            # NOTE(review): the optional "[client ...]" group must follow "[loglevel] " directly.
            # In the sample event a "[pid ...:tid ...]" segment sits in between and the client
            # address carries a ":port" suffix, so the group matches zero times and clientip
            # is never captured.
            grok {
                     match => ["message", "\[%{WORD:dayname} %{WORD:month} %{DATA:day} %{DATA:hour}:%{DATA:minute}:%{DATA:second} %{YEAR:year}\] \[%{NOTSPACE:loglevel}\] (?:\[client %{IPORHOST:clientip}\] ){0,1}%{GREEDYDATA:message}"]
                     overwrite => [ "message" ]
                }

            mutate {
                     convert => [ "[geoip][coordinates]", "float"]
                     add_field => {
                                     "time_stamp" => "%{day}/%{month}/%{year}:%{hour}:%{minute}:%{second}"
                                     # NOTE(review): with no clientip captured by grok, this reference stays
                                     # unresolved; the resulting event shows the literal text %{clientip}.
                                     "clientip" => "%{clientip}"
                                  }
                  }

            # GeoIP lookup keyed on clientip; results are stored under the "geoip" field.
            geoip {
                   source => "clientip"
                   target => "geoip"
                   add_tag => [ "apache-geoip" ]
                   database => "/etc/logstash/GeoLiteCity.dat"
                   add_field => [ "[geoip][coordinates]", "%{[geoip][longitude]}" ]
                   add_field => [ "[geoip][coordinates]", "%{[geoip][latitude]}"  ]
                 }

            # Parse the assembled time_stamp and drop the helper fields listed in remove_field.
            date {
                 match => ["time_stamp", "dd/MMM/YYYY:HH:mm:ss"]
                 remove_field => [ "time_stamp","day","dayname","month","hour","minute","second","year"]
               }
   }
}

However, the clientip field is not populated with the client IP, so the GeoIP lookup does not work.

Below is the Kibana output

@timestamp 	November 1st 2016, 16:55:16.968
t@version 	1
t_id	  	AVghgkdlw0uQaifig-SL
t_index	  	filebeat-2016.11.01
#_score	  	
t_type	  	apache-error
tbeat.hostname 	reverse-apache
tbeat.name	reverse-apache
?clientip	%{clientip}
#count	  	1
?day	  	01
?dayname	Tue
?fields	  	- 
thost	  	reverse-apache
?hour	       16
tinput_type	log
?loglevel  	error
tmessage	[pid 30606:tid 140550112012032] [client 177.205.104.241:62805] [client 177.205.104.241] ModSecurity: Access denied with code 401 (phase 2). Operator EQ matched 0 at REQUEST_HEADERS. [file "/usr/local/modsecurity/crs/activated_rules/modsecurity_crs_21_protocol_anomalies.conf"] [line "47"] [id "960015"] [rev "1"] [msg "Request Missing an Accept Header"] [severity "NOTICE"] [ver "OWASP_CRS/2.2.9"] [maturity "9"] [accuracy "9"] [tag "OWASP_CRS/PROTOCOL_VIOLATION/MISSING_HEADER_ACCEPT"] [tag "WASCTC/WASC-21"] [tag "OWASP_TOP_10/A7"] [tag "PCI/6.5.10"] [hostname "www.xxx.com"] [uri "/index.php/component/search/"] [unique_id "WBjzHQpC--kAAHeOXa4AAACG"]

Any suggestions for how to resolve this issue?

Thanks.

Please edit your post and format the whole configuration block as preformatted text using the toolbar button. I want to see exactly what your grok expression looks like.

Sorry for the lack of attention.

[client 177.205.104.241:62805] doesn't follow immediately after the loglevel so (?:\[client %{IPORHOST:clientip}\] ){0,1} won't match. Additionally, you're not including the port number after the client IP address in that expression.

Do you have a suggestion for a code fix?
I could not see the solution.
Also, for my own understanding: why is the part [client 177.205.104.241] not interpreted by Logstash?

Thanks.

After \[%{NOTSPACE:loglevel}\] add something like \[pid \d+:tid \d+\] \[client %{IP}:\d+\].

Hi Magnus,
I changed the code; however, the clientip field still does not show the client IP. See the new code and Kibana output below.

    # Second attempt: the grok expression now accounts for the "[pid ...:tid ...]" segment
    # and the ":port" suffix after the client address.
    filter {
              if [type] == "apache-error" {
                # NOTE(review): "%{IP}" has no field name here, so the matched address is not
                # stored in any field; clientip is no longer captured by this expression.
                grok {
                         match => ["message", "\[%{WORD:dayname} %{WORD:month} %{DATA:day} %{DATA:hour}:%{DATA:minute}:%{DATA:second} %{YEAR:year}\] \[%{NOTSPACE:loglevel}\] \[pid \d+:tid \d+\] \[client %{IP}:\d+\]%{GREEDYDATA:message}"]
                         overwrite => [ "message" ]
                    }

                mutate {
                         convert => [ "[geoip][coordinates]", "float"]
                         add_field => {
                                         "time_stamp" => "%{day}/%{month}/%{year}:%{hour}:%{minute}:%{second}"
                                         # NOTE(review): "%{IP}" inside add_field is a field reference, not a grok
                                         # pattern; the resulting event shows the literal text "%{IP}:".
                                         "clientip" => "%{IP}:"
                                      }
                      }

                # GeoIP lookup keyed on clientip; results are stored under the "geoip" field.
                geoip {
                       source => "clientip"
                       target => "geoip"
                       add_tag => [ "apache-geoip" ]
                       database => "/etc/logstash/GeoLiteCity.dat"
                       add_field => [ "[geoip][coordinates]", "%{[geoip][longitude]}" ]
                       add_field => [ "[geoip][coordinates]", "%{[geoip][latitude]}"  ]
                     }

                # Parse the assembled time_stamp and drop the helper fields listed in remove_field.
                date {
                     match => ["time_stamp", "dd/MMM/YYYY:HH:mm:ss"]
                     remove_field => [ "time_stamp","day","dayname","month","hour","minute","second","year"]
                   }
           }
        }

Kibana Output:

    @timestamp	November 2nd 2016, 11:48:51.801
    t@version	1
    t_id	  	AVglkDd9w0uQaifih78O
    t_index	  	filebeat-2016.11.02
    #_score	  	
    t_type	  	apache-error
    tbeat.hostname	reverse-apache
    tbeat.name	reverse-apache
    ?clientip	%{IP}:
    #count	  	1
    ?day	  	02
    ?dayname	Wed
    ?fields	  	- 
    thost	  	reverse-apache
    ?hour	  	11
    tinput_type	log
    ?loglevel	:error
    tmessage	[client 107.178.194.54] ModSecurity: Access denied with code 401 (phase 2). Operator EQ matched 0 at REQUEST_HEADERS. [file "/usr/local/modsecurity/crs/activated_rules/modsecurity_crs_21_protocol_anomalies.conf"] [line "47"] [id "960015"] [rev "1"] [msg "Request Missing an Accept Header"] [severity "NOTICE"] [ver "OWASP_CRS/2.2.9"] [maturity "9"] [accuracy "9"] [tag "OWASP_CRS/PROTOCOL_VIOLATION/MISSING_HEADER_ACCEPT"] [tag "WASCTC/WASC-21"] [tag "OWASP_TOP_10/A7"] [tag "PCI/6.5.10"] [hostname "www.xxx.com"] [uri "/"] [unique_id "WBn8zgpC--kAAAKsdRwAAAEW"]
    ?minute	  	48
    ?month	  	Nov
    #offset	  	2,158,956
    ?second	  	46.662236
    tsource	  	/var/log/apache2/xxx.com_error.log
    ttags	  	beats_input_codec_plain_applied, _dateparsefailure
    ?time_stamp	02/Nov/2016:11:48:46.662236
    ttype	  	apache-error
    ?year	  	2016

I also tried the option "clientip" => "%{IP}:", but it did not work.

Another suggestion?:grin:

But now you're no longer attempting to capture the IP address into clientip. The addition I told you to add was just to deal with the other stuff. Compare below.

Log message:

... error [pid 30606:tid 140550112012032] [client 177.205.104.241:62805] [client 177.205.104.241] ModSecurity: ...

Grok expression:

... \[%{NOTSPACE:loglevel}\] \[pid \d+:tid \d+\] \[client %{IP}:\d+\]%{GREEDYDATA:message}

Sorry Magnus, but I did not understand what you wrote or what should be done.
None of the changes I tried worked.
I'm just getting started with the Elastic Stack.

Thank you very much for your attention .

Magnus, I changed the syntax of the Logstash filter plugin and got the following result:

# Third attempt: grok now captures the client address into clientip via %{IP:clientip}.
filter {
          if [type] == "apache-error" {
            grok {
                      match => ["message", "\[%{WORD:dayname} %{WORD:month} %{DATA:day} %{DATA:hour}:%{DATA:minute}:%{DATA:second} %{YEAR:year}\] \[%{NOTSPACE:loglevel}\] \[pid \d+:tid \d+\] \[client %{IP:clientip}:\d+\] %{GREEDYDATA:message}"]

                     overwrite => [ "message" ]
                }

            mutate {
                     convert => [ "[geoip][coordinates]", "float"]
                     add_field => {
                                     "time_stamp" => "%{day}/%{month}/%{year}:%{hour}:%{minute}:%{second}"
                                     # NOTE(review): clientip is already set by grok; adding it again here
                                     # yields the doubled value seen in the resulting event.
                                     "clientip" => "%{clientip}"
                                  }
                  }

            # GeoIP lookup keyed on clientip; results are stored under the "geoip" field.
            geoip {
                   source => "clientip"
                   target => "geoip"
                   add_tag => [ "apache-geoip" ]
                   database => "/etc/logstash/GeoLiteCity.dat"
                   add_field => [ "[geoip][coordinates]", "%{[geoip][longitude]}" ]
                   add_field => [ "[geoip][coordinates]", "%{[geoip][latitude]}"  ]
                 }

            # NOTE(review): the resulting event is tagged _dateparsefailure; its time_stamp value
            # ends in fractional seconds, which this pattern does not describe.
            date {
                 match => ["time_stamp", "dd/MMM/YYYY:HH:mm:ss"]
                 remove_field => [ "time_stamp","day","dayname","month","hour","minute","second","year"]
               }
   }
}

The match syntax now extracts the client IP; however, the clientip field shows the IP twice. See the Kibana output.

@timestamp	November 2nd 2016, 21:50:52.762
t@version	        1
t_id	                AVgnsFg1HAYfP2pKRUOB
t_index	        filebeat-2016.11.03
#_score	       1
t_type	apache-error
tbeat.hostname	reverse-apache
tbeat.name	       reverse-apache
?clientip	               66.249.88.60, 66.249.88.60
#count	              1
?day	                      02
?dayname	             Wed
?fields	   - 
?geoip.area_code	         650
?geoip.city_name	         Mountain View
?geoip.continent_code	 NA
?geoip.coordinates	         -122.0574, 37.41919999999999
?geoip.country_code2	 US
?geoip.country_code3	 USA
?geoip.country_name	 United States
?geoip.dma_code	         807
?geoip.ip	                         66.249.88.60
?geoip.latitude	                 37.41919999999999
geoip.location	                 -122.0574, 37.41919999999999
?geoip.longitude	         -122.0574
?geoip.postal_code	         94043
?geoip.real_region_name  California
?geoip.region_name	   CA
?geoip.timezone	           America/Los_Angeles
thost	                                   reverse-apache
?hour	                                   21
tinput_type	                   log
?loglevel	                          :error
tmessage	                          [client 66.249.88.60] ModSecurity: Access denied with code 401 (phase 4). Pattern match "^5\\\\d{2}$" at RESPONSE_STATUS. [file "/usr/local/modsecurity/crs/activated_rules/modsecurity_crs_50_outbound.conf"] [line "53"] [id "970901"] [rev "2"] [msg "The application is not available"] [data "Matched Data: 502 found within RESPONSE_STATUS: 502"] [severity "ERROR"] [ver "OWASP_CRS/2.2.9"] [maturity "9"] [accuracy "9"] [tag "WASCTC/WASC-13"] [tag "OWASP_TOP_10/A6"] [tag "PCI/6.5.6"] [hostname "www.xxx.com"] [uri "/sigh/consult/"] [unique_id "WBqJrQpC--kAAA0JcCsAAAGD"], referer: http://www.xxx.com/
?minute	  50
?month	  Nov
#offset	198,039,484
?second	  49.856841
tsource	/var/log/apache2/xxx.com_error.log
ttags	beats_input_codec_plain_applied, apache-geoip, _dateparsefailure
?time_stamp	  02/Nov/2016:21:50:49.856841
ttype	apache-error
?year	 2016

Do you have any suggestions for fixing the duplication?

Thanks.

Remove this:

"clientip" => "%{clientip}"

All right.
Thank you very much for your help and attention.

Final syntax for Logstash filter plugin

# Logstash filter for Apache error-log events (type "apache-error").
#
# Pipeline:
#   1. grok   - split the line into date parts, loglevel, clientip and the remaining message.
#   2. mutate - assemble a single time_stamp field from the captured date parts.
#   3. geoip  - look up clientip and store the result under "geoip".
#   4. mutate - convert [geoip][coordinates] to float (must run after geoip creates the field).
#   5. date   - set @timestamp from time_stamp and drop the helper fields.
filter {
  if [type] == "apache-error" {
    grok {
      # Expects: [day mon dd hh:mm:ss(.ffffff) yyyy] [level] [pid N:tid N] [client IP:port] rest
      match => ["message", "\[%{WORD:dayname} %{WORD:month} %{DATA:day} %{DATA:hour}:%{DATA:minute}:%{DATA:second} %{YEAR:year}\] \[%{NOTSPACE:loglevel}\] \[pid \d+:tid \d+\] \[client %{IP:clientip}:\d+\] %{GREEDYDATA:message}"]
      overwrite => [ "message" ]
    }

    mutate {
      add_field => {
        "time_stamp" => "%{day}/%{month}/%{year}:%{hour}:%{minute}:%{second}"
      }
    }

    geoip {
      source => "clientip"
      target => "geoip"
      add_tag => [ "apache-geoip" ]
      database => "/etc/logstash/GeoLiteCity.dat"
      # Two add_field entries on the same key build a [longitude, latitude] array.
      add_field => [ "[geoip][coordinates]", "%{[geoip][longitude]}" ]
      add_field => [ "[geoip][coordinates]", "%{[geoip][latitude]}"  ]
    }

    # The conversion is placed after geoip: before the lookup the field does not
    # exist yet, so converting it earlier has no effect.
    mutate {
      convert => [ "[geoip][coordinates]", "float" ]
    }

    date {
      # %{second} carries fractional seconds (e.g. 49.856841), which the plain
      # "ss" pattern cannot parse and which leads to _dateparsefailure. Try the
      # fractional form first and fall back to whole seconds.
      match => ["time_stamp", "dd/MMM/YYYY:HH:mm:ss.SSSSSS", "dd/MMM/YYYY:HH:mm:ss"]
      # remove_field is applied only when the date was parsed successfully.
      remove_field => [ "time_stamp","day","dayname","month","hour","minute","second","year"]
    }
  }
}