Joelp
(Joelp)
June 23, 2017, 6:23am
1
After updating Logstash to 5.4.2, I have an issue with parsing and, of course, with performance:
[2017-06-22T09:46:42,039][ERROR][logstash.filters.useragent] Uknown error while parsing user agent data {:exception=>java.lang.StringIndexOutOfBoundsException: String index out of range: 65, :field=>"useragent", :event=>2017-06-22T07:46:38.575Z wl94-wv17 - [22/Jun/2017:09:46:38 +0200] 178.41.44.246 46.28.106.25 347880 200 150 HTTP cs-.eu www.cs- .eu "POST /?do=NewMessages HTTP/1.1" "http://www.cs-.eu/exchange/profile/message/663" "Mozilla/5.0 (Windows NT 6.1; rv:53.0) Gecko/20100101 Firefox/53.0"}
[2017-06-22T09:46:44,965][ERROR][logstash.filters.useragent] Uknown error while parsing user agent data {:exception=>java.lang.IllegalStateException: No match found, :field=>"useragent", :event=>2017-06-22T07:46:44.326Z wl8-f21 - [22/Jun/2017:09:46:43 +0200] 66.249.93.221 46.28.105.54 8077 200 6920 HTTP a y.cz ay.cz "GET /citaty-o-lasce/ HTTP/1.1" "http://a y.cz/ " "Mozilla/5.0 (Linux; Android 7.0; EVA-L09 Build/HUAWEIEVA-L09) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.83 Mobile Safari/537.36"}
[2017-06-22T09:46:45,911][ERROR][logstash.filters.useragent] Uknown error while parsing user agent data {:exception=>java.lang.StringIndexOutOfBoundsException: String index out of range: 83, :field=>"useragent", :event=>2017-06-22T07:46:42.766Z wl38-f168 - [22/Jun/2017:09:46:42 +0200] 188.165.122.164 46.28.105.94 96880 200 258597 HTTP es.cz hpsport-ss.cz "GET /img/hp-s****s-1438458562.jpg HTTP/1.1" "-" "Mozilla/5.0 (compatible, MSIE 11, Windows NT 6.3; Trident/7.0; rv:11.0) like Gecko"}
What does your config look like?
Joelp
(Joelp)
June 23, 2017, 9:17am
3
Apache filter:
# Pipeline for Apache access-log events: parse the line, derive a client
# subnet, enrich with user-agent and GeoIP data, then set the event timestamp.
filter {
# Tag every event that passes through this filter section, regardless of type.
mutate { add_tag => "master2-access" }
if [type] == "apache-access" {
# Drop the "beats_input_codec_plain_applied" tag when it is present.
if "beats_input_codec_plain_applied" in [tags] {
mutate {
remove_tag => ["beats_input_codec_plain_applied"]
}
}
# Split the raw access-log line into named fields (request_id, request_date,
# client_ip, local_ip, time_to_serve, http_response, size_of_response,
# http_proto, virtualhost and, when present, domain, http_method,
# http_request, http_version, referrer, useragent).
# NOTE(review): in "(?:%{GREEDYDATA:x}|-)" the GREEDYDATA branch is tried
# first, so a literal "-" is presumably captured as the field value rather
# than skipped - confirm this is intended for referrer/useragent.
grok {
match => { "message" => "(?:%{NOTSPACE:request_id}|-) \[%{HTTPDATE:request_date}\] %{IP:client_ip} %{IP:local_ip} %{NUMBER:time_to_serve:int} (?:%{NUMBER:http_response}|-) (?:%{NUMBER:size_of_response}|-) %{WORD:http_proto} %{URIHOST:virtualhost}(?: %{URIHOST:domain} \"(?:(%{WORD:http_method} %{GREEDYDATA:http_request} HTTP/%{NUMBER:http_version})|-)\" \"(?:%{GREEDYDATA:referrer}|-)\" \"(?:%{GREEDYDATA:useragent}|-)\"|)" }
}
# IPv4 client: capture the first two octets into a temporary field, build
# client_ip_subnet as "<a.b>.0.0/16", remove the temporary field, tag "ipv4".
# NOTE(review): the three client_ip groks below run unconditionally; an
# address that does not match one of them presumably gets grok's default
# failure tag - verify whether that is acceptable.
grok {
match => { "client_ip" => "(?<client_ip_subnetipv4>\d{1,3}\.\d{1,3})\.\d{1,3}\.\d{1,3}"}
add_field => { "client_ip_subnet" => "%{client_ip_subnetipv4}.0.0/16"}
remove_field => [ "client_ip_subnetipv4" ]
add_tag => [ "ipv4" ]
}
# IPv6 client written with at least five groups: capture the first four
# groups and build client_ip_subnet as "<g1:g2:g3:g4>::/64", tag "ipv6".
grok {
match => { "client_ip" => "(?<client_ip_subnetipv6>[0-9a-f]{1,4}:[0-9a-f]{1,4}:[0-9a-f]{1,4}:[0-9a-f]{1,4}):[0-9a-f]{1,4}" }
add_field => { "client_ip_subnet" => "%{client_ip_subnetipv6}::/64"}
remove_field => [ "client_ip_subnetipv6" ]
add_tag => [ "ipv6" ]
}
# IPv6 client with three groups followed by "::": capture those three groups
# and build client_ip_subnet as "<g1:g2:g3>:0::/64", tag "ipv6".
grok {
match => { "client_ip" => "(?<client_ip_subnetipv6>[0-9a-f]{1,4}:[0-9a-f]{1,4}:[0-9a-f]{1,4})::" }
add_field => { "client_ip_subnet" => "%{client_ip_subnetipv6}:0::/64"}
remove_field => [ "client_ip_subnetipv6" ]
add_tag => [ "ipv6" ]
}
# Parse the "useragent" field extracted by the first grok. No target is set;
# the renames further down presumably pick up the fields this filter emits.
useragent {
source => "useragent"
}
# GeoIP lookup keyed on the client address.
geoip {
source => "client_ip"
}
# Rename the generic user-agent field names to client_* names.
mutate {
rename => { "major" => "client_prowser_version_major" }
rename => { "minor" => "client_prowser_version_minor" }
rename => { "patch" => "client_prowser_version_patch" }
rename => { "name" => "client_prowser_name" }
rename => { "device" => "client_device" }
rename => { "os" => "client_os" }
rename => { "os_name" => "client_os_name" }
rename => { "os_major" => "client_os_major" }
rename => { "os_minor" => "client_os_minor" }
}
# Parse request_date (e.g. "22/Jun/2017:09:46:38 +0200") with the date filter.
date {
match => [ "request_date", "dd/MMM/YYYY:HH:mm:ss Z" ]
}
}
}
# Pipeline for Apache error-log events: parse the line and set the timestamp.
filter {
if [type] == "apache-error" {
grok {
# Extra pattern definitions are loaded from this directory.
# NOTE(review): APACHE_ERRORLOG_DATE is presumably defined there - confirm.
patterns_dir => ["/data/logstash/paterns"]
# Detailed pattern: date, module:loglevel, pid/tid, optional low-level error
# text, optional "[id ...]" request id, then "[client ip:port]".
match => { "message" => "\[%{APACHE_ERRORLOG_DATE:request_date}\] \[%{WORD:modul}:%{LOGLEVEL:loglevel}\] \[pid %{NUMBER:pid}:tid %{NUMBER:tid}\] (?:%{GREEDYDATA:error_lowlevel} |)(?:(\[id %{NOTSPACE:request_id}\] )|)\[client %{IP:client_ip}:%{POSINT:client_ip_port}\]" }
# Looser pattern for lines without a "[client ...]" part: everything after
# the pid/tid block goes into error_lowlevel.
match => {"message" => "\[%{APACHE_ERRORLOG_DATE:request_date}\] \[%{WORD:modul}:%{LOGLEVEL:loglevel}\] \[pid %{NUMBER:pid}:tid %{NUMBER:tid}\] %{GREEDYDATA:error_lowlevel}"}
}
# Parse request_date using the error-log timestamp layout.
date {
match => [ "request_date", 'EEE MMM dd HH:mm:ss.SSS YYYY' ]
}
}
}
What does an error event look like if you output it to stdout with a rubydebug codec?
Joelp
(Joelp)
June 26, 2017, 8:45am
5
When "Uknown error while parsing user agent" appears in the Logstash log file, nothing is printed to stdout.
I tried debug level, but there is nothing usable in the log.
I have tried to reproduce it, but it works fine for me.
Joelp
(Joelp)
June 26, 2017, 1:57pm
7
Maybe that is because this error does not occur for every log line. Is there anything else I can provide?
I will try to put together a test-case log file and provide it to you. Is there any way to send you the full log? I don't want to paste our customers' logs here publicly.
avkghost
(Andrey Kozeletsky)
June 29, 2017, 5:59pm
8
Hello,
I have the same problem.
The index in the exception varies, for example: 47, 86, 90.
For example:
A user-agent with the string "FeedMediaSDK/Android/v4.3.3 Dalvik/2.1.0 (Linux; U; Android 7.0; LGMS550 Build/NRD90U)" is parsed with an error:
19:55:09.379 [[main]>worker2] ERROR logstash.filters.useragent - Uknown error while parsing user agent data {:exception=>java.lang.StringIndexOutOfBoundsException: String index out of range: 47, :field=>"[req][headers][user-agent]", :event=>2017-06-01T06:33:13.369Z api-server-* HTTP POST /api/v2/session}
The user-agent "FeedMediaSDK/3.0.4 (iPhone; iOS; 10.3.2; en_US)" is parsed correctly.
Filter:
# Run the useragent filter on the request's user-agent header and store the
# parsed result under the "ua" field.
filter {
# Guard: only parse when the header field is present.
# NOTE(review): the second clause joins two "!=" tests with "or", which looks
# always true for any value; "and" was presumably intended. The """" literal
# may also be a forum-escaping artifact - confirm against the real config.
if ([req][headers][user-agent] and ([req][headers][user-agent]!="" or [req][headers][user-agent]!="""")) {
useragent {
source => "[req][headers][user-agent]"
target => "ua"
}
}
}
I suspect a problem with string-length handling in the useragent plugin.
How long is the source field?
system
(system)
Closed
July 27, 2017, 5:59pm
9
This topic was automatically closed 28 days after the last reply. New replies are no longer allowed.