Exception on parsing user agent with 5.4.2

After updating Logstash to 5.4.2, I have an issue with user agent parsing and, of course, with performance:

[2017-06-22T09:46:42,039][ERROR][logstash.filters.useragent] Uknown error while parsing user agent data {:exception=>java.lang.StringIndexOutOfBoundsException: String index out of range: 65, :field=>"useragent", :event=>2017-06-22T07:46:38.575Z wl94-wv17 - [22/Jun/2017:09:46:38 +0200] 178.41.44.246 46.28.106.25 347880 200 150 HTTP cs-.eu www.cs-.eu "POST /?do=NewMessages HTTP/1.1" "http://www.cs-.eu/exchange/profile/message/663" "Mozilla/5.0 (Windows NT 6.1; rv:53.0) Gecko/20100101 Firefox/53.0"}
[2017-06-22T09:46:44,965][ERROR][logstash.filters.useragent] Uknown error while parsing user agent data {:exception=>java.lang.IllegalStateException: No match found, :field=>"useragent", :event=>2017-06-22T07:46:44.326Z wl8-f21 - [22/Jun/2017:09:46:43 +0200] 66.249.93.221 46.28.105.54 8077 200 6920 HTTP a
y.cz ay.cz "GET /citaty-o-lasce/ HTTP/1.1" "http://ay.cz/" "Mozilla/5.0 (Linux; Android 7.0; EVA-L09 Build/HUAWEIEVA-L09) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.83 Mobile Safari/537.36"}
[2017-06-22T09:46:45,911][ERROR][logstash.filters.useragent] Uknown error while parsing user agent data {:exception=>java.lang.StringIndexOutOfBoundsException: String index out of range: 83, :field=>"useragent", :event=>2017-06-22T07:46:42.766Z wl38-f168 - [22/Jun/2017:09:46:42 +0200] 188.165.122.164 46.28.105.94 96880 200 258597 HTTP es.cz hpsport-ss.cz "GET /img/hp-s****s-1438458562.jpg HTTP/1.1" "-" "Mozilla/5.0 (compatible, MSIE 11, Windows NT 6.3; Trident/7.0; rv:11.0) like Gecko"}

What does your config look like?

Apache filter:

filter {
# Tag every event that passes through this filter section.
mutate { add_tag => "master2-access" }

# Everything below applies to Apache access-log events only.
if [type] == "apache-access" {
	# Drop the tag "beats_input_codec_plain_applied" when it is present.
	if "beats_input_codec_plain_applied" in [tags] {
	mutate {
		remove_tag => ["beats_input_codec_plain_applied"]
	}
	}
	
	# Split the raw access-log line into named fields. The trailing group
	# (domain, request line, referrer, user agent) is optional as a whole.
	# This is the only grok here whose _grokparsefailure tag signals an
	# unparseable line.
	grok {
	match => { "message" => "(?:%{NOTSPACE:request_id}|-)	\[%{HTTPDATE:request_date}\]	%{IP:client_ip}	%{IP:local_ip}	%{NUMBER:time_to_serve:int}	(?:%{NUMBER:http_response}|-)	(?:%{NUMBER:size_of_response}|-)	%{WORD:http_proto}	%{URIHOST:virtualhost}(?:	%{URIHOST:domain}	\"(?:(%{WORD:http_method} %{GREEDYDATA:http_request} HTTP/%{NUMBER:http_version})|-)\"	\"(?:%{GREEDYDATA:referrer}|-)\"	\"(?:%{GREEDYDATA:useragent}|-)\"|)" }
	}
	
	# The next three groks derive client_ip_subnet from client_ip. They all run
	# on every event, but a given address can match at most one address family,
	# so a non-match is the normal case. tag_on_failure => [] keeps them from
	# adding grok's default _grokparsefailure tag, which would otherwise end up
	# on every event and hide real parse failures of the main pattern above.

	# IPv4: keep the first two octets and record the /16 network.
	grok {
	match => { "client_ip" => "(?<client_ip_subnetipv4>\d{1,3}\.\d{1,3})\.\d{1,3}\.\d{1,3}"}
	add_field => { "client_ip_subnet" => "%{client_ip_subnetipv4}.0.0/16"}
	remove_field => [ "client_ip_subnetipv4" ]
	add_tag => [ "ipv4" ]
	tag_on_failure => []
	}
	
	# IPv6 with at least five explicit groups: keep the first four as the /64.
	grok {
	match => { "client_ip" => "(?<client_ip_subnetipv6>[0-9a-f]{1,4}:[0-9a-f]{1,4}:[0-9a-f]{1,4}:[0-9a-f]{1,4}):[0-9a-f]{1,4}" }
	add_field => { "client_ip_subnet" => "%{client_ip_subnetipv6}::/64"}
	remove_field => [ "client_ip_subnetipv6" ]
	add_tag => [ "ipv6" ]
	tag_on_failure => []
	}
	
	# IPv6 where three groups are followed by "::": the fourth group is written
	# out as 0 to form the /64.
	grok {
	match => { "client_ip" => "(?<client_ip_subnetipv6>[0-9a-f]{1,4}:[0-9a-f]{1,4}:[0-9a-f]{1,4})::" }
	add_field => { "client_ip_subnet" => "%{client_ip_subnetipv6}:0::/64"}
	remove_field => [ "client_ip_subnetipv6" ]
	add_tag => [ "ipv6" ]
	tag_on_failure => []
	}
	
	# Parse the user agent string captured by the main grok. No target is set,
	# so the resulting fields are written at the top level of the event.
	useragent {
		source => "useragent"
	}

	# GeoIP lookup based on the client address.
	geoip {
	source => "client_ip"
	}
	# Prefix the top-level fields written by the useragent filter. The existing
	# "client_prowser_*" spelling is kept as-is because anything reading these
	# fields downstream would break if the names changed.
	mutate {
		rename => { "major" => "client_prowser_version_major" }
		rename => { "minor" => "client_prowser_version_minor" }
		rename => { "patch" => "client_prowser_version_patch" }
		rename => { "name" => "client_prowser_name" }
		rename => { "device" => "client_device" }
		rename => { "os" => "client_os" }
		rename => { "os_name" => "client_os_name" }
		rename => { "os_major" => "client_os_major" }
		rename => { "os_minor" => "client_os_minor" }
	}
	
	# Parse request_date (Apache HTTPDATE format) with the date filter; no
	# target is configured, so the filter's default target is used.
	date {
	match => [ "request_date", "dd/MMM/YYYY:HH:mm:ss Z" ]
	}
}
}

filter {
  # Apache error-log events only.
  if [type] == "apache-error" {
    grok {
      # Directory with site-specific pattern definitions; APACHE_ERRORLOG_DATE
      # is presumably defined there (not visible here, to be confirmed).
      patterns_dir => ["/data/logstash/paterns"]

      # Two candidate patterns for the same field, listed from most to least
      # specific. With grok's default break_on_match the first one that matches
      # wins: the first also captures the optional request id and the client
      # address/port, the second only the common prefix plus the rest of the line.
      match => {
        "message" => [
          "\[%{APACHE_ERRORLOG_DATE:request_date}\] \[%{WORD:modul}:%{LOGLEVEL:loglevel}\] \[pid %{NUMBER:pid}:tid %{NUMBER:tid}\] (?:%{GREEDYDATA:error_lowlevel} |)(?:(\[id %{NOTSPACE:request_id}\] )|)\[client %{IP:client_ip}:%{POSINT:client_ip_port}\]",
          "\[%{APACHE_ERRORLOG_DATE:request_date}\] \[%{WORD:modul}:%{LOGLEVEL:loglevel}\] \[pid %{NUMBER:pid}:tid %{NUMBER:tid}\] %{GREEDYDATA:error_lowlevel}"
        ]
      }
    }

    # Parse the captured request_date with the date filter (default target).
    date {
      match => [ "request_date", 'EEE MMM dd HH:mm:ss.SSS YYYY' ]
    }
  }
}

What does an error event look like if you output it to stdout with a rubydebug codec?

When "Uknown error while parsing user agent" appears in Logstash's log file, nothing is written to stdout for that event.

I tried the debug log level, but there is nothing usable in the log.

I have tried to reproduce it, but it works fine for me.

Maybe that is because this error does not occur for every log line. Is there anything else I can provide?

I will try to put together a test-case log file and provide it to you. Is there any way to send you the full log? I don't want to paste our customers' logs here publicly.

Hello,

I have the same problem.
The index varies. For example: 47, 86, 90

For example:

  • user-agent with string "FeedMediaSDK/Android/v4.3.3 Dalvik/2.1.0 (Linux; U; Android 7.0; LGMS550 Build/NRD90U)" parsed with error

19:55:09.379 [[main]>worker2] ERROR logstash.filters.useragent - Uknown error while parsing user agent data {:exception=>java.lang.StringIndexOutOfBoundsException: String index out of range: 47, :field=>"[req][headers][user-agent]", :event=>2017-06-01T06:33:13.369Z api-server-* HTTP POST /api/v2/session}

  • user-agent "FeedMediaSDK/3.0.4 (iPhone; iOS; 10.3.2; en_US)" parsed correctly

Filter:


filter {
  # Run the useragent filter only when the User-Agent header field exists and
  # is neither an empty string nor the literal two-character value "" (a pair
  # of double quotes).
  #
  # The comparisons must be joined with "and": with "or", every value differs
  # from at least one of the two literals, so the guard would always pass.
  # The quoted-empty literal is written in single quotes ('""') so that it is
  # one well-formed string rather than a run of four double-quote characters.
  if [req][headers][user-agent] and [req][headers][user-agent] != "" and [req][headers][user-agent] != '""' {
    useragent {
      source => "[req][headers][user-agent]"
      # Write the parsed fields under "ua" instead of the event's top level.
      target => "ua"
    }
  }
}

I suspect a problem with how the useragent plugin handles the string length.
How long is the source field?

This topic was automatically closed 28 days after the last reply. New replies are no longer allowed.