Hello logstash community.
I have a problem with the useragent filter. It is painfully slow. :(( It works, but really slowly. I have the same problem with the cidr filter.
If I use the filter, I get maybe 3 events per second if I am lucky.
I am running Logstash 1.5.1 on CentOS 6.6 with java-1.8.0-openjdk
VM: 32 VCPU with 32 workers for logstash and 8G RAM
My setup: LSF ---> REDIS ---> LSI ----> ES
If I don't use the useragent filter, the LSI is really fast.
Here is my filter config:
# Filter pipeline for events whose [type] is "ironport".
#
# Stage 1 splits the syslog envelope out of "message" (timestamp, host,
# program, optional pid, payload) and sets @timestamp from the syslog time.
# Stage 2 applies only to events whose syslog_program is "SECLOG": it parses
# the access-log payload, expands the <...> CSV section into named columns,
# and parses the user-agent string for the two listed proxy hosts.
filter {
  if [type] == "ironport" {
    grok {
      # The brackets around the pid must be escaped; an unescaped "[" opens a
      # regex character class instead of matching a literal bracket.
      match => { "message" => "%{SYSLOGTIMESTAMP:syslog_timestamp} %{SYSLOGHOST:syslog_hostname} %{DATA:syslog_program}(?:\[%{POSINT:syslog_pid}\])?: %{GREEDYDATA:syslog_message}" }
      add_field => [ "received_at", "%{@timestamp}" ]
    }
    syslog_pri { }
    date {
      # Two patterns: single-digit and two-digit day of month.
      match => [ "syslog_timestamp", "MMM d HH:mm:ss", "MMM dd HH:mm:ss" ]
    }
    mutate {
      replace => [ "@message", "%{syslog_message}" ]
    }
    if [syslog_program] == "SECLOG" {
      grok {
        match => {
          # Escaping notes for this pattern:
          #   \"  literal double quote inside the double-quoted config string
          #   \\  literal backslash between domain and user (domain\user@realm)
          #   \[ \]  literal brackets around the HTTP date
          "syslog_message" => "%{WORD:severity}: %{IP:user_ip} (-|\"%{WORD:domain}\\%{NOTSPACE:user}@%{WORD:realm}\") %{NOTSPACE} \[%{HTTPDATE}\] \"%{WORD:request} (|%{URIPROTO:url_proto}://)(?:%{URIHOST:url_host})?(?:%{URIPATH:url_path}(?:%{URIPARAM:url_param})?)?\" %{NUMBER:response} (?:%{NUMBER:bytes}|-) %{NOTSPACE:result_code}:%{NOTSPACE:code_value} %{NUMBER} %{DATA:acl_decision_tag}-%{DATA:access_policy}-%{DATA:identity}-%{DATA} <%{GREEDYDATA:source_csv}> - %{NOTSPACE:url_ip}, %{WORD:auth_method}?, %{GREEDYDATA:user_agent_browser}" }
      }
      # Drop unparseable events immediately, before the csv and useragent
      # filters run, so no further work is spent on events that are
      # discarded anyway. The set of dropped events is unchanged.
      if "_grokparsefailure" in [tags] {
        drop { }
      }
      csv {
        source => "source_csv"
        # Columns that are not needed are all named "remove" and discarded
        # by remove_field below.
        columns => ["category","reputation_score","remove","remove","remove","remove","remove","remove","remove","remove","remove","remove","remove","remove","remove","remove","remove","remove","remove","remove","remove","remove","remove","app_name","app_type","remove","remove","Average_bandwidth_KB_sec","remove","remove","remove","remove","remove","remove","remove","remove","remove","remove"]
        remove_field => [ "remove" ]
      }
      # User-agent parsing is limited to the two proxy hosts.
      # NOTE(review): this filter is the reported throughput bottleneck;
      # TODO confirm whether a newer useragent plugin release improves it.
      if [syslog_hostname] == "proxy01" or [syslog_hostname] == "proxy02" {
        useragent {
          source => "user_agent_browser"
        }
      }
      # Remove intermediate and envelope fields once parsing is complete.
      mutate {
        remove_field => [ "source_csv", "message", "host", "@version", "syslog_message", "syslog_program" ]
      }
    }
  }
}
Here is a sample log:
Info: 172.16.92.32 - - [01/Jul/2015:20:41:07 -0400] "GET http://live.lemde.fr/mux.json" 304 0 TCP_MISS:DIRECT 4 ALLOW_WBRS_12-DefaultGroup-A2TZ.noAuth.ID-NONE-NONE-NONE-DefaultGroup <IW_news,3.0,1,"-",-,-,-,-,"-",-,-,-,"-",-,-,"-","-",-,-,IW_news,-,"-","-","Unknown","Unknown","-","-",986.00,0,-,"-","-",-,"-",-,-,"-","-"> - 72.21.91.8, NONE, "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/43.0.2357.130 Safari/537.36"
I get what I need but really really slow.
name Chrome
os Windows 7
os_name Windows 7
patch 2357
Any suggestions?
Regards,
Gabriel