Hi, I am using Logstash 5.1.1 to parse files of 2-3 GB each (7-8M records per file), and there are thousands of them in an S3 bucket. My current throughput is barely one file per day (260 events per second) on an m3.xlarge AWS instance (4 CPUs, 15 GB memory), with CPU utilization at 97%. I start Logstash with -w 30, which means 30 pipeline workers. I am sending to a single Elasticsearch server with a basic configuration. I am using quite a lot of filters, shown below.
Are there any suggestions on how to speed up my parsing?
Would a larger instance for Logstash help?
I don't think I can just start another Logstash instance, because it would mess up the sincedb file.
Thanks!!
# Filter pipeline: strips the syslog header, decodes the JSON payload,
# normalises the request URL and extracts IDs / query parameters from it,
# adds GeoIP data, and finally drops every field that is not whitelisted.
filter {
  # Strip the syslog header and keep only the payload in "message".
  # The pattern is anchored with ^ so a line that does not start with a
  # syslog header fails immediately instead of being retried from every
  # offset in the line.
  # NOTE(review): assumes the syslog header is always at the very start of
  # the line (no leading whitespace/BOM) - confirm against real input.
  grok {
    match     => { "message" => "^%{SYSLOGBASE} %{GREEDYDATA:message}" }
    overwrite => [ "message" ]
  }

  # Decode the remaining payload as JSON; no target is set, so the decoded
  # keys are written to the top level of the event.
  json {
    source => "message"
  }

  # Parse the "date" field (either of the two formats) as UTC and store the
  # result back into "date"; @timestamp is not modified by this filter.
  date {
    match    => [ "date", "YYYY-MM-dd HH:mm:ss", "ISO8601" ]
    timezone => "UTC"
    target   => "date"
  }

  # Decode percent-encoded characters in the URL before any matching.
  urldecode {
    field => "url"
  }

  # Normalise the URL: lower-case it and strip quote characters.
  # mutate applies its operations in a fixed internal order, not in the
  # order they are written here.
  # NOTE(review): whether the gsub class also removes backslashes depends on
  # how the config parser treats the escape sequence - confirm.
  mutate {
    lowercase => [ "url" ]
    gsub      => [ "url", "[\\"]", "" ]
  }

  # Split the URL on "&" and "?" and turn every key=value pair into an
  # event field. Field names therefore come from the request itself.
  kv {
    source      => "url"
    field_split => "&?"
  }

  # Extract image / category / tag identifiers from the URL path.
  # break_on_match is false so that every pattern of every field is tried:
  #   1. the three "url" patterns capture the raw tail after id/, category/
  #      or tag/;
  #   2. the per-field patterns then split a trailing "?query" off that
  #      tail and overwrite the field with the cleaned value.
  # NOTE(review): %{URI} is evaluated up to three times per event and its
  # "uri" capture is discarded by prune below; this grok is a likely CPU
  # hot spot - consider a cheaper, anchored expression.
  grok {
    break_on_match => false
    match => {
      "url" => [
        "%{URI:uri}id/%{GREEDYDATA:image_id}",
        "%{URI:uri}category/%{GREEDYDATA:category_id}",
        "%{URI:uri}tag/%{GREEDYDATA:tag_content}"
      ]
      "image_id"    => "%{NUMBER:image_id}\?%{GREEDYDATA:uri_else}"
      "category_id" => "%{NUMBER:category_id}\?%{GREEDYDATA:uri_else}"
      "tag_content" => "%{GREEDYDATA:tag_content}\?%{GREEDYDATA:uri_else}"
    }
    overwrite => [ "image_id", "category_id", "tag_content" ]
  }

  # The "k" field becomes the search term; "+" characters are replaced
  # with spaces. The gsub relies on mutate running rename first.
  mutate {
    rename => { "[k]" => "search_term" }
    gsub   => [ "search_term", "[+]", " " ]
  }

  # Flatten nested "details" fields, map "id" to "image_id", and split the
  # comma-separated collection positions into an array. The split relies on
  # mutate running rename first.
  mutate {
    rename => {
      "[details][nb_content_ids]"      => "nb_contents_ids"
      "[details][collection_positions]" => "collection_positions"
      "[id]"                            => "image_id"
    }
    split => { "collection_positions" => "," }
  }

  # Look up geographic information for the client address in "ip".
  geoip {
    source => "ip"
  }

  # Keep only the listed top-level fields. whitelist_names entries are
  # regular expressions, so each one is anchored with ^...$; without the
  # anchors "ip", "url", "date" or "action" would also keep any field whose
  # name merely contains them (e.g. "zip", "description", "update"),
  # which matters because kv above creates fields from arbitrary query keys.
  prune {
    whitelist_names => [
      "^@timestamp$",
      "^geoip$",
      "^url$",
      "^search_query$",
      "^app_id$",
      "^member_id$",
      "^ip$",
      "^is_buyer$",
      "^action$",
      "^date$",
      "^nb_contents_ids$",
      "^content_ids$",
      "^collection_positions$",
      "^image_id$",
      "^tag_content$",
      "^search_term$",
      "^category_id$"
    ]
  }
}