Hi guys,
We are running logstash 2.2.0 (against elasticsearch 2.3.1) and it indexes >1.2 million docs/minute.
If we then upgrade to logstash 2.3.1, it peaks at ~200k docs/min.. 1/6 of the previous throughput.
There's nothing in the error logs of either elasticsearch or logstash.
Any ideas as to what I could try to remedy this are very welcome.. (should I test with 5.0 alpha?)
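One thing already on my list is explicit pipeline tuning, in case 2.3 ships with different worker/batch defaults than 2.2. A minimal sketch of what I'd test (the -w/-b values are guesses for our 8-core box, and the config path is just our layout):

bin/logstash -w 8 -b 500 -f /etc/logstash/conf.d/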
Config is this:
input {
  redis {
    host => "127.0.0.1"
    # Remember that type does NOT overwrite the type set by the shipper!
    type => "redis-input"
    # these settings should match the output of the agent
    data_type => "list"
    key => "logstash"
    codec => json
    threads => 8
  }
  redis {
    host => "127.0.0.1"
    type => "netflow"
    data_type => "list"
    key => "pet1year"
    codec => json
    threads => 2
  }
  redis {
    host => "ytes02.example.org"
    # Remember that type does NOT overwrite the type set by the shipper!
    type => "redis-input"
    # these settings should match the output of the agent
    data_type => "list"
    key => "logstash"
    codec => json
    threads => 8
  }
  redis {
    host => "ytes02.example.org"
    type => "netflow"
    data_type => "list"
    key => "pet1year"
    codec => json
    threads => 2
  }
}
filter {
  # choose index
  # 1.5.0 only feature
  if [type] == "cnrdhcp" {
    mutate { add_field => { "[index]" => "cnrdhcp-%{+YYYY.MM.dd}" } }
  } else if [type] == "netflow" {
    mutate { add_field => { "[index]" => "netflow-%{+YYYY.MM.dd}" } }
  } else if [type] == "akamai_access_logs" {
    mutate { add_field => { "[index]" => "cdn_access_logs-%{+YYYY.MM.dd}" } }
  } else if [type] == "cdn_access_logs" {
    mutate { add_field => { "[index]" => "cdn_access_logs-%{+YYYY.MM.dd}" } }
  } else if [type] == "cdn_content_logs" {
    mutate { add_field => { "[index]" => "cdn_content_logs-%{+YYYY.MM.dd}" } }
  } else if [type] == "mpf_arkiv" {
    mutate { add_field => { "[index]" => "mpf_arkiv-%{+YYYY.MM}" } }
  } else {
    mutate { add_field => { "[index]" => "logstash-%{+YYYY.MM.dd}" } }
  }
  # generate message_id if it's not present..
  if [message_id] {
    mutate { add_tag => "hasmessage_id" }
  } else if [message] {
    ruby {
      init => "require 'digest/sha1'"
      code => "event['message_id'] = Digest::SHA1.base64digest(event['message'])"
    }
    # (a fingerprint-based alternative is sketched below the config)
  } else {
    # really broken input.. use timestamp as id for now.. - we should never land here
    mutate { add_field => { "message_id" => "%{@timestamp}" } }
  }
}
output {
  elasticsearch {
    codec => plain {
      charset => 'UTF-8'
    }
    hosts => "127.0.0.1:9200"
    index => "%{[index]}"
    manage_template => false
    document_id => "%{[message_id]}"
  }
  statsd {
    host => "localhost"
    port => 8125
    sender => "ytes01"
    namespace => "servers"
    increment => "logstash.processing"
  }
}
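One suspect is the ruby filter above, since it runs a Ruby SHA1 per event. A sketch of swapping it for the fingerprint filter, assuming logstash-filter-fingerprint is installed (note it emits hex, and with a key it's an HMAC, so document ids would differ from the current base64 SHA1):

filter {
  # sketch only, not our running config: would replace the ruby SHA1 block
  fingerprint {
    source => "message"
    target => "message_id"
    method => "SHA1"
    # some plugin versions require a key (HMAC) for the SHA methods
    key => "any-static-string"
  }
}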
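I'm also wondering whether the elasticsearch output itself is the bottleneck on 2.3, so another thing to test is explicit bulk sizing on the output above. workers and flush_size are standard options of logstash-output-elasticsearch 2.x, but these numbers are guesses, not measurements:

output {
  elasticsearch {
    hosts => "127.0.0.1:9200"
    index => "%{[index]}"
    manage_template => false
    document_id => "%{[message_id]}"
    # values to test, not measured optima
    workers => 4
    flush_size => 5000
  }
}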