We are using version 6.7.1 of the Elastic Stack. Our Logstash instance is always running at high CPU (90%+). What do we need to look for to understand the high CPU consumption? We are using an AWS m5.large machine for Logstash. Here is one of the Logstash pipeline scripts we use:
input {
  # Reads JSON-encoded log objects from S3 and tags every event with "xxxx".
  s3 {
    # Where to read from.
    region => "us-west-2"
    bucket => "xxxx-diff-events-local"
    prefix => "deployment/xxx/application/logs/"

    # The pattern below matches every key that does NOT contain
    # "-diff-events-spark-streaming-", so only keys containing that
    # substring are left to be processed.
    # NOTE(review): a per-character negative lookahead is an expensive way to
    # express "contains"; and since no delete/backup option is set here,
    # already-processed objects presumably stay under the prefix and get
    # listed and re-checked on each poll -- confirm against the s3 input docs,
    # this is a likely contributor to sustained CPU usage.
    exclude_pattern => "^((?!-diff-events-spark-streaming-).)*$"

    # How to decode and label what was read.
    codec => "json"
    tags => ["xxxx"]
  }
}
filter {
  # Events whose [message] has no [action] sub-field are wrapped so that the
  # original message value ends up under [message][description].
  if ! [message][action] {
    # Kept as two mutate blocks so that the add_field is applied before the
    # rename (ordering inside a single mutate is fixed by the plugin, not by
    # the order written in the config).
    mutate {
      add_field => { "[msg][description]" => "%{message}" }
    }
    mutate {
      rename => { "msg" => "message" }
    }
  }

  # Record which S3 object the event came from.
  mutate {
    add_field => {
      "filepath" => "%{[@metadata][s3][key]}"
    }
  }

  # Hash of the message, stored in @metadata (not emitted by the outputs).
  # NOTE(review): nothing in this pipeline reads [@metadata][fingerprint];
  # if it is not meant to become a document_id, this filter is wasted work.
  fingerprint {
    source => "message"
    target => "[@metadata][fingerprint]"
    method => "MURMUR3"
  }

  # Build the event time in epoch milliseconds from [instant].
  # This is done arithmetically rather than by interpolating nanoOfSecond
  # into a string and truncating it to 3 characters: nanoOfSecond is not
  # zero-padded, so e.g. 5000000 ns (5 ms) would be truncated to "500" and
  # produce a timestamp that is 495 ms off. Missing nanoOfSecond counts as 0.
  ruby {
    code => '
      secs  = event.get("[instant][epochSecond]")
      nanos = event.get("[instant][nanoOfSecond]")
      if secs.nil?
        # No usable timestamp in the event: flag it the same way a failed
        # date parse would be flagged and leave @timestamp untouched.
        event.tag("_dateparsefailure")
      else
        millis = secs.to_i * 1000 + nanos.to_i / 1_000_000
        event.set("[@metadata][transactiontime]", millis.to_s)
      end
    '
  }
  date {
    match => ["[@metadata][transactiontime]", "UNIX_MS"]
    target => "@timestamp"
  }

  # NOTE(review): [@metadata][lkt_date] is not referenced by the outputs of
  # this pipeline (the index name is fixed) -- confirm whether it is needed.
  mutate { add_field => { "[@metadata][lkt_date]" => "%{year}.%{month}" } }

  # Move the (structured) message under [context], then fan its fields out
  # into their final locations. [context][description] becomes the new
  # top-level [message].
  mutate { rename => { "message" => "context" } }
  mutate {
    rename => {
      "[context][action]" => "[context][workflow]"
      "[context][entity_guid]" => "[context][ent_guid]"
      "[context][partition]" => "[work][partition]"
      "[context][offset]" => "[work][seq_num]"
      "[contextMap][table_name]" => "[work][source]"
      "[context][app_query_response]" => "[target][status]"
      "[context][description]" => "[message]"
      "[envtag]" => "[service][envtag]"
      "[level]" => "[log][level]"
      "[loggerName]" => "[log][name]"
      "[threadId]" => "[log][thread][id]"
      "[thread]" => "[log][thread][name]"
    }
  }

  # Drop fields that are no longer needed and stamp the service name.
  mutate {
    remove_field => [ "instant", "month", "year", "monthday", "loggerFqcn", "threadPriority" ]
    add_field => {
      "[service][name]" => "diffevent-spark"
    }
  }
}
output {
  # Every event is appended to a local file ...
  # NOTE(review): judging by the file name this looks like a leftover debug
  # sink; it serializes and writes each event a second time -- confirm it is
  # still wanted in a pipeline that is CPU-bound.
  file {
    path => "/var/lib/logstash/debug.out"
  }

  # ... and indexed into Elasticsearch under a fixed index name.
  elasticsearch {
    hosts => ["https://xxx:19200"]
    user => "xxx"
    password => "xxxxxxx"
    index => "diffevent-current-write"
    # Index templates are not installed by Logstash for this output.
    manage_template => false
  }
}
The high CPU usage worries me. Is there anything wrong with the script above, the machine we are using, etc.? Any help with this would be greatly appreciated.