Logstash servers are running with high CPU consumption

We are using version 6.7.1 of the Elastic Stack. Our Logstash is always running at high CPU (90+%). What do we need to look for to understand the high CPU consumption? We are using an AWS m5.large instance for Logstash. Here is one of the Logstash pipeline scripts we use:

input {
  # Read JSON-encoded log objects from S3.
  s3 {
    region          => "us-west-2"
    bucket          => "xxxx-diff-events-local"
    # Only keys under this prefix are considered.
    prefix          => "deployment/xxx/application/logs/"
    # Negative look-ahead: a key is excluded when it does NOT contain
    # "-diff-events-spark-streaming-" anywhere, so only the spark-streaming
    # objects are kept.
    exclude_pattern => "^((?!-diff-events-spark-streaming-).)*$"
    codec           => "json"
    tags            => ["xxxx"]
  }
}
filter {
  # Events without a structured [message][action] are wrapped so that the
  # original message ends up under [message][description]. add_field runs in
  # its own mutate because the rename must see the field it creates.
  if ! [message][action] {
    mutate {
      add_field => { "[msg][description]" => "%{message}" }
    }
    mutate {
      rename => { "msg" => "message" }
    }
  }

  # Record which S3 object the event came from.
  mutate {
    add_field => {
      "filepath" => "%{[@metadata][s3][key]}"
    }
  }

  # NOTE(review): nothing in this pipeline reads [@metadata][fingerprint]
  # (the elasticsearch output sets no document_id) -- confirm whether it is
  # still needed, since it is computed for every event.
  fingerprint {
    source => "message"
    target => "[@metadata][fingerprint]"
    method => "MURMUR3"
  }

  # Build an epoch-milliseconds string from [instant].
  #
  # [instant][nanoOfSecond] is interpolated without zero padding, so keeping
  # only its first three characters is correct only for values >= 100000000
  # (18000000 ns, i.e. 18 ms, would become "180"). Left-pad with nine zeros
  # first, then keep the three digits that sit in the millisecond position
  # (the 9th..7th digits counted from the right).
  mutate { add_field => { "[@metadata][milli]" => "000000000%{[instant][nanoOfSecond]}" } }
  mutate {
    # If [instant][nanoOfSecond] is missing the pattern does not match, the
    # value is left untouched and the date filter below fails to parse it,
    # exactly as it did before.
    gsub => [ "[@metadata][milli]", "^[0-9]*([0-9]{3})[0-9]{6}$", "\1" ]
  }
  mutate {
    add_field => { "[@metadata][transactiontime]" => "%{[instant][epochSecond]}%{[@metadata][milli]}" }
  }
  date {
    match  => ["[@metadata][transactiontime]", "UNIX_MS"]
    target => "@timestamp"
  }

  # NOTE(review): [@metadata][lkt_date] is not referenced by the outputs
  # shown here -- confirm whether another consumer needs it.
  mutate { add_field => { "[@metadata][lkt_date]" => "%{year}.%{month}" } }

  # Move the structured payload to [context], then fan its members out to
  # their final field names. [context][description] becomes the new [message].
  mutate { rename => { "message" => "context" } }
  mutate {
    rename => {
      "[context][action]"             => "[context][workflow]"
      "[context][entity_guid]"        => "[context][ent_guid]"
      "[context][partition]"          => "[work][partition]"
      "[context][offset]"             => "[work][seq_num]"
      "[contextMap][table_name]"      => "[work][source]"
      "[context][app_query_response]" => "[target][status]"
      "[context][description]"        => "[message]"
      "[envtag]"                      => "[service][envtag]"
      "[level]"                       => "[log][level]"
      "[loggerName]"                  => "[log][name]"
      "[threadId]"                    => "[log][thread][id]"
      "[thread]"                      => "[log][thread][name]"
    }
  }

  # Drop the helper fields that are no longer needed and stamp the service name.
  mutate {
    remove_field => [ "instant", "month", "year", "monthday", "loggerFqcn", "threadPriority" ]
    add_field => {
      "[service][name]" => "diffevent-spark"
    }
  }
}

output {
  # Every event is also appended to a local file.
  file {
    path => "/var/lib/logstash/debug.out"
  }

  # Index into Elasticsearch; index templates are not managed by Logstash.
  elasticsearch {
    hosts           => ["https://xxx:19200"]
    user            => "xxx"
    password        => "xxxxxxx"
    index           => "diffevent-current-write"
    manage_template => false
  }
}

The high CPU usage worries me. Is there anything wrong with the script above, the machine we are using, etc.? Any help with this will be greatly appreciated.

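I suggest you look at the hot threads monitoring API (`GET /_node/hot_threads` on the Logstash API port, 9600 by default) to see which threads and plugins are consuming the CPU.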

This topic was automatically closed 28 days after the last reply. New replies are no longer allowed.