Hi,
I am running an Elastic Stack with multiple Logstash servers in different networks to aggregate, filter and forward logs. For some time now, I have had the problem that some of these Logstash nodes regularly show a very high load and CPU usage. When I restart the Logstash service, everything is fine again for a while. You can see this behavior in this screenshot of the Elastic Monitoring:
I have already searched quite a bit for this problem, but I still have no clue what exactly is causing the increased load. I would be very grateful if anyone has an idea what the cause might be and could point me in the right direction!
Some more information on my configuration:
The specs of the nodes:
4 vCPUs
8GB RAM
6GB Heap
Logstash config:
# Ansible managed
# Logstash node settings. Nested keys are indented under their parent so that
# they resolve to the intended dotted setting names (e.g. path.data,
# xpack.management.elasticsearch.ssl.verification_mode).

pipeline.ordered: auto

# Data and log directories used by this node.
path:
  data: /var/lib/logstash
  logs: /var/log/logstash

# Monitoring is switched off under both setting names; the cluster UUID is still set.
xpack.monitoring.enabled: false
monitoring.enabled: false
monitoring.cluster_uuid: "uuid"

# Pipeline management is enabled and pointed at the three Elasticsearch hosts below.
xpack.management:
  enabled: true
  elasticsearch:
    hosts: ["https://elastic1:9200", "https://elastic2:9200", "https://elastic3:9200"]
    username: "logstash_internal"
    password: "password"
    ssl:
      verification_mode: certificate
      certificate_authority: /etc/logstash/certs/elastic-stack-ca.pem
  logstash.poll_interval: "5s"
  # Only the pipeline with this ID is listed for this node.
  pipeline.id: ["lan"]
Pipeline:
# Three listeners, all bound to the address taken from ${IP_ADDRESS}.
# Each input sets its own `type` value, which the output section branches on.
input {
  # Elastic Agent listener on port 5044 with TLS enabled and client
  # authentication set to "none".
  elastic_agent {
    type => "elastic_agent"
    host => "${IP_ADDRESS}"
    port => 5044
    ssl_enabled => true
    ssl_client_authentication => "none"
    ssl_certificate => "/etc/logstash/certs/logstash.crt.pem"
    ssl_key => "/etc/logstash/certs/logstash.key.pem"
  }

  # GELF listener on port 12201, TCP only (UDP explicitly disabled).
  gelf {
    type => "gelf"
    host => "${IP_ADDRESS}"
    port => 12201
    use_tcp => true
    use_udp => false
  }

  # Syslog listener on port 10514 with proxy protocol enabled and ECS v8 field naming.
  syslog {
    type => "syslog"
    host => "${IP_ADDRESS}"
    port => 10514
    proxy_protocol => true
    ecs_compatibility => "v8"
  }
}
# Adds data_stream.* fields to events from known hosts, keyed on [host][hostname]:
# server1-4 get namespace "dev", server5-9 get namespace "prod".
# Events from any other host pass through unchanged.
filter {
  # Development web servers.
  if [host][hostname] in ["server1", "server2", "server3", "server4"] {
    mutate {
      # NOTE(review): if incoming events already carry data_stream.* values,
      # add_field may append rather than overwrite — confirm this is intended.
      add_field => {
        "[data_stream][type]" => "logs"
        "[data_stream][dataset]" => "webservices"
        "[data_stream][namespace]" => "dev"
      }
    }
  }

  # Production web servers.
  if [host][hostname] in ["server5", "server6", "server7", "server8", "server9"] {
    mutate {
      add_field => {
        "[data_stream][type]" => "logs"
        "[data_stream][dataset]" => "webservices"
        "[data_stream][namespace]" => "prod"
      }
    }
  }
}
# Routes events to one of three Elasticsearch outputs, all using the same
# hosts, credentials and CA file:
#   1. events with non-empty data_stream.type and data_stream.dataset -> data stream output
#   2. otherwise, events with type "syslog" -> "syslog" rollover alias / ILM policy
#   3. events with type "gelf" -> "gelf" rollover alias / ILM policy
output {
  # Branch 1: both data_stream.type and data_stream.dataset are present and non-empty.
  if ([data_stream][type] and [data_stream][type] != "" ) and ([data_stream][dataset] and [data_stream][dataset] != "" ) {
    elasticsearch {
      hosts => ["https://elastic1:9200", "https://elastic2:9200", "https://elastic3:9200"]
      user => "logstash_internal"
      password => "password"
      ssl_enabled => "true"
      ssl_verification_mode => "full"
      ssl_certificate_authorities => "/etc/logstash/certs/elastic-stack-ca.pem"
      data_stream => "true"
    }
  # Branch 2: only reached when branch 1 did not match.
  } else if [type] == "syslog" {
    elasticsearch {
      hosts => ["https://elastic1:9200", "https://elastic2:9200", "https://elastic3:9200"]
      user => "logstash_internal"
      password => "password"
      ssl_enabled => "true"
      ssl_verification_mode => "full"
      ssl_certificate_authorities => "/etc/logstash/certs/elastic-stack-ca.pem"
      ilm_enabled => true
      ilm_rollover_alias => "syslog"
      ilm_pattern => "{now/d}-000001"
      ilm_policy => "syslog"
    }
  }

  # Branch 3: evaluated independently of the if/else chain above.
  # NOTE(review): because this is a separate `if`, a "gelf" event that also
  # matched branch 1 is sent to both outputs — confirm this is intended.
  if [type] == "gelf" {
    elasticsearch {
      hosts => ["https://elastic1:9200", "https://elastic2:9200", "https://elastic3:9200"]
      user => "logstash_internal"
      password => "password"
      ssl_enabled => "true"
      ssl_verification_mode => "full"
      ssl_certificate_authorities => "/etc/logstash/certs/elastic-stack-ca.pem"
      ilm_enabled => true
      ilm_rollover_alias => "gelf"
      ilm_pattern => "{now/d}-000001"
      ilm_policy => "gelf"
    }
  }
}