Why is my Logstash using huge CPU and how to fix it?


#1

Hi,

I am using ELK GA 5.0.0 in my Linux box. I am using 2 logstash instances to pull from Kafka, and it is having high CPU utilization. I tried /_node/hot_threads?human=true and below are the results;

node 1

::: {}
Hot threads at 2017-05-15T06:25:27+00:00, busiestThreads=3: 
================================================================================
83.03 % of cpu usage, state: runnable, thread name: 'Ruby-0-Thread-13' 
/apps/ELK/Logstash/vendor/bundle/jruby/1.9/gems/logstash-input-kafka-5.0.5/lib/logstash/inputs/kafka.rb:191
	java.lang.System.identityHashCode(Native Method)
	org.jruby.javasupport.util.ObjectProxyCache.hash(ObjectProxyCache.java:174)
	org.jruby.javasupport.util.ObjectProxyCache.getOrCreate(ObjectProxyCache.java:163)
	org.jruby.javasupport.Java.getInstance(Java.java:385)
	org.jruby.javasupport.Java.getInstance(Java.java:364)
	org.jruby.javasupport.JavaUtil.convertJavaToUsableRubyObject(JavaUtil.java:194)
	org.jruby.javasupport.JavaMethod.convertReturn(JavaMethod.java:513)
	org.jruby.javasupport.JavaMethod.invokeDirectWithExceptionHandling(JavaMethod.java:437)
	org.jruby.javasupport.JavaMethod.invokeDirect(JavaMethod.java:300)
	org.jruby.java.invokers.InstanceMethodInvoker.call(InstanceMethodInvoker.java:35)
--------------------------------------------------------------------------------
82.94 % of cpu usage, state: runnable, thread name: 'Ruby-0-Thread-11' 
/apps/ELK/Logstash/vendor/bundle/jruby/1.9/gems/logstash-input-kafka-5.0.5/lib/logstash/inputs/kafka.rb:191
	sun.nio.ch.EPollArrayWrapper.epollWait(Native Method)
	sun.nio.ch.EPollArrayWrapper.poll(EPollArrayWrapper.java:269)
	sun.nio.ch.EPollSelectorImpl.doSelect(EPollSelectorImpl.java:93)
	sun.nio.ch.SelectorImpl.lockAndDoSelect(SelectorImpl.java:86)
	sun.nio.ch.SelectorImpl.selectNow(SelectorImpl.java:105)
	org.apache.kafka.common.network.Selector.select(Selector.java:452)
	org.apache.kafka.common.network.Selector.poll(Selector.java:277)
	org.apache.kafka.clients.NetworkClient.poll(NetworkClient.java:260)
	org.apache.kafka.clients.consumer.internals.ConsumerNetworkClient.clientPoll(ConsumerNetworkClient.java:360)
	org.apache.kafka.clients.consumer.internals.ConsumerNetworkClient.poll(ConsumerNetworkClient.java:224)
--------------------------------------------------------------------------------
82.87 % of cpu usage, state: runnable, thread name: 'Ruby-0-Thread-12' 
/apps/ELK/Logstash/vendor/bundle/jruby/1.9/gems/logstash-input-kafka-5.0.5/lib/logstash/inputs/kafka.rb:191
	sun.nio.ch.EPollArrayWrapper.epollWait(Native Method)
	sun.nio.ch.EPollArrayWrapper.poll(EPollArrayWrapper.java:269)
	sun.nio.ch.EPollSelectorImpl.doSelect(EPollSelectorImpl.java:93)
	sun.nio.ch.SelectorImpl.lockAndDoSelect(SelectorImpl.java:86)
	sun.nio.ch.SelectorImpl.selectNow(SelectorImpl.java:105)
	org.apache.kafka.common.network.Selector.select(Selector.java:452)
	org.apache.kafka.common.network.Selector.poll(Selector.java:277)
	org.apache.kafka.clients.NetworkClient.poll(NetworkClient.java:260)
	org.apache.kafka.clients.consumer.internals.ConsumerNetworkClient.clientPoll(ConsumerNetworkClient.java:360)
	org.apache.kafka.clients.consumer.internals.ConsumerNetworkClient.poll(ConsumerNetworkClient.java:224)
--------------------------------------------------------------------------------

node 2

::: {}
Hot threads at 2017-05-15T06:24:45+00:00, busiestThreads=3: 
================================================================================
94.48 % of cpu usage, state: runnable, thread name: 'Ruby-0-Thread-8' 
/apps/ELK/Logstash/vendor/bundle/jruby/1.9/gems/logstash-input-kafka-5.0.5/lib/logstash/inputs/kafka.rb:191
	sun.nio.ch.EPollArrayWrapper.epollWait(Native Method)
	sun.nio.ch.EPollArrayWrapper.poll(EPollArrayWrapper.java:269)
	sun.nio.ch.EPollSelectorImpl.doSelect(EPollSelectorImpl.java:93)
	sun.nio.ch.SelectorImpl.lockAndDoSelect(SelectorImpl.java:86)
	sun.nio.ch.SelectorImpl.selectNow(SelectorImpl.java:105)
	org.apache.kafka.common.network.Selector.select(Selector.java:452)
	org.apache.kafka.common.network.Selector.poll(Selector.java:277)
	org.apache.kafka.clients.NetworkClient.poll(NetworkClient.java:260)
	org.apache.kafka.clients.consumer.internals.ConsumerNetworkClient.clientPoll(ConsumerNetworkClient.java:360)
	org.apache.kafka.clients.consumer.internals.ConsumerNetworkClient.poll(ConsumerNetworkClient.java:224)
--------------------------------------------------------------------------------
0.5 % of cpu usage, state: waiting, thread name: '[main]>worker3' 
	sun.misc.Unsafe.park(Native Method)
	java.util.concurrent.locks.LockSupport.park(LockSupport.java:175)
	java.util.concurrent.locks.AbstractQueuedSynchronizer.parkAndCheckInterrupt(AbstractQueuedSynchronizer.java:836)
	java.util.concurrent.locks.AbstractQueuedSynchronizer.doAcquireInterruptibly(AbstractQueuedSynchronizer.java:897)
	java.util.concurrent.locks.AbstractQueuedSynchronizer.acquireInterruptibly(AbstractQueuedSynchronizer.java:1222)
	java.util.concurrent.locks.ReentrantLock.lockInterruptibly(ReentrantLock.java:335)
	org.jruby.RubyThread.lockInterruptibly(RubyThread.java:1470)
	org.jruby.ext.thread.Mutex.lock(Mutex.java:91)
	org.jruby.ext.thread.Mutex.synchronize(Mutex.java:147)
	org.jruby.ext.thread.Mutex$INVOKER$i$0$0$synchronize.call(Mutex$INVOKER$i$0$0$synchronize.gen)
--------------------------------------------------------------------------------
0.5 % of cpu usage, state: waiting, thread name: '[main]>worker4' 
	sun.misc.Unsafe.park(Native Method)
	java.util.concurrent.locks.LockSupport.park(LockSupport.java:175)
	java.util.concurrent.locks.AbstractQueuedSynchronizer.parkAndCheckInterrupt(AbstractQueuedSynchronizer.java:836)
	java.util.concurrent.locks.AbstractQueuedSynchronizer.doAcquireInterruptibly(AbstractQueuedSynchronizer.java:897)
	java.util.concurrent.locks.AbstractQueuedSynchronizer.acquireInterruptibly(AbstractQueuedSynchronizer.java:1222)
	java.util.concurrent.locks.ReentrantLock.lockInterruptibly(ReentrantLock.java:335)
	org.jruby.RubyThread.lockInterruptibly(RubyThread.java:1470)
	org.jruby.ext.thread.Mutex.lock(Mutex.java:91)
	org.jruby.ext.thread.Mutex.synchronize(Mutex.java:147)
	org.jruby.ext.thread.Mutex$INVOKER$i$0$0$synchronize.call(Mutex$INVOKER$i$0$0$synchronize.gen)
--------------------------------------------------------------------------------

Somebody kindly tell me why is this happening and how to fix this? Thanks in advance.


#2

I only run one instance of logstash on my computer and it uses only a few percent of my CPU indeed. This high CPU usage might indicate that your config is somewhere wrong (at least, that's the case for me).

Do you have any output when you use rubydebug output codec?


#3

Hi, below are my logstash configurations;

logstash 1

input {
	kafka{
		group_id => "mygroup1"
		topics => ["mytopic1"]
		bootstrap_servers => "192.168.1.10:9091,192.168.1.11:9093,192.168.1.12:9092"
		codec => json
	}
}
filter {
	if[fields][logtype] == "loga"{
		grok{
			match => { "message" => "%{NUMBER:data1}\s*%{NUMBER:data2}\s*%{NUMBER:data3}\s*%{NUMBER:data4}\s*%{NUMBER:data5}\s*%{NUMBER:data6}\s*%{NUMBER:data7}\s*%{NUMBER:data8}\s*%{NUMBER:data9}\s*%{NUMBER:data10}\s*%{NUMBER:data11}\s*%{NUMBER:data12}\s*%{NUMBER:data13}\s*%{NUMBER:data14}\s*%{NUMBER:data15}\s*%{NUMBER:data16}\s*%{NUMBER:data17}\t (?<timestamp>20%{YEAR}-%{MONTHNUM}-%{MONTHDAY} %{HOUR}:?%{MINUTE}(?::?%{SECOND}))" }
		}
		date{
			match => [ "timestamp", "YYYY-MM-dd HH:mm:ss" ]
			target => "@timestamp"
		}
	}else{
		grok {
			match => { "message" => "<(?<timestamp>%{MONTH} %{MONTHDAY}, 20%{YEAR} %{HOUR}:?%{MINUTE}(?::?%{SECOND}) (?:AM|PM))\> <%{GREEDYDATA:data1}> <%{GREEDYDATA:data2}> <%{GREEDYDATA:data3}> <%{GREEDYDATA:data4}> <%{GREEDYDATA:data5}> <%{GREEDYDATA:data6}> <%{GREEDYDATA:data7}>\n <%{GREEDYDATA:text1}>" }
		}
		date {
			match => [ "timestamp", "MMM dd, YYYY hh:mm:ss:SSS aa" ]
			timezone => "UTC"
			target => "@timestamp"
		}
	} 
} 

output{
	if "unwanted_log" not in [tags]{
		if[fields][logtype] == "loga"{
		 	elasticsearch {
				hosts => ["192.168.1.100:9200","192.168.1.101:9200","192.168.1.102:9200"]
				index => "loga-%{+YYYY-MM-dd}"
				user => elastic
				password => "El%405tic"
			}
		}else{
			elasticsearch {
				user => elastic
				password => "El%405tic"
				hosts => ["192.168.1.100:9200","192.168.1.101:9200","192.168.1.102:9200"]
				index => "logb-%{+YYYY-MM-dd}"
			}
		}
	}
}

logstash 2

input {
	kafka{
		group_id => "group2"
		topics => ["mytopic2"]
		bootstrap_servers => "192.168.1.10:9091,192.168.1.11:9093,192.168.1.12:9092"
		codec => json
		add_field => { "logtype" => "LOGFILE" }
	}
	kafka{
        codec => json
		group_id => "group3"
		topics => ["mytopic3"]
		bootstrap_servers => "192.168.1.10:9091,192.168.1.11:9093,192.168.1.12:9092"
		add_field => { "logtype" => "LOGFILE"  }
	}
	kafka{
		group_id => "group4"
        topics => ["mytopic4"]
		bootstrap_servers => "192.168.1.10:9091,192.168.1.11:9093,192.168.1.12:9092"
		add_field => { "logtype" => "LOGFILE2" }
	}
}

filter {
	if[logtype] == "LOGFILE"{
		if[fields][logtype] == "LOGFILE1A"{
			grok{
				match => { "message" => "####<(?<timestamp>%{MONTH} %{MONTHDAY}, 20%{YEAR} %{HOUR}:?%{MINUTE}(?::?%{SECOND}) (?:AM|PM) (?:GMT))\> <%{GREEDYDATA:data1}> <%{GREEDYDATA:data2}> <%{GREEDYDATA}> <%{GREEDYDATA:data3}> <%{GREEDYDATA}> <%{GREEDYDATA}> <%{GREEDYDATA}> <%{GREEDYDATA:data4}> <%{GREEDYDATA}> <%{GREEDYDATA}> <%{GREEDYDATA:data5}>" }	
			}
			date {
				match => [ "timestamp", "MMM dd, YYYY hh:mm:ss aa z" ]
				timezone => "GMT"
				target => "@timestamp"
			}
		}else if[fields][logtype] == "LOGFILE1B"{
			grok{
				match => { "message" => "%{NOTSPACE:data1} %{NOTSPACE:data2} ('%{NOTSPACE:data3}'|%{NOTSPACE:data3}) %{NOTSPACE:data4} %{NOTSPACE:data5} %{NOTSPACE:data6} %{NOTSPACE:data7} %{NOTSPACE:data8} %{NOTSPACE:data9} %{NOTSPACE:data10} ('%{NOTSPACE:data11}'|%{NOTSPACE:data11}) %{NOTSPACE:data12} ('%{GREEDYDATA:data13}'|%{NOTSPACE:data13}) ('%{GREEDYDATA:data14}'|%{NOTSPACE:data14}) ('%{GREEDYDATA:data15}'|%{NOTSPACE:data15}) ('%{GREEDYDATA:data16}'|%{NOTSPACE:data16})" }
			}		
			if [data9] == "/home/index" {
				mutate {
					add_tag => [ "unwanted_entry"]
				}
			}else{
				if [data4] == "-" {
					mutate {
						replace => [ "data4", "-1" ]
					}
				}

				if [data6] == "-" {
					mutate {
						replace => [ "data6", "-1" ]
					}
				}

				mutate {
					add_field => {
						"dateTime" => "%{data1} %{data2}"
					}
					remove_field => [ "data1", "data2" ]
					gsub => ["data7","\'",""]
				}
				date {
					match => [ "dateTime", "YYYY-MM-dd HH:mm:ss" ]
					timezone => "GMT"
					target => "@timestamp"
				}
			}
		}
	}else if[logtype] == "LOGFILE2"{
		grok {
			match => { "message" => "^##(?<level>(LV1|LV2|LV3)) (?<timestamp>%{MONTHDAY}-%{MONTH}-%{YEAR} %{TIME}) %{NOTSPACE:data1} %{NOTSPACE:data2} %{GREEDYDATA:data3}" }
 	    }
	    date {
			match => [ "timestamp", "dd-MMM-yyyy HH:mm:ss.SSS" ]
			timezone => "UTC"
			target => "@timestamp"
		}
	}
}

output{
	if "unwanted_entry" not in [tags]{
		if[logtype] == "LOGFILE"{
			if[fields][logtype] == "LOGFILE1A"{
				elasticsearch {
					hosts => ["192.168.1.100:9200","192.168.1.101:9200","192.168.1.102:9200"]
					index => "loga-%{+YYYY-MM-dd}"
					user => elastic
					password => "El%405tic"
				}
			}else if[fields][logtype] == "LOGFILE1B"{
				elasticsearch {
					hosts => ["192.168.1.100:9200","192.168.1.101:9200","192.168.1.102:9200"]
					index => "logb-%{+YYYY-MM-dd}"
					user => elastic
					password => "El%405tic"
				}
			}
		}else if[logtype] == "LOGFILE2"{
			elasticsearch {
				hosts => ["192.168.1.100:9200","192.168.1.101:9200","192.168.1.102:9200"]
				index => "logc-%{+YYYY-MM-dd}"
				user => elastic
				password => "El%405tic"
			}
		}	
	}
}

#4

in output section, add:

stdout {
  codec => rubydebug 
}

Launch logstash in command line (or configure log output) and see if there is any error


#5

Hi, is the same possible with file output, so that I can analyze it better, like;

output {
 file {
   path => rubydebug.log
   codec => rubydebug
 }
}

#6

Probably


(system) #7

This topic was automatically closed 28 days after the last reply. New replies are no longer allowed.