I run the ELK stack, APM Server, and Metricbeat (Docker and System modules) with Docker.
Logstash is used for reading log files, and both APM Server and Metricbeat are used for monitoring with a period of 10 seconds.
Sometimes Elasticsearch shows high CPU usage, and with the Nodes hot threads API I found that it is caused by the Lucene Merge Thread.
The CPU usage sometimes even exceeds 200% (monitored with `docker stats`) and causes HTTP errors when trying to access a Kibana dashboard or the Elasticsearch API at that moment.
I found some suggested solutions for this problem saying that setting `index.merge.scheduler.max_thread_count` to `1` might help, but adding this setting to config/elasticsearch.yml (mounted as a Docker volume) causes an error: `java.lang.IllegalArgumentException: node settings must not contain any index level settings`.
Besides, I noticed that the high CPU usage recurs at an interval similar to the monitoring period of APM Server and Metricbeat. Could the root cause be that too many documents are being written to Elasticsearch?
Is there a proper solution to this problem? If not, which settings should I tune to fit my environment?
Hot thread output:
::: {elasticsearch}{WPi6_SLRTiiDXAt41tpmCQ}{AAM0DuSZSUCT_KYSEvNYrQ}{172.18.0.7}{172.18.0.7:9300}{dimr}
Hot threads at 2022-09-15T07:55:38.921Z, interval=500ms, busiestThreads=3, ignoreIdleThreads=true:
63.2% (316.2ms out of 500ms) cpu usage by thread 'elasticsearch[elasticsearch][[metricbeat-7.12.0-2022.09.15][0]: Lucene Merge Thread #147]'
4/10 snapshots sharing following 12 elements
app//org.apache.lucene.codecs.lucene80.Lucene80DocValuesConsumer.addSortedNumericField(Lucene80DocValuesConsumer.java:705)
app//org.apache.lucene.codecs.DocValuesConsumer.mergeSortedNumericField(DocValuesConsumer.java:375)
app//org.apache.lucene.codecs.DocValuesConsumer.merge(DocValuesConsumer.java:147)
app//org.apache.lucene.codecs.perfield.PerFieldDocValuesFormat$FieldsWriter.merge(PerFieldDocValuesFormat.java:155)
app//org.apache.lucene.index.SegmentMerger.mergeDocValues(SegmentMerger.java:195)
app//org.apache.lucene.index.SegmentMerger.merge(SegmentMerger.java:150)
app//org.apache.lucene.index.IndexWriter.mergeMiddle(IndexWriter.java:4760)
app//org.apache.lucene.index.IndexWriter.merge(IndexWriter.java:4364)
app//org.apache.lucene.index.IndexWriter$IndexWriterMergeSource.merge(IndexWriter.java:5923)
app//org.apache.lucene.index.ConcurrentMergeScheduler.doMerge(ConcurrentMergeScheduler.java:624)
app//org.elasticsearch.index.engine.ElasticsearchConcurrentMergeScheduler.doMerge(ElasticsearchConcurrentMergeScheduler.java:100)
app//org.apache.lucene.index.ConcurrentMergeScheduler$MergeThread.run(ConcurrentMergeScheduler.java:682)
3/10 snapshots sharing following 10 elements
app//org.apache.lucene.util.bkd.BKDWriter.merge(BKDWriter.java:506)
app//org.apache.lucene.codecs.lucene86.Lucene86PointsWriter.merge(Lucene86PointsWriter.java:237)
app//org.apache.lucene.index.SegmentMerger.mergePoints(SegmentMerger.java:201)
app//org.apache.lucene.index.SegmentMerger.merge(SegmentMerger.java:161)
app//org.apache.lucene.index.IndexWriter.mergeMiddle(IndexWriter.java:4760)
app//org.apache.lucene.index.IndexWriter.merge(IndexWriter.java:4364)
app//org.apache.lucene.index.IndexWriter$IndexWriterMergeSource.merge(IndexWriter.java:5923)
app//org.apache.lucene.index.ConcurrentMergeScheduler.doMerge(ConcurrentMergeScheduler.java:624)
app//org.elasticsearch.index.engine.ElasticsearchConcurrentMergeScheduler.doMerge(ElasticsearchConcurrentMergeScheduler.java:100)
app//org.apache.lucene.index.ConcurrentMergeScheduler$MergeThread.run(ConcurrentMergeScheduler.java:682)
3/10 snapshots sharing following 9 elements
app//org.apache.lucene.codecs.lucene86.Lucene86PointsWriter.merge(Lucene86PointsWriter.java:237)
app//org.apache.lucene.index.SegmentMerger.mergePoints(SegmentMerger.java:201)
app//org.apache.lucene.index.SegmentMerger.merge(SegmentMerger.java:161)
app//org.apache.lucene.index.IndexWriter.mergeMiddle(IndexWriter.java:4760)
app//org.apache.lucene.index.IndexWriter.merge(IndexWriter.java:4364)
app//org.apache.lucene.index.IndexWriter$IndexWriterMergeSource.merge(IndexWriter.java:5923)
app//org.apache.lucene.index.ConcurrentMergeScheduler.doMerge(ConcurrentMergeScheduler.java:624)
app//org.elasticsearch.index.engine.ElasticsearchConcurrentMergeScheduler.doMerge(ElasticsearchConcurrentMergeScheduler.java:100)
app//org.apache.lucene.index.ConcurrentMergeScheduler$MergeThread.run(ConcurrentMergeScheduler.java:682)
Docker compose file:
# Docker Compose stack: Elasticsearch, Kibana, APM Server, Logstash and
# Metricbeat. Nesting is written out explicitly; with every line flush-left
# the per-service keys (image, container_name, ports, ...) collide as
# duplicate top-level keys and no service is actually defined.
#
# NOTE(review): Elasticsearch/Kibana are 7.10.2 while APM Server, Logstash
# and Metricbeat are 7.12.0 - confirm this version mix is intended.
version: "3.7"

services:
  # Single-node Elasticsearch; index data lives in the named volume `esdata`.
  elasticsearch:
    image: docker.elastic.co/elasticsearch/elasticsearch-oss:7.10.2
    container_name: elasticsearch
    hostname: elasticsearch
    volumes:
      - esdata:/usr/share/elasticsearch/data
      # Node-level configuration file mounted over the image default.
      - ./elk/elasticsearch.yml:/usr/share/elasticsearch/config/elasticsearch.yml
    ports:
      - "9200:9200"
      - "9300:9300"
    environment:
      - discovery.type=single-node

  # Kibana UI, pointed at the Elasticsearch service by its hostname.
  kibana:
    image: docker.elastic.co/kibana/kibana-oss:7.10.2
    container_name: kibana
    hostname: kibana
    ports:
      - "5601:5601"
    environment:
      SERVERNAME: kibana
      # NOTE(review): presumably a leftover from Kibana 6.x; 7.x is expected
      # to read ELASTICSEARCH_HOSTS - confirm before removing.
      ELASTICSEARCH_URL: http://elasticsearch:9200
      ELASTICSEARCH_HOSTS: http://elasticsearch:9200

  # APM Server; its configuration file is mounted read-only.
  apm-server:
    image: docker.elastic.co/apm/apm-server-oss:7.12.0
    volumes:
      - ./elk/apm-server.docker.yml:/usr/share/apm-server/apm-server.yml:ro
    container_name: apm-server
    hostname: apm-server
    ports:
      - "8200:8200"
    environment:
      # NOTE(review): verify this variable is actually picked up; the output
      # is normally set in the mounted config file or via -E flags.
      - output.elasticsearch.hosts=http://elasticsearch:9200

  # Logstash reads the host log directory (read-only) using the mounted
  # pipeline definition.
  logstash:
    image: docker.elastic.co/logstash/logstash-oss:7.12.0
    container_name: logstash
    # NOTE(review): `links` is a legacy Compose option; confirm it is still
    # needed, since the other services reach elasticsearch without it.
    links:
      - elasticsearch
    volumes:
      - ./elk/logstash.conf:/usr/share/logstash/pipeline/logstash.conf
      - /data/log:/usr/share/logstash/proj_log:ro
    ports:
      - "5044:5044"

  # Metricbeat runs as root with the Docker socket and the host filesystem
  # mounted read-only under /hostfs.
  metricbeat:
    image: docker.elastic.co/beats/metricbeat-oss:7.12.0
    container_name: metricbeat
    user: root
    volumes:
      - ./elk/metricbeat.yml:/usr/share/metricbeat/metricbeat.yml:ro
      - /var/run/docker.sock:/var/run/docker.sock:ro
      - /sys/fs/cgroup:/hostfs/sys/fs/cgroup:ro
      - /proc:/hostfs/proc:ro
      - /:/hostfs:ro
    environment:
      # NOTE(review): verify these variables have any effect; the mounted
      # metricbeat.yml already defines output.elasticsearch.hosts.
      - setup.kibana.host=kibana:5601
      - output.elasticsearch.hosts=["http://elasticsearch:9200"]
    # Tells the system module to read host data from the /hostfs mounts.
    command: ["-system.hostfs=/hostfs"]

# Named volume backing the Elasticsearch data directory.
volumes:
  esdata:
metricbeat.yml:
# Metricbeat configuration: collect Docker and host (system) metrics every
# 10 seconds and send them to Elasticsearch. Nesting is written out
# explicitly; flattened to column 0 the second `- module:` entry no longer
# parses as part of the module list.
metricbeat.modules:
  # Per-container metrics, read from the Docker daemon socket in `hosts`.
  - module: docker
    metricsets:
      - "cpu"
      - "diskio"
      - "memory"
      - "network"
    hosts: ["unix:///var/run/docker.sock"]
    # Collection interval for every metricset of this module.
    period: 10s
    enabled: true

  # Host-level metrics.
  - module: system
    metricsets:
      - cpu
      - load
      - memory
      - network
      - diskio
      - fsstat
    enabled: true
    # Collection interval for every metricset of this module.
    period: 10s
    # NOTE(review): `processes` and `process.include_top_n` look like options
    # of the `process` metricset, which is not enabled above - confirm
    # whether that metricset was meant to be listed.
    processes: ['.*']
    cpu.metrics: ["percentages", "normalized_percentages"]
    # NOTE(review): `core.metrics` looks like an option of the `core`
    # metricset, which is not enabled above - confirm.
    core.metrics: ["percentages"]
    process.include_top_n:
      by_cpu: 5
      by_memory: 5

# Ship all events straight to the Elasticsearch service.
output.elasticsearch:
  hosts: ["http://elasticsearch:9200"]