I have a 7.17 cluster with 6 data nodes.
There's a website using this ES cluster with a dashboard that customers are constantly querying... The data for the last 2 months exceeds 300 GB and sits in only 2 indices (2 pri, 1 repl for both), and there is about 600 GB more in 1 pri, 1 repl indices covering the rest of the last year (everything except the last 2 months).
Checking hot threads seems to confirm that the search load is what is consuming the CPU, even though I'm not sure what "other" is supposed to mean in the output below (100.0% [cpu=64.5%, other=35.5%]).
I need to do something because CPU is constantly hitting 90+%, generating alerts and fears of a cluster collapse...
I intend to increase the replica count to 2 on the older indices, and maybe also on the monthly index for the month before the current one.
Do you think it will help? Unfortunately, scaling up the cluster is not an option right now...
I didn't create this situation. Maybe 3 pri, 1 repl would work better for next month's index? Or even 6 pri, 1 repl, because the primary index size will be around 200 GB... It's always hard to make these decisions for monthly indices...
Thank you for any insight or advice you may give!
::: {myserver-001}{Hxxxxxxxxxxxxxxxxx}{gxxxxxxxxxxxxx}{IP_ADDRESS}{IP_ADDRESS:19949}{hirst}{logical_availability_zone=zone-1, server_name=myserver-001.491289193c864ccbbd7dca48e9c372b7, availability_zone=eu-west-1c, xpack.installed=true, data=hot, instance_configuration=aws.data.highio.i3, transform.node=true, region=eu-west-1}
Hot threads at 2022-04-27T16:17:50.218Z, interval=500ms, busiestThreads=9999, ignoreIdleThreads=false:
100.0% [cpu=64.5%, other=35.5%] (500ms out of 500ms) cpu usage by thread 'elasticsearch[myserver-001][search][T#13]'
4/10 snapshots sharing following 26 elements
app//org.apache.lucene.search.ConjunctionDISI.nextDoc(ConjunctionDISI.java:253)
app//org.apache.lucene.search.Weight$DefaultBulkScorer.scoreRange(Weight.java:268)
app//org.apache.lucene.search.Weight$DefaultBulkScorer.score(Weight.java:245)
app//org.elasticsearch.search.internal.CancellableBulkScorer.score(CancellableBulkScorer.java:45)
app//org.apache.lucene.search.BulkScorer.score(BulkScorer.java:39)
app//org.elasticsearch.search.internal.ContextIndexSearcher.searchLeaf(ContextIndexSearcher.java:194)
app//org.elasticsearch.search.internal.ContextIndexSearcher.search(ContextIndexSearcher.java:167)
app//org.apache.lucene.search.IndexSearcher.search(IndexSearcher.java:443)
app//org.elasticsearch.search.query.QueryPhase.searchWithCollector(QueryPhase.java:255)
app//org.elasticsearch.search.query.QueryPhase.executeInternal(QueryPhase.java:212)
app//org.elasticsearch.search.query.QueryPhase.execute(QueryPhase.java:98)
app//org.elasticsearch.search.SearchService.loadOrExecuteQueryPhase(SearchService.java:458)
app//org.elasticsearch.search.SearchService.executeQueryPhase(SearchService.java:622)
app//org.elasticsearch.search.SearchService.lambda$executeQueryPhase$2(SearchService.java:483)
app//org.elasticsearch.search.SearchService$$Lambda$7025/0x0000000801dbbbd8.get(Unknown Source)
app//org.elasticsearch.search.SearchService$$Lambda$7026/0x0000000801dbbe00.get(Unknown Source)
app//org.elasticsearch.action.ActionRunnable.lambda$supply$0(ActionRunnable.java:47)
app//org.elasticsearch.action.ActionRunnable$$Lambda$6981/0x0000000801daec10.accept(Unknown Source)
app//org.elasticsearch.action.ActionRunnable$2.doRun(ActionRunnable.java:62)
app//org.elasticsearch.common.util.concurrent.AbstractRunnable.run(AbstractRunnable.java:26)
app//org.elasticsearch.common.util.concurrent.TimedRunnable.doRun(TimedRunnable.java:33)
app//org.elasticsearch.common.util.concurrent.ThreadContext$ContextPreservingAbstractRunnable.doRun(ThreadContext.java:777)
app//org.elasticsearch.common.util.concurrent.AbstractRunnable.run(AbstractRunnable.java:26)
java.base@17.0.1/java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1136)
java.base@17.0.1/java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:635)
java.base@17.0.1/java.lang.Thread.run(Thread.java:833)
2/10 snapshots sharing following 29 elements
app//org.apache.lucene.codecs.lucene80.Lucene80DocValuesProducer$BinaryDecoder.decode(Lucene80DocValuesProducer.java:843)
app//org.apache.lucene.codecs.lucene80.Lucene80DocValuesProducer$19.binaryValue(Lucene80DocValuesProducer.java:921)
org.elasticsearch.xpack.wildcard.mapper.BinaryDvConfirmedAutomatonQuery$1$1.matches(BinaryDvConfirmedAutomatonQuery.java:88)
app//org.apache.lucene.search.ConjunctionDISI$ConjunctionTwoPhaseIterator.matches(ConjunctionDISI.java:381)
app//org.apache.lucene.search.Weight$DefaultBulkScorer.scoreRange(Weight.java:265)
app//org.apache.lucene.search.Weight$DefaultBulkScorer.score(Weight.java:245)
app//org.elasticsearch.search.internal.CancellableBulkScorer.score(CancellableBulkScorer.java:45)
app//org.apache.lucene.search.BulkScorer.score(BulkScorer.java:39)
app//org.elasticsearch.search.internal.ContextIndexSearcher.searchLeaf(ContextIndexSearcher.java:194)
app//org.elasticsearch.search.internal.ContextIndexSearcher.search(ContextIndexSearcher.java:167)
app//org.apache.lucene.search.IndexSearcher.search(IndexSearcher.java:443)
app//org.elasticsearch.search.query.QueryPhase.searchWithCollector(QueryPhase.java:255)
app//org.elasticsearch.search.query.QueryPhase.executeInternal(QueryPhase.java:212)
app//org.elasticsearch.search.query.QueryPhase.execute(QueryPhase.java:98)
app//org.elasticsearch.search.SearchService.loadOrExecuteQueryPhase(SearchService.java:458)
app//org.elasticsearch.search.SearchService.executeQueryPhase(SearchService.java:622)
app//org.elasticsearch.search.SearchService.lambda$executeQueryPhase$2(SearchService.java:483)
app//org.elasticsearch.search.SearchService$$Lambda$7025/0x0000000801dbbbd8.get(Unknown Source)
app//org.elasticsearch.search.SearchService$$Lambda$7026/0x0000000801dbbe00.get(Unknown Source)
app//org.elasticsearch.action.ActionRunnable.lambda$supply$0(ActionRunnable.java:47)
app//org.elasticsearch.action.ActionRunnable$$Lambda$6981/0x0000000801daec10.accept(Unknown Source)
app//org.elasticsearch.action.ActionRunnable$2.doRun(ActionRunnable.java:62)
app//org.elasticsearch.common.util.concurrent.AbstractRunnable.run(AbstractRunnable.java:26)
app//org.elasticsearch.common.util.concurrent.TimedRunnable.doRun(TimedRunnable.java:33)
app//org.elasticsearch.common.util.concurrent.ThreadContext$ContextPreservingAbstractRunnable.doRun(ThreadContext.java:777)
app//org.elasticsearch.common.util.concurrent.AbstractRunnable.run(AbstractRunnable.java:26)
java.base@17.0.1/java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1136)
java.base@17.0.1/java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:635)
java.base@17.0.1/java.lang.Thread.run(Thread.java:833)
2/10 snapshots sharing following 30 elements
app//org.apache.lucene.util.compress.LZ4.decompress(LZ4.java:137)
app//org.apache.lucene.codecs.lucene80.Lucene80DocValuesProducer$BinaryDecoder.decode(Lucene80DocValuesProducer.java:873)
app//org.apache.lucene.codecs.lucene80.Lucene80DocValuesProducer$19.binaryValue(Lucene80DocValuesProducer.java:921)
org.elasticsearch.xpack.wildcard.mapper.BinaryDvConfirmedAutomatonQuery$1$1.matches(BinaryDvConfirmedAutomatonQuery.java:88)
app//org.apache.lucene.search.ConjunctionDISI$ConjunctionTwoPhaseIterator.matches(ConjunctionDISI.java:381)
app//org.apache.lucene.search.Weight$DefaultBulkScorer.scoreRange(Weight.java:265)
app//org.apache.lucene.search.Weight$DefaultBulkScorer.score(Weight.java:245)
app//org.elasticsearch.search.internal.CancellableBulkScorer.score(CancellableBulkScorer.java:45)
app//org.apache.lucene.search.BulkScorer.score(BulkScorer.java:39)
app//org.elasticsearch.search.internal.ContextIndexSearcher.searchLeaf(ContextIndexSearcher.java:194)
app//org.elasticsearch.search.internal.ContextIndexSearcher.search(ContextIndexSearcher.java:167)
app//org.apache.lucene.search.IndexSearcher.search(IndexSearcher.java:443)
app//org.elasticsearch.search.query.QueryPhase.searchWithCollector(QueryPhase.java:255)
app//org.elasticsearch.search.query.QueryPhase.executeInternal(QueryPhase.java:212)
app//org.elasticsearch.search.query.QueryPhase.execute(QueryPhase.java:98)
app//org.elasticsearch.search.SearchService.loadOrExecuteQueryPhase(SearchService.java:458)
app//org.elasticsearch.search.SearchService.executeQueryPhase(SearchService.java:622)
app//org.elasticsearch.search.SearchService.lambda$executeQueryPhase$2(SearchService.java:483)
app//org.elasticsearch.search.SearchService$$Lambda$7025/0x0000000801dbbbd8.get(Unknown Source)
app//org.elasticsearch.search.SearchService$$Lambda$7026/0x0000000801dbbe00.get(Unknown Source)
app//org.elasticsearch.action.ActionRunnable.lambda$supply$0(ActionRunnable.java:47)
app//org.elasticsearch.action.ActionRunnable$$Lambda$6981/0x0000000801daec10.accept(Unknown Source)
app//org.elasticsearch.action.ActionRunnable$2.doRun(ActionRunnable.java:62)
app//org.elasticsearch.common.util.concurrent.AbstractRunnable.run(AbstractRunnable.java:26)
app//org.elasticsearch.common.util.concurrent.TimedRunnable.doRun(TimedRunnable.java:33)
app//org.elasticsearch.common.util.concurrent.ThreadContext$ContextPreservingAbstractRunnable.doRun(ThreadContext.java:777)
app//org.elasticsearch.common.util.concurrent.AbstractRunnable.run(AbstractRunnable.java:26)
java.base@17.0.1/java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1136)
java.base@17.0.1/java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:635)
java.base@17.0.1/java.lang.Thread.run(Thread.java:833)
2/10 snapshots sharing following 28 elements
app//org.apache.lucene.codecs.lucene80.Lucene80DocValuesProducer$19.binaryValue(Lucene80DocValuesProducer.java:921)
org.elasticsearch.xpack.wildcard.mapper.BinaryDvConfirmedAutomatonQuery$1$1.matches(BinaryDvConfirmedAutomatonQuery.java:88)
app//org.apache.lucene.search.ConjunctionDISI$ConjunctionTwoPhaseIterator.matches(ConjunctionDISI.java:381)
app//org.apache.lucene.search.Weight$DefaultBulkScorer.scoreRange(Weight.java:265)
app//org.apache.lucene.search.Weight$DefaultBulkScorer.score(Weight.java:245)
app//org.elasticsearch.search.internal.CancellableBulkScorer.score(CancellableBulkScorer.java:45)
app//org.apache.lucene.search.BulkScorer.score(BulkScorer.java:39)
app//org.elasticsearch.search.internal.ContextIndexSearcher.searchLeaf(ContextIndexSearcher.java:194)
app//org.elasticsearch.search.internal.ContextIndexSearcher.search(ContextIndexSearcher.java:167)
app//org.apache.lucene.search.IndexSearcher.search(IndexSearcher.java:443)
app//org.elasticsearch.search.query.QueryPhase.searchWithCollector(QueryPhase.java:255)
app//org.elasticsearch.search.query.QueryPhase.executeInternal(QueryPhase.java:212)
app//org.elasticsearch.search.query.QueryPhase.execute(QueryPhase.java:98)
app//org.elasticsearch.search.SearchService.loadOrExecuteQueryPhase(SearchService.java:458)
app//org.elasticsearch.search.SearchService.executeQueryPhase(SearchService.java:622)
app//org.elasticsearch.search.SearchService.lambda$executeQueryPhase$2(SearchService.java:483)
app//org.elasticsearch.search.SearchService$$Lambda$7025/0x0000000801dbbbd8.get(Unknown Source)
app//org.elasticsearch.search.SearchService$$Lambda$7026/0x0000000801dbbe00.get(Unknown Source)
app//org.elasticsearch.action.ActionRunnable.lambda$supply$0(ActionRunnable.java:47)
app//org.elasticsearch.action.ActionRunnable$$Lambda$6981/0x0000000801daec10.accept(Unknown Source)
app//org.elasticsearch.action.ActionRunnable$2.doRun(ActionRunnable.java:62)
app//org.elasticsearch.common.util.concurrent.AbstractRunnable.run(AbstractRunnable.java:26)
app//org.elasticsearch.common.util.concurrent.TimedRunnable.doRun(TimedRunnable.java:33)
app//org.elasticsearch.common.util.concurrent.ThreadContext$ContextPreservingAbstractRunnable.doRun(ThreadContext.java:777)
app//org.elasticsearch.common.util.concurrent.AbstractRunnable.run(AbstractRunnable.java:26)
java.base@17.0.1/java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1136)
java.base@17.0.1/java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:635)
java.base@17.0.1/java.lang.Thread.run(Thread.java:833)