Hello,
I have read through most of the related topics and tried the suggestions, but nothing has helped so far.
We are running an Elasticsearch cluster (v7.16.1) with 2 nodes (4 CPUs each, 16 GB heap assigned, 32 GB physical memory). We manage our indexes with ILM: we roll them over daily and move them to the warm phase after 2 days, where a merge is executed to reduce the number of segments per shard to 2 and the replicas are reduced to 0.
Search latency is cripplingly slow - our searches get answered only after 10 seconds, and with the timeout configured at 30 seconds, our search requests time out.
We use data streams and have 7 data streams with an average of 30 backing indexes each. The current write index has 1 shard and 1 replica. The average shard size is less than 50 GB and there are 1500 segments (across both nodes).
::: {xxxxxxx}{JS2hVWeHQOyV48TgxDlXZw}{HUXiS-ktQpyXWmTwyqeEFA}{xxxxxxx}{aa.bb.cc.dd:9300}{cdfhilmrstw}{ml.machine_memory=33557848064, xpack.installed=true, transform.node=true, ml.max_open_jobs=512, dc_type=dc, ml.max_jvm_size=16785604608}
Hot threads at 2022-10-17T13:56:16.863Z, interval=500ms, busiestThreads=3, ignoreIdleThreads=true:
100.0% [cpu=87.6%, other=12.4%] (500ms out of 500ms) cpu usage by thread 'elasticsearch[xxxxxxx][search][T#2]'
2/10 snapshots sharing following 54 elements
app//org.elasticsearch.xcontent.support.AbstractXContentParser.readValueUnsafe(AbstractXContentParser.java:394)
app//org.elasticsearch.xcontent.support.AbstractXContentParser.readMapEntries(AbstractXContentParser.java:318)
app//org.elasticsearch.xcontent.support.AbstractXContentParser.readValueUnsafe(AbstractXContentParser.java:394)
app//org.elasticsearch.xcontent.support.AbstractXContentParser.readMapEntries(AbstractXContentParser.java:318)
app//org.elasticsearch.xcontent.support.AbstractXContentParser.readMapSafe(AbstractXContentParser.java:304)
app//org.elasticsearch.xcontent.support.AbstractXContentParser.map(AbstractXContentParser.java:254)
app//org.elasticsearch.common.xcontent.XContentHelper.convertToMap(XContentHelper.java:210)
app//org.elasticsearch.common.xcontent.XContentHelper.convertToMap(XContentHelper.java:138)
app//org.elasticsearch.common.xcontent.XContentHelper.convertToMap(XContentHelper.java:106)
app//org.elasticsearch.search.lookup.SourceLookup.sourceAsMapAndType(SourceLookup.java:90)
app//org.elasticsearch.search.lookup.SourceLookup.source(SourceLookup.java:79)
app//org.elasticsearch.script.AbstractFieldScript.extractFromSource(AbstractFieldScript.java:93)
app//org.elasticsearch.script.AbstractFieldScript.emitFromSource(AbstractFieldScript.java:109)
app//org.elasticsearch.script.StringFieldScript$1$1.execute(StringFieldScript.java:35)
app//org.elasticsearch.script.StringFieldScript.resultsForDoc(StringFieldScript.java:94)
app//org.elasticsearch.search.runtime.AbstractStringScriptFieldQuery.matches(AbstractStringScriptFieldQuery.java:27)
app//org.elasticsearch.search.runtime.AbstractStringScriptFieldQuery.matches(AbstractStringScriptFieldQuery.java:19)
app//org.elasticsearch.search.runtime.AbstractScriptFieldQuery$1$1.matches(AbstractScriptFieldQuery.java:76)
app//org.apache.lucene.search.ConjunctionDISI$ConjunctionTwoPhaseIterator.matches(ConjunctionDISI.java:381)
app//org.apache.lucene.search.Weight$DefaultBulkScorer.scoreRange(Weight.java:265)
app//org.apache.lucene.search.Weight$DefaultBulkScorer.score(Weight.java:245)
app//org.elasticsearch.search.internal.CancellableBulkScorer.score(CancellableBulkScorer.java:45)
app//org.apache.lucene.search.BulkScorer.score(BulkScorer.java:39)
app//org.elasticsearch.search.internal.ContextIndexSearcher.searchLeaf(ContextIndexSearcher.java:194)
app//org.elasticsearch.search.internal.ContextIndexSearcher.search(ContextIndexSearcher.java:167)
app//org.apache.lucene.search.IndexSearcher.search(IndexSearcher.java:443)
app//org.elasticsearch.search.query.QueryPhase.searchWithCollector(QueryPhase.java:255)
app//org.elasticsearch.search.query.QueryPhase.executeInternal(QueryPhase.java:212)
app//org.elasticsearch.search.query.QueryPhase.execute(QueryPhase.java:98)
app//org.elasticsearch.indices.IndicesService.lambda$loadIntoContext$26(IndicesService.java:1522)
app//org.elasticsearch.indices.IndicesService$$Lambda$7190/0x0000000801baa218.accept(Unknown Source)
app//org.elasticsearch.indices.IndicesService.lambda$cacheShardLevelResult$27(IndicesService.java:1588)
app//org.elasticsearch.indices.IndicesService$$Lambda$7191/0x0000000801baa978.get(Unknown Source)
app//org.elasticsearch.indices.IndicesRequestCache$Loader.load(IndicesRequestCache.java:178)
app//org.elasticsearch.indices.IndicesRequestCache$Loader.load(IndicesRequestCache.java:161)
app//org.elasticsearch.common.cache.Cache.computeIfAbsent(Cache.java:419)
app//org.elasticsearch.indices.IndicesRequestCache.getOrCompute(IndicesRequestCache.java:124)
app//org.elasticsearch.indices.IndicesService.cacheShardLevelResult(IndicesService.java:1594)
app//org.elasticsearch.indices.IndicesService.loadIntoContext(IndicesService.java:1516)
app//org.elasticsearch.search.SearchService.loadOrExecuteQueryPhase(SearchService.java:456)
app//org.elasticsearch.search.SearchService.executeQueryPhase(SearchService.java:622)
app//org.elasticsearch.search.SearchService.lambda$executeQueryPhase$2(SearchService.java:483)
app//org.elasticsearch.search.SearchService$$Lambda$6500/0x0000000801a72de8.get(Unknown Source)
app//org.elasticsearch.search.SearchService$$Lambda$6501/0x0000000801a73010.get(Unknown Source)
app//org.elasticsearch.action.ActionRunnable.lambda$supply$0(ActionRunnable.java:47)
app//org.elasticsearch.action.ActionRunnable$$Lambda$6502/0x0000000801a73238.accept(Unknown Source)
app//org.elasticsearch.action.ActionRunnable$2.doRun(ActionRunnable.java:62)
app//org.elasticsearch.common.util.concurrent.AbstractRunnable.run(AbstractRunnable.java:26)
app//org.elasticsearch.common.util.concurrent.TimedRunnable.doRun(TimedRunnable.java:33)
app//org.elasticsearch.common.util.concurrent.ThreadContext$ContextPreservingAbstractRunnable.doRun(ThreadContext.java:777)
app//org.elasticsearch.common.util.concurrent.AbstractRunnable.run(AbstractRunnable.java:26)
java.base@16.0.2/java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1130)
java.base@16.0.2/java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:630)
java.base@16.0.2/java.lang.Thread.run(Thread.java:831)
3/10 snapshots sharing following 58 elements
java.base@16.0.2/sun.nio.ch.FileChannelImpl.readInternal(FileChannelImpl.java:815)
java.base@16.0.2/sun.nio.ch.FileChannelImpl.read(FileChannelImpl.java:800)
app//org.apache.lucene.store.NIOFSDirectory$NIOFSIndexInput.readInternal(NIOFSDirectory.java:170)
app//org.apache.lucene.store.BufferedIndexInput.refill(BufferedIndexInput.java:315)
app//org.apache.lucene.store.BufferedIndexInput.readBytes(BufferedIndexInput.java:133)
app//org.apache.lucene.store.BufferedIndexInput.readBytes(BufferedIndexInput.java:111)
app//org.apache.lucene.util.compress.LZ4.decompress(LZ4.java:103)
app//org.apache.lucene.codecs.lucene87.LZ4WithPresetDictCompressionMode$LZ4WithPresetDictDecompressor.decompress(LZ4WithPresetDictCompressionMode.java:129)
app//org.apache.lucene.codecs.compressing.CompressingStoredFieldsReader$BlockState.doReset(CompressingStoredFieldsReader.java:564)
app//org.apache.lucene.codecs.compressing.CompressingStoredFieldsReader$BlockState.reset(CompressingStoredFieldsReader.java:466)
app//org.apache.lucene.codecs.compressing.CompressingStoredFieldsReader.document(CompressingStoredFieldsReader.java:656)
app//org.apache.lucene.codecs.compressing.CompressingStoredFieldsReader.visitDocument(CompressingStoredFieldsReader.java:678)
app//org.elasticsearch.search.internal.FieldUsageTrackingDirectoryReader$FieldUsageTrackingLeafReader$FieldUsageTrackingStoredFieldsReader.visitDocument(FieldUsageTrackingDirectoryReader.java:204)
app//org.elasticsearch.search.lookup.SourceLookup$$Lambda$6567/0x0000000801a8f4a8.accept(Unknown Source)
app//org.elasticsearch.search.lookup.SourceLookup.source(SourceLookup.java:73)
app//org.elasticsearch.script.AbstractFieldScript.extractFromSource(AbstractFieldScript.java:93)
app//org.elasticsearch.script.AbstractFieldScript.emitFromSource(AbstractFieldScript.java:109)
app//org.elasticsearch.script.StringFieldScript$1$1.execute(StringFieldScript.java:35)
app//org.elasticsearch.script.StringFieldScript.resultsForDoc(StringFieldScript.java:94)
app//org.elasticsearch.search.runtime.AbstractStringScriptFieldQuery.matches(AbstractStringScriptFieldQuery.java:27)
app//org.elasticsearch.search.runtime.AbstractStringScriptFieldQuery.matches(AbstractStringScriptFieldQuery.java:19)
app//org.elasticsearch.search.runtime.AbstractScriptFieldQuery$1$1.matches(AbstractScriptFieldQuery.java:76)
app//org.apache.lucene.search.ConjunctionDISI$ConjunctionTwoPhaseIterator.matches(ConjunctionDISI.java:381)
app//org.apache.lucene.search.Weight$DefaultBulkScorer.scoreRange(Weight.java:265)
app//org.apache.lucene.search.Weight$DefaultBulkScorer.score(Weight.java:245)
app//org.elasticsearch.search.internal.CancellableBulkScorer.score(CancellableBulkScorer.java:45)
app//org.apache.lucene.search.BulkScorer.score(BulkScorer.java:39)
app//org.elasticsearch.search.internal.ContextIndexSearcher.searchLeaf(ContextIndexSearcher.java:194)
app//org.elasticsearch.search.internal.ContextIndexSearcher.search(ContextIndexSearcher.java:167)
app//org.apache.lucene.search.IndexSearcher.search(IndexSearcher.java:443)
app//org.elasticsearch.search.query.QueryPhase.searchWithCollector(QueryPhase.java:255)
app//org.elasticsearch.search.query.QueryPhase.executeInternal(QueryPhase.java:212)
app//org.elasticsearch.search.query.QueryPhase.execute(QueryPhase.java:98)
app//org.elasticsearch.indices.IndicesService.lambda$loadIntoContext$26(IndicesService.java:1522)
app//org.elasticsearch.indices.IndicesService$$Lambda$7190/0x0000000801baa218.accept(Unknown Source)
app//org.elasticsearch.indices.IndicesService.lambda$cacheShardLevelResult$27(IndicesService.java:1588)
app//org.elasticsearch.indices.IndicesService$$Lambda$7191/0x0000000801baa978.get(Unknown Source)
app//org.elasticsearch.indices.IndicesRequestCache$Loader.load(IndicesRequestCache.java:178)
app//org.elasticsearch.indices.IndicesRequestCache$Loader.load(IndicesRequestCache.java:161)
app//org.elasticsearch.common.cache.Cache.computeIfAbsent(Cache.java:419)
app//org.elasticsearch.indices.IndicesRequestCache.getOrCompute(IndicesRequestCache.java:124)
app//org.elasticsearch.indices.IndicesService.cacheShardLevelResult(IndicesService.java:1594)
app//org.elasticsearch.indices.IndicesService.loadIntoContext(IndicesService.java:1516)
app//org.elasticsearch.search.SearchService.loadOrExecuteQueryPhase(SearchService.java:456)
app//org.elasticsearch.search.SearchService.executeQueryPhase(SearchService.java:622)
app//org.elasticsearch.search.SearchService.lambda$executeQueryPhase$2(SearchService.java:483)
app//org.elasticsearch.search.SearchService$$Lambda$6500/0x0000000801a72de8.get(Unknown Source)
app//org.elasticsearch.search.SearchService$$Lambda$6501/0x0000000801a73010.get(Unknown Source)
app//org.elasticsearch.action.ActionRunnable.lambda$supply$0(ActionRunnable.java:47)
app//org.elasticsearch.action.ActionRunnable$$Lambda$6502/0x0000000801a73238.accept(Unknown Source)
app//org.elasticsearch.action.ActionRunnable$2.doRun(ActionRunnable.java:62)
app//org.elasticsearch.common.util.concurrent.AbstractRunnable.run(AbstractRunnable.java:26)
app//org.elasticsearch.common.util.concurrent.TimedRunnable.doRun(TimedRunnable.java:33)
app//org.elasticsearch.common.util.concurrent.ThreadContext$ContextPreservingAbstractRunnable.doRun(ThreadContext.java:777)
app//org.elasticsearch.common.util.concurrent.AbstractRunnable.run(AbstractRunnable.java:26)
java.base@16.0.2/java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1130)
java.base@16.0.2/java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:630)
java.base@16.0.2/java.lang.Thread.run(Thread.java:831)
2/10 snapshots sharing following 50 elements
java.base@16.0.2/java.util.Collections$UnmodifiableCollection$1.<init>(Collections.java:1046)
java.base@16.0.2/java.util.Collections$UnmodifiableCollection.iterator(Collections.java:1045)
java.base@16.0.2/java.util.AbstractCollection.addAll(AbstractCollection.java:335)
app//org.elasticsearch.index.fieldvisitor.FieldsVisitor.reset(FieldsVisitor.java:177)
app//org.elasticsearch.index.fieldvisitor.FieldsVisitor.<init>(FieldsVisitor.java:57)
app//org.elasticsearch.index.fieldvisitor.FieldsVisitor.<init>(FieldsVisitor.java:50)
app//org.elasticsearch.search.lookup.SourceLookup.source(SourceLookup.java:72)
app//org.elasticsearch.script.AbstractFieldScript.extractFromSource(AbstractFieldScript.java:93)
app//org.elasticsearch.script.AbstractFieldScript.emitFromSource(AbstractFieldScript.java:109)
app//org.elasticsearch.script.StringFieldScript$1$1.execute(StringFieldScript.java:35)
app//org.elasticsearch.script.StringFieldScript.resultsForDoc(StringFieldScript.java:94)
app//org.elasticsearch.search.runtime.AbstractStringScriptFieldQuery.matches(AbstractStringScriptFieldQuery.java:27)
app//org.elasticsearch.search.runtime.AbstractStringScriptFieldQuery.matches(AbstractStringScriptFieldQuery.java:19)
app//org.elasticsearch.search.runtime.AbstractScriptFieldQuery$1$1.matches(AbstractScriptFieldQuery.java:76)
app//org.apache.lucene.search.ConjunctionDISI$ConjunctionTwoPhaseIterator.matches(ConjunctionDISI.java:381)
app//org.apache.lucene.search.Weight$DefaultBulkScorer.scoreRange(Weight.java:265)
app//org.apache.lucene.search.Weight$DefaultBulkScorer.score(Weight.java:245)
app//org.elasticsearch.search.internal.CancellableBulkScorer.score(CancellableBulkScorer.java:45)
app//org.apache.lucene.search.BulkScorer.score(BulkScorer.java:39)
app//org.elasticsearch.search.internal.ContextIndexSearcher.searchLeaf(ContextIndexSearcher.java:194)
app//org.elasticsearch.search.internal.ContextIndexSearcher.search(ContextIndexSearcher.java:167)
app//org.apache.lucene.search.IndexSearcher.search(IndexSearcher.java:443)
app//org.elasticsearch.search.query.QueryPhase.searchWithCollector(QueryPhase.java:255)
app//org.elasticsearch.search.query.QueryPhase.executeInternal(QueryPhase.java:212)
app//org.elasticsearch.search.query.QueryPhase.execute(QueryPhase.java:98)
app//org.elasticsearch.indices.IndicesService.lambda$loadIntoContext$26(IndicesService.java:1522)
app//org.elasticsearch.indices.IndicesService$$Lambda$7190/0x0000000801baa218.accept(Unknown Source)
app//org.elasticsearch.indices.IndicesService.lambda$cacheShardLevelResult$27(IndicesService.java:1588)
app//org.elasticsearch.indices.IndicesService$$Lambda$7191/0x0000000801baa978.get(Unknown Source)
app//org.elasticsearch.indices.IndicesRequestCache$Loader.load(IndicesRequestCache.java:178)
app//org.elasticsearch.indices.IndicesRequestCache$Loader.load(IndicesRequestCache.java:161)
app//org.elasticsearch.common.cache.Cache.computeIfAbsent(Cache.java:419)
app//org.elasticsearch.indices.IndicesRequestCache.getOrCompute(IndicesRequestCache.java:124)
app//org.elasticsearch.indices.IndicesService.cacheShardLevelResult(IndicesService.java:1594)
app//org.elasticsearch.indices.IndicesService.loadIntoContext(IndicesService.java:1516)
app//org.elasticsearch.search.SearchService.loadOrExecuteQueryPhase(SearchService.java:456)
app//org.elasticsearch.search.SearchService.executeQueryPhase(SearchService.java:622)
app//org.elasticsearch.search.SearchService.lambda$executeQueryPhase$2(SearchService.java:483)
app//org.elasticsearch.search.SearchService$$Lambda$6500/0x0000000801a72de8.get(Unknown Source)
app//org.elasticsearch.search.SearchService$$Lambda$6501/0x0000000801a73010.get(Unknown Source)
app//org.elasticsearch.action.ActionRunnable.lambda$supply$0(ActionRunnable.java:47)
app//org.elasticsearch.action.ActionRunnable$$Lambda$6502/0x0000000801a73238.accept(Unknown Source)
app//org.elasticsearch.action.ActionRunnable$2.doRun(ActionRunnable.java:62)
app//org.elasticsearch.common.util.concurrent.AbstractRunnable.run(AbstractRunnable.java:26)
app//org.elasticsearch.common.util.concurrent.TimedRunnable.doRun(TimedRunnable.java:33)
app//org.elasticsearch.common.util.concurrent.ThreadContext$ContextPreservingAbstractRunnable.doRun(ThreadContext.java:777)
app//org.elasticsearch.common.util.concurrent.AbstractRunnable.run(AbstractRunnable.java:26)
java.base@16.0.2/java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1130)
java.base@16.0.2/java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:630)
java.base@16.0.2/java.lang.Thread.run(Thread.java:831)
3/10 snapshots sharing following 43 elements
app//org.elasticsearch.script.AbstractFieldScript.extractFromSource(AbstractFieldScript.java:93)
app//org.elasticsearch.script.AbstractFieldScript.emitFromSource(AbstractFieldScript.java:109)
app//org.elasticsearch.script.StringFieldScript$1$1.execute(StringFieldScript.java:35)
app//org.elasticsearch.script.StringFieldScript.resultsForDoc(StringFieldScript.java:94)
app//org.elasticsearch.search.runtime.AbstractStringScriptFieldQuery.matches(AbstractStringScriptFieldQuery.java:27)
app//org.elasticsearch.search.runtime.AbstractStringScriptFieldQuery.matches(AbstractStringScriptFieldQuery.java:19)
app//org.elasticsearch.search.runtime.AbstractScriptFieldQuery$1$1.matches(AbstractScriptFieldQuery.java:76)
app//org.apache.lucene.search.ConjunctionDISI$ConjunctionTwoPhaseIterator.matches(ConjunctionDISI.java:381)
app//org.apache.lucene.search.Weight$DefaultBulkScorer.scoreRange(Weight.java:265)
app//org.apache.lucene.search.Weight$DefaultBulkScorer.score(Weight.java:245)
app//org.elasticsearch.search.internal.CancellableBulkScorer.score(CancellableBulkScorer.java:45)
app//org.apache.lucene.search.BulkScorer.score(BulkScorer.java:39)
app//org.elasticsearch.search.internal.ContextIndexSearcher.searchLeaf(ContextIndexSearcher.java:194)
app//org.elasticsearch.search.internal.ContextIndexSearcher.search(ContextIndexSearcher.java:167)
app//org.apache.lucene.search.IndexSearcher.search(IndexSearcher.java:443)
app//org.elasticsearch.search.query.QueryPhase.searchWithCollector(QueryPhase.java:255)
app//org.elasticsearch.search.query.QueryPhase.executeInternal(QueryPhase.java:212)
app//org.elasticsearch.search.query.QueryPhase.execute(QueryPhase.java:98)
app//org.elasticsearch.indices.IndicesService.lambda$loadIntoContext$26(IndicesService.java:1522)
app//org.elasticsearch.indices.IndicesService$$Lambda$7190/0x0000000801baa218.accept(Unknown Source)
app//org.elasticsearch.indices.IndicesService.lambda$cacheShardLevelResult$27(IndicesService.java:1588)
app//org.elasticsearch.indices.IndicesService$$Lambda$7191/0x0000000801baa978.get(Unknown Source)
app//org.elasticsearch.indices.IndicesRequestCache$Loader.load(IndicesRequestCache.java:178)
app//org.elasticsearch.indices.IndicesRequestCache$Loader.load(IndicesRequestCache.java:161)
app//org.elasticsearch.common.cache.Cache.computeIfAbsent(Cache.java:419)
app//org.elasticsearch.indices.IndicesRequestCache.getOrCompute(IndicesRequestCache.java:124)
app//org.elasticsearch.indices.IndicesService.cacheShardLevelResult(IndicesService.java:1594)
app//org.elasticsearch.indices.IndicesService.loadIntoContext(IndicesService.java:1516)
app//org.elasticsearch.search.SearchService.loadOrExecuteQueryPhase(SearchService.java:456)
app//org.elasticsearch.search.SearchService.executeQueryPhase(SearchService.java:622)
app//org.elasticsearch.search.SearchService.lambda$executeQueryPhase$2(SearchService.java:483)
app//org.elasticsearch.search.SearchService$$Lambda$6500/0x0000000801a72de8.get(Unknown Source)
app//org.elasticsearch.search.SearchService$$Lambda$6501/0x0000000801a73010.get(Unknown Source)
app//org.elasticsearch.action.ActionRunnable.lambda$supply$0(ActionRunnable.java:47)
app//org.elasticsearch.action.ActionRunnable$$Lambda$6502/0x0000000801a73238.accept(Unknown Source)
app//org.elasticsearch.action.ActionRunnable$2.doRun(ActionRunnable.java:62)
app//org.elasticsearch.common.util.concurrent.AbstractRunnable.run(AbstractRunnable.java:26)
app//org.elasticsearch.common.util.concurrent.TimedRunnable.doRun(TimedRunnable.java:33)
app//org.elasticsearch.common.util.concurrent.ThreadContext$ContextPreservingAbstractRunnable.doRun(ThreadContext.java:777)
app//org.elasticsearch.common.util.concurrent.AbstractRunnable.run(AbstractRunnable.java:26)
java.base@16.0.2/java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1130)
java.base@16.0.2/java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:630)
java.base@16.0.2/java.lang.Thread.run(Thread.java:831)