It is not clear what your issue/question is, so if you want someone to be able to help, you will need to elaborate and provide context. Please also do not post images of text, as these cannot be searched and may not be readable to everyone.
There are 100 nodes in the cluster, but queries are slow on only one node, where all the threads are in the following stack:
at org.apache.lucene.util.bkd.DocIdsWriter.readDeltaVInts(DocIdsWriter.java:141)
at org.apache.lucene.util.bkd.DocIdsWriter.readInts(DocIdsWriter.java:124)
at org.apache.lucene.util.bkd.BKDReader.visitDocIDs(BKDReader.java:424)
at org.apache.lucene.util.bkd.BKDReader.addAll(BKDReader.java:386)
at org.apache.lucene.util.bkd.BKDReader.addAll(BKDReader.java:391)
at org.apache.lucene.util.bkd.BKDReader.addAll(BKDReader.java:395)
at org.apache.lucene.util.bkd.BKDReader.addAll(BKDReader.java:395)
at org.apache.lucene.util.bkd.BKDReader.addAll(BKDReader.java:391)
at org.apache.lucene.util.bkd.BKDReader.addAll(BKDReader.java:391)
at org.apache.lucene.util.bkd.BKDReader.intersect(BKDReader.java:629)
at org.apache.lucene.util.bkd.BKDReader.intersect(BKDReader.java:674)
at org.apache.lucene.util.bkd.BKDReader.intersect(BKDReader.java:674)
at org.apache.lucene.util.bkd.BKDReader.intersect(BKDReader.java:664)
at org.apache.lucene.util.bkd.BKDReader.intersect(BKDReader.java:664)
at org.apache.lucene.util.bkd.BKDReader.intersect(BKDReader.java:362)
at org.elasticsearch.search.internal.ExitableDirectoryReader$ExitablePointValues.intersect(ExitableDirectoryReader.java:193)
at org.apache.lucene.search.PointInSetQuery$1.scorer(PointInSetQuery.java:154)
at org.apache.lucene.search.Weight.scorerSupplier(Weight.java:148)
at org.apache.lucene.search.LRUQueryCache$CachingWrapperWeight.scorerSupplier(LRUQueryCache.java:732)
at org.elasticsearch.indices.IndicesQueryCache$CachingWeightWrapper.scorerSupplier(IndicesQueryCache.java:159)
at org.apache.lucene.search.BooleanWeight.scorerSupplier(BooleanWeight.java:379)
at org.apache.lucene.search.LRUQueryCache$CachingWrapperWeight.scorerSupplier(LRUQueryCache.java:732)
at org.elasticsearch.indices.IndicesQueryCache$CachingWeightWrapper.scorerSupplier(IndicesQueryCache.java:159)
at org.apache.lucene.search.BooleanWeight.scorerSupplier(BooleanWeight.java:379)
at org.apache.lucene.search.LRUQueryCache$CachingWrapperWeight.scorerSupplier(LRUQueryCache.java:732)
at org.elasticsearch.indices.IndicesQueryCache$CachingWeightWrapper.scorerSupplier(IndicesQueryCache.java:159)
at org.apache.lucene.search.BooleanWeight.scorerSupplier(BooleanWeight.java:379)
at org.apache.lucene.search.BooleanWeight.scorer(BooleanWeight.java:344)
at org.apache.lucene.search.Weight.bulkScorer(Weight.java:182)
at org.apache.lucene.search.BooleanWeight.bulkScorer(BooleanWeight.java:338)
at org.apache.lucene.search.LRUQueryCache$CachingWrapperWeight.bulkScorer(LRUQueryCache.java:843)
at org.elasticsearch.indices.IndicesQueryCache$CachingWeightWrapper.bulkScorer(IndicesQueryCache.java:165)
at org.elasticsearch.search.internal.ContextIndexSearcher$1.bulkScorer(ContextIndexSearcher.java:244)
at org.elasticsearch.search.internal.ContextIndexSearcher.searchLeaf(ContextIndexSearcher.java:191)
at org.elasticsearch.search.internal.ContextIndexSearcher.search(ContextIndexSearcher.java:167)
at org.apache.lucene.search.IndexSearcher.search(IndexSearcher.java:443)
at org.elasticsearch.search.query.QueryPhase.searchWithCollector(QueryPhase.java:255)
at org.elasticsearch.search.query.QueryPhase.executeInternal(QueryPhase.java:212)
at org.elasticsearch.search.query.QueryPhase.execute(QueryPhase.java:98)
at org.elasticsearch.search.SearchService.loadOrExecuteQueryPhase(SearchService.java:458)
at org.elasticsearch.search.SearchService.executeQueryPhase(SearchService.java:622)
at org.elasticsearch.search.SearchService.lambda$executeQueryPhase$2(SearchService.java:483)
at org.elasticsearch.search.SearchService$$Lambda$6771/0x00002b84ae011d00.get(Unknown Source)
at org.elasticsearch.search.SearchService$$Lambda$6772/0x00002b84ae012840.get(Unknown Source)
at org.elasticsearch.action.ActionRunnable.lambda$supply$0(ActionRunnable.java:47)
at org.elasticsearch.action.ActionRunnable$$Lambda$6773/0x00002b84ae0130b0.accept(Unknown Source)
at org.elasticsearch.action.ActionRunnable$2.doRun(ActionRunnable.java:62)
at org.elasticsearch.common.util.concurrent.AbstractRunnable.run(AbstractRunnable.java:26)
at org.elasticsearch.common.util.concurrent.TimedRunnable.doRun(TimedRunnable.java:33)
at org.elasticsearch.common.util.concurrent.ThreadContext$ContextPreservingAbstractRunnable.doRun(ThreadContext.java:777)
at org.elasticsearch.common.util.concurrent.AbstractRunnable.run(AbstractRunnable.java:26)
at java.util.concurrent.ThreadPoolExecutor.runWorker(java.base@12/ThreadPoolExecutor.java:1128)
at java.util.concurrent.ThreadPoolExecutor$Worker.run(java.base@12/ThreadPoolExecutor.java:628)
at java.lang.Thread.run(java.base@12/Thread.java:835)
All the threads are stuck here, resulting in high CPU usage.
What is the full output of the hot threads API? What is the use case? What type of load is the cluster under?
What is the full output of the cluster stats API?
It's a similar problem
Do you have monitoring enabled? If so, what does the period where the node CPU jumps look like? What do disk I/O and await look like at that point? What is the hardware specification and configuration of the cluster?
Can you please provide this?
There are no statistics available for when the problem occurs.
This API can be called any time as it shows statistics about the cluster as a whole.
{
"_nodes" : {
"total" : 98,
"successful" : 98,
"failed" : 0
},
"cluster_name" : "es",
"cluster_uuid" : "k4mkhPKHRcakOrgIQ40K9g",
"timestamp" : 1713775442369,
"status" : "green",
"indices" : {
"count" : 153,
"shards" : {
"total" : 11496,
"primaries" : 6376,
"replication" : 0.8030112923462986,
"index" : {
"shards" : {
"min" : 1,
"max" : 360,
"avg" : 75.13725490196079
},
"primaries" : {
"min" : 1,
"max" : 180,
"avg" : 41.673202614379086
},
"replication" : {
"min" : 0.0,
"max" : 1.0,
"avg" : 0.7450980392156863
}
}
},
"docs" : {
"count" : 115365179917,
"deleted" : 24598279033
},
"store" : {
"size_in_bytes" : 293971694284887,
"total_data_set_size_in_bytes" : 293971694284887,
"reserved_in_bytes" : 0
},
"fielddata" : {
"memory_size_in_bytes" : 75809453584,
"evictions" : 0
},
"query_cache" : {
"memory_size_in_bytes" : 260839222808,
"total_count" : 132792635420,
"hit_count" : 15059740461,
"miss_count" : 117732894959,
"cache_size" : 1899613,
"cache_count" : 1323518014,
"evictions" : 1321618401
},
"completion" : {
"size_in_bytes" : 0
},
"segments" : {
"count" : 296424,
"memory_in_bytes" : 27569357024,
"terms_memory_in_bytes" : 9722316368,
"stored_fields_memory_in_bytes" : 16017000936,
"term_vectors_memory_in_bytes" : 0,
"norms_memory_in_bytes" : 37255680,
"points_memory_in_bytes" : 0,
"doc_values_memory_in_bytes" : 1792784040,
"index_writer_memory_in_bytes" : 569606576,
"version_map_memory_in_bytes" : 15647668,
"fixed_bit_set_memory_in_bytes" : 38420760376,
"max_unsafe_auto_id_timestamp" : 1713757943764,
"file_sizes" : { }
},
"mappings" : {
"field_types" : [
{
"name" : "boolean",
"count" : 1118,
"index_count" : 147,
"script_count" : 0
},
{
"name" : "byte",
"count" : 136,
"index_count" : 136,
"script_count" : 0
},
{
"name" : "constant_keyword",
"count" : 3,
"index_count" : 1,
"script_count" : 0
},
{
"name" : "date",
"count" : 1957,
"index_count" : 151,
"script_count" : 0
},
{
"name" : "float",
"count" : 52,
"index_count" : 6,
"script_count" : 0
},
{
"name" : "geo_point",
"count" : 272,
"index_count" : 137,
"script_count" : 0
},
{
"name" : "half_float",
"count" : 40,
"index_count" : 10,
"script_count" : 0
},
{
"name" : "integer",
"count" : 2564,
"index_count" : 143,
"script_count" : 0
},
{
"name" : "ip",
"count" : 139,
"index_count" : 139,
"script_count" : 0
},
{
"name" : "keyword",
"count" : 15090,
"index_count" : 152,
"script_count" : 0
},
{
"name" : "long",
"count" : 1301,
"index_count" : 149,
"script_count" : 0
},
{
"name" : "nested",
"count" : 1103,
"index_count" : 144,
"script_count" : 0
},
{
"name" : "object",
"count" : 863,
"index_count" : 149,
"script_count" : 0
},
{
"name" : "text",
"count" : 2207,
"index_count" : 147,
"script_count" : 0
},
{
"name" : "version",
"count" : 1,
"index_count" : 1,
"script_count" : 0
}
],
"runtime_field_types" : [ ]
},
"analysis" : {
"char_filter_types" : [
{
"name" : "pattern_replace",
"count" : 275,
"index_count" : 138
}
],
"tokenizer_types" : [
{
"name" : "pattern",
"count" : 138,
"index_count" : 138
}
],
"filter_types" : [ ],
"analyzer_types" : [
{
"name" : "custom",
"count" : 414,
"index_count" : 138
}
],
"built_in_char_filters" : [ ],
"built_in_tokenizers" : [
{
"name" : "ik_max_word",
"count" : 138,
"index_count" : 138
}
],
"built_in_filters" : [
{
"name" : "asciifolding",
"count" : 138,
"index_count" : 138
},
{
"name" : "kstem",
"count" : 138,
"index_count" : 138
},
{
"name" : "lowercase",
"count" : 138,
"index_count" : 138
},
{
"name" : "stop",
"count" : 138,
"index_count" : 138
}
],
"built_in_analyzers" : [
{
"name" : "ik_max_word",
"count" : 411,
"index_count" : 137
}
]
},
"versions" : [
{
"version" : "6.8.0",
"index_count" : 91,
"primary_shard_count" : 5292,
"total_primary_bytes" : 117011957401975
},
{
"version" : "7.17.7",
"index_count" : 70,
"primary_shard_count" : 1624,
"total_primary_bytes" : 49588724327462
}
]
},
"nodes" : {
"count" : {
"total" : 98,
"coordinating_only" : 0,
"data" : 98,
"data_cold" : 98,
"data_content" : 98,
"data_frozen" : 98,
"data_hot" : 98,
"data_warm" : 98,
"ingest" : 98,
"master" : 5,
"ml" : 98,
"remote_cluster_client" : 98,
"transform" : 98,
"voting_only" : 0
},
"versions" : [
"7.17.7"
],
"os" : {
"available_processors" : 3920,
"allocated_processors" : 3920,
"names" : [
{
"name" : "Linux",
"count" : 98
}
],
"pretty_names" : [
{
"pretty_name" : "CentOS Linux 7 (Core)",
"count" : 98
}
],
"architectures" : [
{
"arch" : "amd64",
"count" : 98
}
],
"mem" : {
"total_in_bytes" : 36658621538304,
"free_in_bytes" : 495309242368,
"used_in_bytes" : 36163312295936,
"free_percent" : 1,
"used_percent" : 99
}
},
"process" : {
"cpu" : {
"percent" : 667
},
"open_file_descriptors" : {
"min" : 10944,
"max" : 11713,
"avg" : 11288
}
},
"jvm" : {
"max_uptime_in_millis" : 439672788,
"versions" : [
{
"version" : "12",
"vm_name" : "OpenJDK 64-Bit Server VM",
"vm_version" : "12+33",
"vm_vendor" : "Oracle Corporation",
"bundled_jdk" : false,
"using_bundled_jdk" : null,
"count" : 98
}
],
"mem" : {
"heap_used_in_bytes" : 2875237156712,
"heap_max_in_bytes" : 5261334937600
},
"threads" : 24143
},
"fs" : {
"total_in_bytes" : 275863027843072,
"free_in_bytes" : 123361599119360,
"available_in_bytes" : 109341616345088
},
"plugins" : [
{
"name" : "analysis-icu",
"version" : "7.17.7",
"elasticsearch_version" : "7.17.7",
"java_version" : "1.8",
"description" : "The ICU Analysis plugin integrates the Lucene ICU module into Elasticsearch, adding ICU-related analysis components.",
"classname" : "org.elasticsearch.plugin.analysis.icu.AnalysisICUPlugin",
"extended_plugins" : [ ],
"has_native_controller" : false,
"licensed" : false,
"type" : "isolated"
},
{
"name" : "repository-hdfs",
"version" : "7.17.7",
"elasticsearch_version" : "7.17.7",
"java_version" : "1.8",
"description" : "The HDFS repository plugin adds support for Hadoop Distributed File-System (HDFS) repositories.",
"classname" : "org.elasticsearch.repositories.hdfs.HdfsPlugin",
"extended_plugins" : [ ],
"has_native_controller" : false,
"licensed" : false,
"type" : "isolated"
},
{
"name" : "analysis-ik",
"version" : "7.17.7",
"elasticsearch_version" : "7.17.7",
"java_version" : "1.8",
"description" : "IK Analyzer for Elasticsearch",
"classname" : "org.elasticsearch.plugin.analysis.ik.AnalysisIkPlugin",
"extended_plugins" : [ ],
"has_native_controller" : false,
"licensed" : false,
"type" : "isolated"
}
],
"network_types" : {
"transport_types" : {
"security4" : 98
},
"http_types" : {
"security4" : 98
}
},
"discovery_types" : {
"zen" : 98
},
"packaging_types" : [
{
"flavor" : "unknown",
"type" : "unknown",
"count" : 98
}
],
"ingest" : {
"number_of_pipelines" : 3,
"processor_stats" : {
"gsub" : {
"count" : 0,
"failed" : 0,
"current" : 0,
"time_in_millis" : 0
},
"remove" : {
"count" : 0,
"failed" : 0,
"current" : 0,
"time_in_millis" : 0
},
"script" : {
"count" : 0,
"failed" : 0,
"current" : 0,
"time_in_millis" : 0
},
"set" : {
"count" : 0,
"failed" : 0,
"current" : 0,
"time_in_millis" : 0
},
"set_security_user" : {
"count" : 0,
"failed" : 0,
"current" : 0,
"time_in_millis" : 0
}
}
}
}
}