My monitoring cluster is showing 99 percent CPU usage during writes. Is this normal?
If it's a 4-CPU node, does that mean the maximum is 400 percent?
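For context, one way to compare what the monitoring UI shows against what each node itself reports is to read os.cpu.percent from the node stats API. This is only a minimal sketch, assuming an unsecured cluster reachable on http://localhost:9200; adjust the host and add authentication if security is enabled.

# Minimal sketch: print the OS-level CPU percent that each node reports.
# Assumes an unsecured cluster on http://localhost:9200 - adjust host/auth as needed.
import json
import urllib.request

URL = "http://localhost:9200/_nodes/stats/os?filter_path=nodes.*.name,nodes.*.os.cpu.percent"

with urllib.request.urlopen(URL) as resp:
    stats = json.load(resp)

for node in stats["nodes"].values():
    # os.cpu.percent is the node's whole-system CPU usage on a 0-100 scale.
    print(f'{node["name"]}: {node["os"]["cpu"]["percent"]}% CPU')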
What is the output from the _cluster/stats?pretty&human API?
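If it helps, here is a minimal sketch for pulling this yourself, again assuming an unsecured node on http://localhost:9200; add auth/TLS if security is enabled.

# Minimal sketch: fetch the cluster stats shown below.
# Assumes an unsecured node on http://localhost:9200 - adjust host/auth as needed.
import urllib.request

URL = "http://localhost:9200/_cluster/stats?pretty&human"
with urllib.request.urlopen(URL) as resp:
    print(resp.read().decode("utf-8"))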
{
  "_nodes" : {
    "total" : 7,
    "successful" : 7,
    "failed" : 0
  },
  "cluster_name" : "",
  "cluster_uuid" : "",
  "timestamp" : 1621398779746,
  "status" : "green",
  "indices" : {
    "count" : 136,
    "shards" : {
      "total" : 510,
      "primaries" : 136,
      "replication" : 2.75,
      "index" : {
        "shards" : {
          "min" : 2,
          "max" : 4,
          "avg" : 3.75
        },
        "primaries" : {
          "min" : 1,
          "max" : 1,
          "avg" : 1.0
        },
        "replication" : {
          "min" : 1.0,
          "max" : 3.0,
          "avg" : 2.75
        }
      }
    },
    "docs" : {
      "count" : 441856072,
      "deleted" : 52934460
    },
    "store" : {
      "size" : "786.5gb",
      "size_in_bytes" : 844550889417
    },
    "fielddata" : {
      "memory_size" : "5.1gb",
      "memory_size_in_bytes" : 5543590276,
      "evictions" : 0
    },
    "query_cache" : {
      "memory_size" : "783mb",
      "memory_size_in_bytes" : 821076571,
      "total_count" : 59525413,
      "hit_count" : 26836972,
      "miss_count" : 32688441,
      "cache_size" : 110034,
      "cache_count" : 6331916,
      "evictions" : 6221882
    },
    "completion" : {
      "size" : "0b",
      "size_in_bytes" : 0
    },
    "segments" : {
      "count" : 7394,
      "memory" : "843.1mb",
      "memory_in_bytes" : 884078371,
      "terms_memory" : "491.7mb",
      "terms_memory_in_bytes" : 515677023,
      "stored_fields_memory" : "330.8mb",
      "stored_fields_memory_in_bytes" : 346965816,
      "term_vectors_memory" : "0b",
      "term_vectors_memory_in_bytes" : 0,
      "norms_memory" : "13.2mb",
      "norms_memory_in_bytes" : 13857408,
      "points_memory" : "0b",
      "points_memory_in_bytes" : 0,
      "doc_values_memory" : "7.2mb",
      "doc_values_memory_in_bytes" : 7578124,
      "index_writer_memory" : "58.5mb",
      "index_writer_memory_in_bytes" : 61417136,
      "version_map_memory" : "2.6mb",
      "version_map_memory_in_bytes" : 2817058,
      "fixed_bit_set" : "2.2mb",
      "fixed_bit_set_memory_in_bytes" : 2360888,
      "max_unsafe_auto_id_timestamp" : 1621382402316,
      "file_sizes" : { }
    }
  },
  "nodes" : {
    "count" : {
      "total" : 7,
      "coordinating_only" : 0,
      "data" : 4,
      "ingest" : 2,
      "master" : 3,
      "ml" : 7,
      "voting_only" : 2
    },
    "versions" : [
      "7.6.2"
    ],
    "os" : {
      "available_processors" : 28,
      "allocated_processors" : 28,
      "names" : [
        {
          "name" : "Linux",
          "count" : 7
        }
      ],
      "pretty_names" : [
        {
          "pretty_name" : "Oracle Linux Server 7.7",
          "count" : 3
        },
        {
          "pretty_name" : "Oracle Linux Server 7.8",
          "count" : 4
        }
      ],
      "mem" : {
        "total" : "391.9gb",
        "total_in_bytes" : 420849754112,
        "free" : "110gb",
        "free_in_bytes" : 118218153984,
        "used" : "281.8gb",
        "used_in_bytes" : 302631600128,
        "free_percent" : 28,
        "used_percent" : 72
      }
    },
    "process" : {
      "cpu" : {
        "percent" : 0
      },
      "open_file_descriptors" : {
        "min" : 406,
        "max" : 1878,
        "avg" : 1172
      }
    },
    "jvm" : {
      "max_uptime" : "253.8d",
      "max_uptime_in_millis" : 21934311843,
      "versions" : [
        {
          "version" : "1.8.0_262",
          "vm_name" : "OpenJDK 64-Bit Server VM",
          "vm_version" : "",
          "vm_vendor" : "",
          "bundled_jdk" : false,
          "using_bundled_jdk" : null,
          "count" : 3
        },
        {
          "version" : "1.8.0_282",
          "vm_name" : "OpenJDK 64-Bit Server VM",
          "vm_version" : "",
          "vm_vendor" : "",
          "bundled_jdk" : false,
          "using_bundled_jdk" : null,
          "count" : 4
        }
      ],
      "mem" : {
        "heap_used" : "37.3gb",
        "heap_used_in_bytes" : 40115500776,
        "heap_max" : "77.7gb",
        "heap_max_in_bytes" : 83507806208
      },
      "threads" : 539
    },
    "fs" : {
      "total" : "2.2tb",
      "total_in_bytes" : 2468400332800,
      "free" : "1.4tb",
      "free_in_bytes" : 1623075577856,
      "available" : "1.4tb",
      "available_in_bytes" : 1623075577856
    },
    "plugins" : [ ],
    "network_types" : {
      "transport_types" : {
        "security4" : 7
      },
      "http_types" : {
        "security4" : 7
      }
    },
    "discovery_types" : {
      "zen" : 7
    },
    "packaging_types" : [
      {
        "flavor" : "default",
        "type" : "tar",
        "count" : 7
      }
    ],
    "ingest" : {
      "number_of_pipelines" : 2,
      "processor_stats" : {
        "gsub" : {
          "count" : 0,
          "failed" : 0,
          "current" : 0,
          "time" : "0s",
          "time_in_millis" : 0
        },
        "script" : {
          "count" : 0,
          "failed" : 0,
          "current" : 0,
          "time" : "0s",
          "time_in_millis" : 0
        }
      }
    }
  }
}
Thanks. Other than suggesting an upgrade (the latest release is 7.12), it looks ok.
What do the hot threads from the node at 100% show? The same goes for the Elasticsearch logs.
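For example, here is a sketch that pulls hot threads for a single node; "data_node_04" is just a placeholder node name taken from this thread, and the same unsecured-localhost assumption applies.

# Minimal sketch: fetch hot threads for one node (plain-text response).
# "data_node_04" is a placeholder node name; assumes http://localhost:9200.
import urllib.request

URL = "http://localhost:9200/_nodes/data_node_04/hot_threads?threads=3"
with urllib.request.urlopen(URL) as resp:
    print(resp.read().decode("utf-8"))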
Please find below the details for the current time (I couldn't capture them at that exact moment). The thing is, I am seeing this behaviour (CPU nearing 100%) on almost all data nodes during writes. I see the same behaviour in other clusters too, so I am wondering: is this normal for Elasticsearch, or does 100% mean just 1/4 of the total CPU (on a 4-CPU node)?
::: {data_node_04}{}{}{data_node_04.com}{}{dl}{ml.machine_memory=67368677376, ml.max_open_jobs=20, xpack.installed=true}
Hot threads at 2021-05-20T07:16:54.357Z, interval=500ms, busiestThreads=3, ignoreIdleThreads=true:
::: {data_node_01}{}{}{data_node_01.com}{}{dlmv}{ml.machine_memory=67368726528, ml.max_open_jobs=20, xpack.installed=true}
Hot threads at 2021-05-20T07:16:54.358Z, interval=500ms, busiestThreads=3, ignoreIdleThreads=true:
::: {data_node_02}{}{}{data_node_02.com}{}{dlmv}{ml.machine_memory=67368677376, ml.max_open_jobs=20, xpack.installed=true}
Hot threads at 2021-05-20T07:16:54.357Z, interval=500ms, busiestThreads=3, ignoreIdleThreads=true:
::: {data_node_03}{}{}{data_node_03.com}{}{dl}{ml.machine_memory=67368726528, ml.max_open_jobs=20, xpack.installed=true}
Hot threads at 2021-05-20T07:16:54.358Z, interval=500ms, busiestThreads=3, ignoreIdleThreads=true:
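Since the spike is hard to catch by hand, a small polling loop like the sketch below could record per-node CPU every few seconds while a write burst is running. It assumes the same unsecured http://localhost:9200 endpoint; adjust host/auth as needed.

# Minimal sketch: poll per-node CPU via _cat/nodes every 5 seconds during indexing.
# Assumes an unsecured cluster on http://localhost:9200 - adjust host/auth as needed.
import time
import urllib.request

URL = "http://localhost:9200/_cat/nodes?h=name,cpu,load_1m"

for _ in range(12):  # roughly one minute of samples
    with urllib.request.urlopen(URL) as resp:
        print(time.strftime("%H:%M:%S"), resp.read().decode("utf-8").strip(), sep="\n")
    time.sleep(5)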