Frequent shard failure remediation

Hi folks,
Can anyone tell me whether there is an API command to get the number of used, unused, and total shards of my Elasticsearch cluster?
Additionally, how can I increase the total number of shards of my multi-node Elasticsearch cluster?

Any suggestions on this would be very helpful.

What is the problem you are trying to solve? Why are the shards failing? It would help if you could elaborate and describe the issue in detail.

There is also some additional information that is generally useful:

  • Which version of Elasticsearch are you using?
  • What is the full output of the cluster stats API?
  • What is the hardware specification of the cluster? What type of storage are you using?

My shards are filling up very often, so I wanted to add one more node to my three-node Elasticsearch cluster to increase the number of shards. Currently I have three nodes, and all three are configured as both master and data nodes. Please let me know whether adding one more node would be a good approach to increase the number of shards, so that my shards don't fill up too quickly and bring down the cluster. If there is a better approach we can try, please share it with me. Also, before increasing the shards, I wanted to know the used and unused shard counts for the current cluster. Is there any API to get this output?

#! Elasticsearch built-in security features are not enabled. Without authentication, your cluster could be accessible to anyone. See https://www.elastic.co/guide/en/elasticsearch/reference/7.16/security-minimal-setup.html to enable security.
{
"_nodes" : {
"total" : 3,
"successful" : 3,
"failed" : 0
},
"cluster_name" : "elk-cluster-production",
"cluster_uuid" : "XXXXXXXXXXXXX",
"timestamp" : 1714983938071,
"status" : "green",
"indices" : {
"count" : 1552,
"shards" : {
"total" : 3104,
"primaries" : 1552,
"replication" : 1.0,
"index" : {
"shards" : {
"min" : 2,
"max" : 2,
"avg" : 2.0
},
"primaries" : {
"min" : 1,
"max" : 1,
"avg" : 1.0
},
"replication" : {
"min" : 1.0,
"max" : 1.0,
"avg" : 1.0
}
}
},
"docs" : {
"count" : 729063463,
"deleted" : 464693
},
"store" : {
"size_in_bytes" : 552320981936,
"total_data_set_size_in_bytes" : 552320981936,
"reserved_in_bytes" : 0
},
"fielddata" : {
"memory_size_in_bytes" : 1775336,
"evictions" : 0
},
"query_cache" : {
"memory_size_in_bytes" : 8767640,
"total_count" : 154169445,
"hit_count" : 5443782,
"miss_count" : 148725663,
"cache_size" : 66,
"cache_count" : 117167,
"evictions" : 117101
},
"completion" : {
"size_in_bytes" : 0
},
"segments" : {
"count" : 23513,
"memory_in_bytes" : 211416880,
"terms_memory_in_bytes" : 159837328,
"stored_fields_memory_in_bytes" : 13902232,
"term_vectors_memory_in_bytes" : 0,
"norms_memory_in_bytes" : 18654656,
"points_memory_in_bytes" : 0,
"doc_values_memory_in_bytes" : 19022664,
"index_writer_memory_in_bytes" : 845634784,
"version_map_memory_in_bytes" : 7376535,
"fixed_bit_set_memory_in_bytes" : 118083096,
"max_unsafe_auto_id_timestamp" : 1714971640400,
"file_sizes" : { }
},
"mappings" : {
"field_types" : [
{
"name" : "alias",
"count" : 666,
"index_count" : 48,
"script_count" : 0
},
{
"name" : "boolean",
"count" : 6150,
"index_count" : 52,
"script_count" : 0
},
{
"name" : "constant_keyword",
"count" : 297,
"index_count" : 99,
"script_count" : 0
},
{
"name" : "date",
"count" : 11652,
"index_count" : 1540,
"script_count" : 0
},
{
"name" : "double",
"count" : 1988,
"index_count" : 48,
"script_count" : 0
},
{
"name" : "flattened",
"count" : 2016,
"index_count" : 46,
"script_count" : 0
},
{
"name" : "float",
"count" : 3736,
"index_count" : 981,
"script_count" : 0
},
{
"name" : "geo_point",
"count" : 474,
"index_count" : 48,
"script_count" : 0
},
{
"name" : "ip",
"count" : 7909,
"index_count" : 101,
"script_count" : 0
},
{
"name" : "keyword",
"count" : 237609,
"index_count" : 1541,
"script_count" : 0
},
{
"name" : "long",
"count" : 74942,
"index_count" : 464,
"script_count" : 0
},
{
"name" : "match_only_text",
"count" : 2898,
"index_count" : 46,
"script_count" : 0
},
{
"name" : "nested",
"count" : 786,
"index_count" : 50,
"script_count" : 0
},
{
"name" : "object",
"count" : 42664,
"index_count" : 517,
"script_count" : 0
},
{
"name" : "scaled_float",
"count" : 88,
"index_count" : 46,
"script_count" : 0
},
{
"name" : "short",
"count" : 9346,
"index_count" : 48,
"script_count" : 0
},
{
"name" : "text",
"count" : 24586,
"index_count" : 1541,
"script_count" : 0
},
{
"name" : "version",
"count" : 4,
"index_count" : 4,
"script_count" : 0
},
{
"name" : "wildcard",
"count" : 782,
"index_count" : 46,
"script_count" : 0
}
],
"runtime_field_types" : [
{
"name" : "keyword",
"count" : 3307,
"index_count" : 733,
"scriptless_count" : 0,
"shadowed_count" : 2367,
"lang" : [
"painless"
],
"lines_max" : 1,
"lines_total" : 3307,
"chars_max" : 488,
"chars_total" : 916096,
"source_max" : 2,
"source_total" : 4471,
"doc_max" : 1,
"doc_total" : 940
}
]
},
"analysis" : {
"char_filter_types" : [ ],
"tokenizer_types" : [ ],
"filter_types" : [ ],
"analyzer_types" : [ ],
"built_in_char_filters" : [ ],
"built_in_tokenizers" : [ ],
"built_in_filters" : [ ],
"built_in_analyzers" : [ ]
},
"versions" : [
{
"version" : "7.16.3",
"index_count" : 1552,
"primary_shard_count" : 1552,
"total_primary_bytes" : 276118945897
}
]
},
"nodes" : {
"count" : {
"total" : 3,
"coordinating_only" : 0,
"data" : 3,
"data_cold" : 3,
"data_content" : 3,
"data_frozen" : 3,
"data_hot" : 3,
"data_warm" : 3,
"ingest" : 3,
"master" : 3,
"ml" : 3,
"remote_cluster_client" : 3,
"transform" : 3,
"voting_only" : 0
},
"versions" : [
"7.16.3"
],
"os" : {
"available_processors" : 280,
"allocated_processors" : 280,
"names" : [
{
"name" : "Linux",
"count" : 3
}
],
"pretty_names" : [
{
"pretty_name" : "CentOS Linux 7 (Core)",
"count" : 3
}
],
"architectures" : [
{
"arch" : "amd64",
"count" : 3
}
],
"mem" : {
"total_in_bytes" : 2975974805504,
"free_in_bytes" : 1485729288192,
"used_in_bytes" : 1490245517312,
"free_percent" : 50,
"used_percent" : 50
}
},
"process" : {
"cpu" : {
"percent" : 0
},
"open_file_descriptors" : {
"min" : 7527,
"max" : 7934,
"avg" : 7706
}
},
"jvm" : {
"max_uptime_in_millis" : 3264434007,
"versions" : [
{
"version" : "17.0.1",
"vm_name" : "OpenJDK 64-Bit Server VM",
"vm_version" : "17.0.1+12",
"vm_vendor" : "Eclipse Adoptium",
"bundled_jdk" : true,
"using_bundled_jdk" : true,
"count" : 3
}
],
"mem" : {
"heap_used_in_bytes" : 20455240400,
"heap_max_in_bytes" : 96636764160
},
"threads" : 1253
},
"fs" : {
"total_in_bytes" : 105553116266496,
"free_in_bytes" : 41639504805888,
"available_in_bytes" : 41639504805888
},
"plugins" : [ ],
"network_types" : {
"transport_types" : {
"security4" : 3
},
"http_types" : {
"security4" : 3
}
},
"discovery_types" : {
"zen" : 3
},
"packaging_types" : [
{
"flavor" : "default",
"type" : "rpm",
"count" : 3
}
],
"ingest" : {
"number_of_pipelines" : 6,
"processor_stats" : {
"conditional" : {
"count" : 0,
"failed" : 0,
"current" : 0,
"time_in_millis" : 0
},
"date" : {
"count" : 133432,
"failed" : 0,
"current" : 0,
"time_in_millis" : 30813
},
"geoip" : {
"count" : 133432,
"failed" : 0,
"current" : 0,
"time_in_millis" : 5376
},
"grok" : {
"count" : 41655730,
"failed" : 41388866,
"current" : 1,
"time_in_millis" : 76074665
},
"gsub" : {
"count" : 0,
"failed" : 0,
"current" : 0,
"time_in_millis" : 0
},
"remove" : {
"count" : 133432,
"failed" : 0,
"current" : 0,
"time_in_millis" : 1013
},
"rename" : {
"count" : 81,
"failed" : 0,
"current" : 0,
"time_in_millis" : 3
},
"script" : {
"count" : 0,
"failed" : 0,
"current" : 0,
"time_in_millis" : 0
},
"set" : {
"count" : 81,
"failed" : 0,
"current" : 0,
"time_in_millis" : 14
},
"uri_parts" : {
"count" : 0,
"failed" : 0,
"current" : 0,
"time_in_millis" : 0
},
"user_agent" : {
"count" : 0,
"failed" : 0,
"current" : 0,
"time_in_millis" : 0
}
}
}
}
}

Can anyone please suggest a solution?