Hi everyone,
My problem is that some indices have red status for no apparent reason.
ENV:
- Elasticsearch 7.9.0
- CentOS 7
- 4-node cluster (3 master-eligible data nodes + 1 coordinating node)
- Using Logstash to parse data, together with Elasticsearch ingest
- Each index has 2 primary shards and 1 replica
- No disk watermark alerts (30-40% of disk is free on each node)
- No errors in the cluster log
Some detailed information:
{
"cluster_uuid" : "Qzb3n-hDTVaN5YBdRgBBlg",
"version" : {
"number" : "7.9.0",
"build_flavor" : "default",
"build_type" : "rpm",
"build_hash" : "a479a2a7fce0389512d6a9361301708b92dff667",
"build_date" : "2020-08-11T21:36:48.204330Z",
"build_snapshot" : false,
"lucene_version" : "8.6.0",
"minimum_wire_compatibility_version" : "6.8.0",
"minimum_index_compatibility_version" : "6.0.0-beta1"
}
}
Cluster Health
GET _cluster/health
{
"status" : "red",
"timed_out" : false,
"number_of_nodes" : 4,
"number_of_data_nodes" : 3,
"active_primary_shards" : 779,
"active_shards" : 1389,
"relocating_shards" : 0,
"initializing_shards" : 0,
"unassigned_shards" : 109,
"delayed_unassigned_shards" : 0,
"number_of_pending_tasks" : 0,
"number_of_in_flight_fetch" : 0,
"task_max_waiting_in_queue_millis" : 0,
"active_shards_percent_as_number" : 92.72363150867824
}
Shard status of the affected index
GET _cat/shards?h=index,shard,prirep,state,unassigned.reason
index-software-2021.06 1 r UNASSIGNED NODE_LEFT
index-software-2021.06 1 p UNASSIGNED ALLOCATION_FAILED
index-software-2021.06 0 r UNASSIGNED NODE_LEFT
index-software-2021.06 0 p UNASSIGNED ALLOCATION_FAILED
Allocation explain for the index
GET /_cluster/allocation/explain
{
"index": "index-software-2021.06",
"shard": 1,
"primary": true
}
=>
{
"index" : "ca-signed-software-2021.06",
"shard" : 1,
"primary" : true,
"current_state" : "unassigned",
"unassigned_info" : {
"reason" : "ALLOCATION_FAILED",
"at" : "2021-06-21T05:13:43.804Z",
"failed_allocation_attempts" : 5,
"details" : "failed shard on node [vinoghVsS82uLdJogTO3lw]: failed to create shard, failure IOException[failed to obtain in-memory shard lock]; nested: ShardLockObtainFailedException[[ca-signed-software-2021.06][1]: obtaining shard lock timed out after 5000ms, previous lock details: [shard creation] trying to lock for [shard creation]]; ",
"last_allocation_status" : "no"
},
"can_allocate" : "no",
"allocate_explanation" : "cannot allocate because allocation is not permitted to any of the nodes that hold an in-sync shard copy",
"node_allocation_decisions" : [
{
"node_id" : "AyR07ULeS2CUxrwyEzwb7g",
"node_name" : "node-2",
"transport_address" : "10.36.13.202:9300",
"node_attributes" : {
"ml.machine_memory" : "16654782464",
"ml.max_open_jobs" : "20",
"xpack.installed" : "true",
"data" : "warm",
"transform.node" : "true"
},
"node_decision" : "no",
"store" : {
"found" : false
}
},
{
"node_id" : "WjOITVufQKKkJk6Ngts9Pg",
"node_name" : "node-1",
"transport_address" : "10.36.13.201:9300",
"node_attributes" : {
"ml.machine_memory" : "16654790656",
"ml.max_open_jobs" : "20",
"xpack.installed" : "true",
"data" : "hot",
"transform.node" : "true"
},
"node_decision" : "no",
"store" : {
"in_sync" : false,
"allocation_id" : "EsoB8tIxTqWhLtUPkUpU7g"
}
},
{
"node_id" : "vinoghVsS82uLdJogTO3lw",
"node_name" : "node-3",
"transport_address" : "10.36.13.203:9300",
"node_attributes" : {
"ml.machine_memory" : "16654790656",
"ml.max_open_jobs" : "20",
"xpack.installed" : "true",
"data" : "cold",
"transform.node" : "true"
},
"node_decision" : "no",
"store" : {
"in_sync" : true,
"allocation_id" : "6M2dv6htQgqZiwCqlAQ5tA"
},
"deciders" : [
{
"decider" : "max_retry",
"decision" : "NO",
"explanation" : "shard has exceeded the maximum number of retries [5] on failed allocation attempts - manually call [/_cluster/reroute?retry_failed=true] to retry, [unassigned_info[[reason=ALLOCATION_FAILED], at[2021-06-21T05:13:43.804Z], failed_attempts[5], failed_nodes[[vinoghVsS82uLdJogTO3lw]], delayed=false, details[failed shard on node [vinoghVsS82uLdJogTO3lw]: failed to create shard, failure IOException[failed to obtain in-memory shard lock]; nested: ShardLockObtainFailedException[[ca-signed-software-2021.06][1]: obtaining shard lock timed out after 5000ms, previous lock details: [shard creation] trying to lock for [shard creation]]; ], allocation_status[deciders_no]]]"
}
]
}
]
}