We are maintaining a 10-node cluster (5 hot nodes with SSD storage and 5 warm nodes with SAS storage, running the latest Elasticsearch version). The shard allocation for a specific index looks like this:
GET _cat/shards/logstash-windows-2021.05?v
index shard prirep state docs store ip node
logstash-windows-2021.05 2 p STARTED 59324144 8.4gb 10.0.2.47 elastic03-warm
logstash-windows-2021.05 2 r UNASSIGNED
logstash-windows-2021.05 1 p STARTED 59316839 8.4gb 10.0.2.47 elastic03-warm
logstash-windows-2021.05 1 r UNASSIGNED
logstash-windows-2021.05 3 p STARTED 59325907 8.4gb 10.0.2.47 elastic03-warm
logstash-windows-2021.05 3 r UNASSIGNED
logstash-windows-2021.05 4 p STARTED 59342868 8.4gb 10.0.2.47 elastic03-warm
logstash-windows-2021.05 4 r UNASSIGNED
logstash-windows-2021.05 0 p STARTED 59323317 8.4gb 10.0.2.47 elastic03-warm
logstash-windows-2021.05 0 r UNASSIGNED
As you can see, all primary shards belong to one node (which is not how it should be) and no replica shards are assigned to any node. This leads to a yellow cluster state. The explain endpoint tells me:
Request
GET /_cluster/allocation/explain
{
"index": "logstash-windows-2021.05",
"shard": 0,
"primary": true
}
Response
{
"index" : "logstash-windows-2021.05",
"shard" : 0,
"primary" : true,
"current_state" : "started",
"current_node" : {
"id" : "OEsq71lQTXOGnRVaSTnsFQ",
"name" : "elastic03-warm",
"transport_address" : "10.0.2.47:9300",
"attributes" : {
"ml.machine_memory" : "67189411840",
"ml.max_open_jobs" : "512",
"xpack.installed" : "true",
"box_type" : "warm",
"ml.max_jvm_size" : "17179869184",
"transform.node" : "true"
},
"weight_ranking" : 10
},
"can_remain_on_current_node" : "yes",
"can_rebalance_cluster" : "no",
"can_rebalance_cluster_decisions" : [
{
"decider" : "rebalance_only_when_active",
"decision" : "NO",
"explanation" : "rebalancing is not allowed until all replicas in the cluster are active"
},
{
"decider" : "cluster_rebalance",
"decision" : "NO",
"explanation" : "the cluster has unassigned shards and cluster setting [cluster.routing.allocation.allow_rebalance] is set to [indices_all_active]"
}
],
"can_rebalance_to_other_node" : "no",
"rebalance_explanation" : "rebalancing is not allowed",
"node_allocation_decisions" : [
{
"node_id" : "iBYZCYt2R26EH26NyN9cSQ",
"node_name" : "elastic04-hot",
"transport_address" : "10.0.2.55:9300",
"node_attributes" : {
"ml.machine_memory" : "67189420032",
"ml.max_open_jobs" : "512",
"xpack.installed" : "true",
"box_type" : "hot",
"ml.max_jvm_size" : "17179869184",
"transform.node" : "true"
},
"node_decision" : "no",
"weight_ranking" : 1,
"deciders" : [
{
"decider" : "filter",
"decision" : "NO",
"explanation" : """node does not match index setting [index.routing.allocation.require] filters [box_type:"warm",_id:"OEsq71lQTXOGnRVaSTnsFQ"]"""
}
]
},
{
"node_id" : "fyIdSP98TA-n34cCevwQmQ",
"node_name" : "elastic01-hot",
"transport_address" : "10.0.2.54:9300",
"node_attributes" : {
"ml.machine_memory" : "67189411840",
"ml.max_open_jobs" : "512",
"xpack.installed" : "true",
"box_type" : "hot",
"ml.max_jvm_size" : "17179869184",
"transform.node" : "true"
},
"node_decision" : "no",
"weight_ranking" : 2,
"deciders" : [
{
"decider" : "filter",
"decision" : "NO",
"explanation" : """node does not match index setting [index.routing.allocation.require] filters [box_type:"warm",_id:"OEsq71lQTXOGnRVaSTnsFQ"]"""
}
]
},
{
"node_id" : "u6WK5-8AQi21WlJy9Do2cg",
"node_name" : "elastic02-hot",
"transport_address" : "10.0.2.52:9300",
"node_attributes" : {
"ml.machine_memory" : "67189420032",
"ml.max_open_jobs" : "512",
"xpack.installed" : "true",
"box_type" : "hot",
"ml.max_jvm_size" : "17179869184",
"transform.node" : "true"
},
"node_decision" : "no",
"weight_ranking" : 3,
"deciders" : [
{
"decider" : "filter",
"decision" : "NO",
"explanation" : """node does not match index setting [index.routing.allocation.require] filters [box_type:"warm",_id:"OEsq71lQTXOGnRVaSTnsFQ"]"""
}
]
},
{
"node_id" : "Z6RvCoyNSxWkDaxjejtH9g",
"node_name" : "elastic03-hot",
"transport_address" : "10.0.2.57:9300",
"node_attributes" : {
"ml.machine_memory" : "67189411840",
"ml.max_open_jobs" : "512",
"xpack.installed" : "true",
"box_type" : "hot",
"ml.max_jvm_size" : "17179869184",
"transform.node" : "true"
},
"node_decision" : "no",
"weight_ranking" : 4,
"deciders" : [
{
"decider" : "filter",
"decision" : "NO",
"explanation" : """node does not match index setting [index.routing.allocation.require] filters [box_type:"warm",_id:"OEsq71lQTXOGnRVaSTnsFQ"]"""
}
]
},
{
"node_id" : "ROTrmlGqRi2wJlVDvk-YaA",
"node_name" : "elastic05-hot",
"transport_address" : "10.0.2.46:9300",
"node_attributes" : {
"ml.machine_memory" : "67189420032",
"ml.max_open_jobs" : "512",
"xpack.installed" : "true",
"box_type" : "hot",
"ml.max_jvm_size" : "17179869184",
"transform.node" : "true"
},
"node_decision" : "no",
"weight_ranking" : 5,
"deciders" : [
{
"decider" : "filter",
"decision" : "NO",
"explanation" : """node does not match index setting [index.routing.allocation.require] filters [box_type:"warm",_id:"OEsq71lQTXOGnRVaSTnsFQ"]"""
}
]
},
{
"node_id" : "4PyMHQw2SWSTzpQmtZKAAw",
"node_name" : "elastic04-warm",
"transport_address" : "10.0.2.56:9300",
"node_attributes" : {
"ml.machine_memory" : "67189420032",
"ml.max_open_jobs" : "512",
"xpack.installed" : "true",
"box_type" : "warm",
"ml.max_jvm_size" : "17179869184",
"transform.node" : "true"
},
"node_decision" : "no",
"weight_ranking" : 6,
"deciders" : [
{
"decider" : "filter",
"decision" : "NO",
"explanation" : """node does not match index setting [index.routing.allocation.require] filters [box_type:"warm",_id:"OEsq71lQTXOGnRVaSTnsFQ"]"""
}
]
},
{
"node_id" : "t2-TCIqfTOSeLhAjR568OA",
"node_name" : "elastic05-warm",
"transport_address" : "10.0.2.49:9300",
"node_attributes" : {
"ml.machine_memory" : "67189420032",
"ml.max_open_jobs" : "512",
"xpack.installed" : "true",
"box_type" : "warm",
"ml.max_jvm_size" : "17179869184",
"transform.node" : "true"
},
"node_decision" : "no",
"weight_ranking" : 7,
"deciders" : [
{
"decider" : "filter",
"decision" : "NO",
"explanation" : """node does not match index setting [index.routing.allocation.require] filters [box_type:"warm",_id:"OEsq71lQTXOGnRVaSTnsFQ"]"""
}
]
},
{
"node_id" : "7RfXFUNkSy6CG9uLjOhD5Q",
"node_name" : "elastic02-warm",
"transport_address" : "10.0.2.51:9300",
"node_attributes" : {
"ml.machine_memory" : "67189420032",
"ml.max_open_jobs" : "512",
"xpack.installed" : "true",
"box_type" : "warm",
"ml.max_jvm_size" : "17179869184",
"transform.node" : "true"
},
"node_decision" : "no",
"weight_ranking" : 8,
"deciders" : [
{
"decider" : "filter",
"decision" : "NO",
"explanation" : """node does not match index setting [index.routing.allocation.require] filters [box_type:"warm",_id:"OEsq71lQTXOGnRVaSTnsFQ"]"""
}
]
},
{
"node_id" : "mnioyEK8R2iz5rQErRb9Jw",
"node_name" : "elastic01-warm",
"transport_address" : "10.0.2.53:9300",
"node_attributes" : {
"ml.machine_memory" : "67189411840",
"ml.max_open_jobs" : "512",
"xpack.installed" : "true",
"box_type" : "warm",
"ml.max_jvm_size" : "17179869184",
"transform.node" : "true"
},
"node_decision" : "no",
"weight_ranking" : 9,
"deciders" : [
{
"decider" : "filter",
"decision" : "NO",
"explanation" : """node does not match index setting [index.routing.allocation.require] filters [box_type:"warm",_id:"OEsq71lQTXOGnRVaSTnsFQ"]"""
}
]
}
]
}
The unassigned replica shards (and also a few of the primary shards) should be assigned/balanced across the 5 existing nodes with the box_type attribute "warm". You can see that these nodes exist, but the allocation explain API gives me an explanation that makes no sense to me:
"node does not match index setting [index.routing.allocation.require] filters [box_type:"warm",_id:"OEsq71lQTXOGnRVaSTnsFQ"]"
Why is that? Any ideas how to fix that?