Hi,
when I have a 3-node cluster and 3 shards (1 primary, 2 replicas) per index, in my experience it is not possible to evacuate a node completely, which makes sense to me. Is it safe in this case to stop the Elasticsearch service node by node for maintenance?
After a node rejoins the cluster, no primary shards get allocated to that node. Is this expected behavior? Can this be a problem? If so, how can I achieve automatic rebalancing of primary shards? The relevant default settings look like this:
# curl -s -k -u elastic:$elasticpw -X GET 'https://localhost:9200/_cluster/settings?include_defaults' | jq '.defaults.cluster.routing.allocation'
{
"use_adaptive_replica_selection": "true",
"rebalance": {
"enable": "all"
},
"allocation": {
"node_concurrent_incoming_recoveries": "2",
"include": {
"_tier": ""
},
"node_initial_primaries_recoveries": "4",
"same_shard": {
"host": "false"
},
"total_shards_per_node": "-1",
"require": {
"_tier": ""
},
"shard_state": {
"reroute": {
"priority": "NORMAL"
}
},
"type": "balanced",
"disk": {
"threshold_enabled": "true",
"watermark": {
"flood_stage.frozen.max_headroom": "20GB",
"flood_stage": "95%",
"high": "90%",
"low": "85%",
"enable_for_single_data_node": "false",
"flood_stage.frozen": "95%"
},
"include_relocations": "true",
"reroute_interval": "60s"
},
"awareness": {
"attributes": []
},
"balance": {
"index": "0.55",
"threshold": "1.0",
"shard": "0.45"
},
"enable": "all",
"node_concurrent_outgoing_recoveries": "2",
"allow_rebalance": "indices_all_active",
"cluster_concurrent_rebalance": "2",
"node_concurrent_recoveries": "2",
"exclude": {
"_tier": ""
}
}
}