Hello!
I have the following cluster:
- node1 with attributes box_type: hot and zone: zone1
- node2 with attributes box_type: hot and zone: zone1
- node3 with attributes box_type: hot and zone: zone2
- node4 with attributes box_type: cold and zone: zone1
- node5 with attributes box_type: cold and zone: zone2
Master-eligible and data nodes: node1, node2, node3 (all hot nodes)
Data-only nodes: node4, node5 (all cold nodes)
//
On the three master nodes, I set this configuration:
- cluster.routing.allocation.awareness.attributes: zone
- cluster.routing.allocation.awareness.force.zone.values: zone1,zone2
//
- Index template:
{ "filebeat-template-test" : { "order" : 0, "index_patterns" : [ "filebeat-test-*" ], "settings" : { "index" : { "lifecycle" : { "name" : "mainpolicy", "rollover_alias" : "filebeat-test" }, "routing" : { "allocation" : { "include" : { "box_type" : "hot" } } }, "mapping" : { "total_fields" : { "limit" : "10000" } }, [...]
ILM policy:
{
"mainpolicy" : {
"version" : 3,
"modified_date" : "2020-11-06T08:00:47.215Z",
"policy" : {
"phases" : {
"hot" : {
"min_age" : "0ms",
"actions" : {
"rollover" : {
"max_size" : "50gb",
"max_age" : "1d"
},
"set_priority" : {
"priority" : null
}
}
},
"delete" : {
"min_age" : "10d",
"actions" : {
"delete" : {
"delete_searchable_snapshot" : true
}
}
},
"cold" : {
"min_age" : "7d",
"actions" : {
"allocate" : {
"include" : { },
"exclude" : { },
"require" : {
"box_type" : "cold"
}
},
"freeze" : { },
"set_priority" : {
"priority" : null
[...]
Index settings:
{
"settings": {
"index": {
"lifecycle": {
"name": "mainpolicy",
"rollover_alias": "filebeat-test",
"indexing_complete": "true"
},
"routing": {
"allocation": {
"include": {
"box_type": "hot"
},
"require": {
"box_type": "cold"
}
}
},
"mapping": {
"total_fields": {
"limit": "10000"
}
},
"refresh_interval": "5s",
"number_of_shards": "3",
"provided_name": "filebeat-test-000006",
"max_docvalue_fields_search": "200",
[...]
After restarting all nodes, I get the following allocation error:
{
"index" : "filebeat-test-000006",
"shard" : 1,
"primary" : false,
"current_state" : "unassigned",
"unassigned_info" : {
"reason" : "CLUSTER_RECOVERED",
"at" : "2020-11-19T08:06:18.506Z",
"last_allocation_status" : "no_attempt"
},
"can_allocate" : "no",
"allocate_explanation" : "cannot allocate because allocation is not permitted to any of the nodes",
"node_allocation_decisions" : [
{
"node_id" : "2gjmhy4gT1-WKvvTqtn3Vg",
"node_name" : "node4",
"transport_address" : "192.168.1.11:9300",
"node_attributes" : {
"xpack.installed" : "true",
"box_type" : "cold",
"zone" : "zone1",
"transform.node" : "true"
},
"node_decision" : "no",
"deciders" : [
{
"decider" : "filter",
"decision" : "NO",
"explanation" : """node does not match index setting [index.routing.allocation.include] filters [box_type:"hot"]"""
}
]
},
{
"node_id" : "3qvjjvnvTUajtoP0X4h97g",
"node_name" : "node1",
"transport_address" : "192.168.1.27:9300",
"node_attributes" : {
"xpack.installed" : "true",
"box_type" : "hot",
"zone" : "zone1",
"transform.node" : "true"
},
"node_decision" : "no",
"deciders" : [
{
"decider" : "filter",
"decision" : "NO",
"explanation" : """node does not match index setting [index.routing.allocation.require] filters [box_type:"cold"]"""
}
]
},
{
"node_id" : "7Hp7eqbSQIqZSiH-s6rVdQ",
"node_name" : "node3",
"transport_address" : "192.168.1.29:9300",
"node_attributes" : {
"xpack.installed" : "true",
"box_type" : "hot",
"zone" : "zone2",
"transform.node" : "true"
},
"node_decision" : "no",
"deciders" : [
{
"decider" : "filter",
"decision" : "NO",
"explanation" : """node does not match index setting [index.routing.allocation.require] filters [box_type:"cold"]"""
},
{
"decider" : "same_shard",
"decision" : "NO",
"explanation" : "a copy of this shard is already allocated to this node [[filebeat-test-000006][1], node[7Hp7eqbSQIqZSiH-s6rVdQ], [P], s[STARTED], a[id=IjIUykz0Sm27cT-Wa4ZpfA]]"
},
{
"decider" : "awareness",
"decision" : "NO",
"explanation" : "there are too many copies of the shard allocated to nodes with attribute [zone], there are [2] total configured shard copies for this shard id and [3] total attribute values, expected the allocated shard count per attribute [2] to be less than or equal to the upper bound of the required number of shards per attribute [1]"
}
]
},
{
"node_id" : "J0FDiT6mRLG7kyujZ1RC_g",
"node_name" : "node2",
"transport_address" : "192.168.1.28:9300",
"node_attributes" : {
"xpack.installed" : "true",
"box_type" : "hot",
"zone" : "zone1",
"transform.node" : "true"
},
"node_decision" : "no",
"deciders" : [
{
"decider" : "filter",
"decision" : "NO",
"explanation" : """node does not match index setting [index.routing.allocation.require] filters [box_type:"cold"]"""
}
]
},
{
"node_id" : "dgEgyoNmTSalZmqDx3mUoQ",
"node_name" : "node5",
"transport_address" : "192.168.1.10:9300",
"node_attributes" : {
"xpack.installed" : "true",
"box_type" : "cold",
"zone" : "zone2",
"transform.node" : "true"
},
"node_decision" : "no",
"deciders" : [
{
"decider" : "filter",
"decision" : "NO",
"explanation" : """node does not match index setting [index.routing.allocation.include] filters [box_type:"hot"]"""
},
{
"decider" : "awareness",
"decision" : "NO",
"explanation" : "there are too many copies of the shard allocated to nodes with attribute [zone], there are [2] total configured shard copies for this shard id and [3] total attribute values, expected the allocated shard count per attribute [2] to be less than or equal to the upper bound of the required number of shards per attribute [1]"
}
]
}
]
}
It looks like only the replica shards of indices that should be in the cold phase are affected.
Example of an affected index:
- Shard allocation for an index without the issue (in the hot phase):
zone1 : replica 1 // replica 2 // primary 0
zone2 : replica 0 // primary 1 // primary 2
- Shard allocation for an index with the issue (should be in the cold phase):
zone1: primary 0
zone2: primary 1 // primary 2
Thanks for your help!