Hi all,
I have a problem after configuring my cluster with hot/cold nodes. My goal is to keep hot indices, together with their replicas, on nodes tagged "hot", and all other indices on nodes tagged "cold".
The cluster cannot allocate my replica shards:
Errors
Shards :
index                       shard prirep state      docs store node
journalbeat_elk_test-000001 0     p      STARTED       0  283b elastic-data-node
journalbeat_elk_test-000001 0     r      UNASSIGNED            
GET _cluster/allocation/explain
{
  "index" : "journalbeat_elk_test-000001",
  "shard" : 0,
  "primary" : false,
  "current_state" : "unassigned",
  "unassigned_info" : {
    "reason" : "INDEX_CREATED",
    "at" : "2020-05-14T06:42:05.082Z",
    "last_allocation_status" : "no_attempt"
  },
  "can_allocate" : "no",
  "allocate_explanation" : "cannot allocate because allocation is not permitted to any of the nodes",
  "node_allocation_decisions" : [
    {
      "node_name" : "elastic-data-node3",
      "node_attributes" : {
        "xpack.installed" : "true",
        "box_type" : "cold"
      },
      "node_decision" : "no",
      "weight_ranking" : 1,
      "deciders" : [
        {
          "decider" : "filter",
          "decision" : "NO",
          "explanation" : """node does not match index setting [index.routing.allocation.require] filters [box_type:"hot"]"""
        }
      ]
    },
    {
      "node_name" : "elastic-data-node2",
      "node_attributes" : {
        "xpack.installed" : "true",
        "box_type" : "hot"
      },
      "node_decision" : "no",
      "weight_ranking" : 2,
      "deciders" : [
        {
          "decider" : "awareness",
          "decision" : "NO",
          "explanation" : "there are too many copies of the shard allocated to nodes with attribute [box_type], there are [2] total configured shard copies for this shard id and [3] total attribute values, expected the allocated shard count per attribute [2] to be less than or equal to the upper bound of the required number of shards per attribute [1]"
        }
      ]
    },
    {
      "node_name" : "elastic-data-node4",
      "node_attributes" : {
        "xpack.installed" : "true",
        "box_type" : "cold"
      },
      "node_decision" : "no",
      "weight_ranking" : 3,
      "deciders" : [
        {
          "decider" : "filter",
          "decision" : "NO",
          "explanation" : """node does not match index setting [index.routing.allocation.require] filters [box_type:"hot"]"""
        }
      ]
    },
    {
      "node_name" : "elastic-data-node",
      "node_attributes" : {
        "xpack.installed" : "true",
        "box_type" : "hot"
      },
      "node_decision" : "no",
      "weight_ranking" : 4,
      "deciders" : [
        {
          "decider" : "same_shard",
          "decision" : "NO",
          "explanation" : "the shard cannot be allocated to the same node on which a copy of the shard already exists [[journalbeat_elk_test-000001][0], node[elastic-data-node], [P], s[STARTED], a[id=XHp2qZm2Q7O4E7g6gDoYoA]]"
        },
        {
          "decider" : "awareness",
          "decision" : "NO",
          "explanation" : "there are too many copies of the shard allocated to nodes with attribute [box_type], there are [2] total configured shard copies for this shard id and [3] total attribute values, expected the allocated shard count per attribute [2] to be less than or equal to the upper bound of the required number of shards per attribute [1]"
        }
      ]
    }
  ]
}
Test execution :
Reindexing an existing index into a new one that uses the ILM policy.
Infra :
- 3 Master nodes
- 4 Data nodes (2 hot / 2 cold)
Cluster settings :
{
  "persistent" : {
    "indices" : {
      "recovery" : {
        "max_bytes_per_sec" : "250mb"
      }
    },
    "xpack" : {
      "monitoring" : {
        "collection" : {
          "enabled" : "true"
        }
      }
    }
  },
  "transient" : {
    "cluster" : {
      "routing" : {
        "allocation" : {
          "awareness" : {
            "attributes" : "box_type",
            "force" : {
              "box_type" : {
                "values" : "hot,cold"
              }
            }
          },
          "enable" : "all"
        }
      }
    },
    "indices" : {
      "lifecycle" : {
        "poll_interval" : "1s"
      }
    }
  }
}
ILM policy used :
{
  "test_ilm_hot_cold" : {
    "version" : 19,
    "modified_date" : "2020-05-14T07:04:23.061Z",
    "policy" : {
      "phases" : {
        "cold" : {
          "min_age" : "1m",
          "actions" : {
            "freeze" : { },
            "allocate" : {
              "include" : {
                "box_type" : "cold"
              },
              "exclude" : { },
              "require" : { }
            },
            "set_priority" : {
              "priority" : 0
            }
          }
        },
        "hot" : {
          "min_age" : "0ms",
          "actions" : {
            "rollover" : {
              "max_size" : "100mb"
            },
            "set_priority" : {
              "priority" : 100
            }
          }
        },
        "delete" : {
          "min_age" : "390d",
          "actions" : {
            "delete" : { }
          }
        }
      }
    }
  }
}
Node information :
node                 attr              value
elastic-data-node2   xpack.installed   true
elastic-data-node2   box_type          hot
elastic-data-node3   xpack.installed   true
elastic-data-node3   box_type          cold
elastic-data-node4   xpack.installed   true
elastic-data-node4   box_type          cold
elastic-master-node2 xpack.installed   true
elastic-master-node  xpack.installed   true
elastic-master-node3 xpack.installed   true
elastic-data-node    xpack.installed   true
elastic-data-node    box_type          hot
Template used :
{
  "journalbeat_elk_test_ilm" : {
    "order" : 0,
    "index_patterns" : [
      "journalbeat_elk_test-*"
    ],
    "settings" : {
      "index" : {
        "lifecycle" : {
          "name" : "test_ilm_hot_cold",
          "rollover_alias" : "test_journalbeat_elk"
        },
        "routing" : {
          "allocation" : {
            "require" : {
              "box_type" : "hot"
            }
          }
        },
        "number_of_shards" : "1",
        "number_of_replicas" : "1"
      }
    },
    "mappings" : { },
    "aliases" : { }
  }
} 
Alias used :
{
  "journalbeat_elk_test-000001" : {
    "aliases" : {
      "test_journalbeat_elk" : {
        "is_write_index" : true
      }
    }
  }
}
My question is: why can't the replica shard be allocated to a node with the same [box_type] attribute value as its primary shard?
Best regards,
Thomas R.


