Hot Warm Cold - not going to cold?

I'm learning about ILM and the hot-warm-cold architecture. For some reason, my indices aren't moving to the cold node OR being deleted. I'll show my experiment below; can someone tell me what I did wrong?

I have 3 nodes, configured like this in their respective elasticsearch.yml files:

# https://node1.example.com : /etc/elasticsearch.yml
node.roles: ["master", "data_content", "data_hot", "ingest", "ml", "remote_cluster_client", "transform"]

# https://node2.example.com : /etc/elasticsearch.yml
node.roles: ["master", "data_content", "data_warm", "ingest", "ml", "remote_cluster_client", "transform"]

# https://node3.example.com : /etc/elasticsearch.yml
node.roles: ["master", "data_content", "data_cold", "ingest", "ml", "remote_cluster_client", "transform"]

I then did this:

# set up policy
PUT _ilm/policy/hot-warm-cold-policy
{
  "policy": {
    "phases": {
      "hot": {
        "min_age": "0ms",
        "actions": {
          "rollover": {
            "max_age": "5m"
          }
        }
      },
      "warm": {
        "min_age": "5m",
        "actions": {
          "allocate": {
            "require": {
              "data": "warm"
            }
          },
          "forcemerge": {
            "max_num_segments": 1
          },
          "set_priority": {
            "priority": 50
          }
        }
      },
      "cold": {
        "min_age": "10m",
        "actions": {
          "allocate": {
            "require": {
              "data": "cold"
            }
          },
          "set_priority": {
            "priority": 0
          }
        }
      },
      "delete": {
        "min_age": "15m",
        "actions": {
          "delete": {}
        }
      }
    }
  }
}

# make index template
PUT _index_template/my_template
{
  "index_patterns": ["my-index-*"],
  "template": {
    "settings": {
      "index.lifecycle.name": "hot-warm-cold-policy",
      "index.lifecycle.rollover_alias": "my-index-alias"
    }
  }
}


# make index
PUT /my-index-000001
{
  "aliases": {
    "my-index-alias": {
      "is_write_index": true
    }
  }
}

Then I inserted some docs at random times throughout the hour like this:

POST /my-index-alias/_doc
{
  "user": "john_doe",
  "message": "I am hungry",
  "created_at": "2024-08-04T10:00:00Z"
}

I waited maybe 90 minutes or so (maybe longer, I can't remember), then ran this command:

GET my-index-alias/_ilm/explain

Which gave me this:

{
  "indices": {
    "my-index-000004": {
      "index": "my-index-000004",
      "managed": true,
      "policy": "hot-warm-cold-policy",
      "index_creation_date_millis": 1723062677730,
      "time_since_index_creation": "1.04h",
      "lifecycle_date_millis": 1723062677730,
      "age": "1.04h",
      "phase": "hot",
      "phase_time_millis": 1723062677892,
      "action": "rollover",
      "action_time_millis": 1723062678092,
      "step": "check-rollover-ready",
      "step_time_millis": 1723062678092,
      "phase_execution": {
        "policy": "hot-warm-cold-policy",
        "phase_definition": {
          "min_age": "0ms",
          "actions": {
            "rollover": {
              "max_age": "5m",
              "min_docs": 1,
              "max_primary_shard_docs": 200000000
            }
          }
        },
        "version": 1,
        "modified_date_in_millis": 1723060413343
      }
    },
    "my-index-000003": {
      "index": "my-index-000003",
      "managed": true,
      "policy": "hot-warm-cold-policy",
      "index_creation_date_millis": 1723061477730,
      "time_since_index_creation": "1.37h",
      "lifecycle_date_millis": 1723062677692,
      "age": "1.04h",
      "phase": "warm",
      "phase_time_millis": 1723063277561,
      "action": "allocate",
      "action_time_millis": 1723063277761,
      "step": "check-allocation",
      "step_time_millis": 1723063277961,
      "step_info": {
        "message": "Waiting for [2] shards to be allocated to nodes matching the given filters",
        "shards_left_to_allocate": 2,
        "all_shards_active": true,
        "number_of_replicas": 1
      },
      "phase_execution": {
        "policy": "hot-warm-cold-policy",
        "phase_definition": {
          "min_age": "5m",
          "actions": {
            "allocate": {
              "include": {},
              "exclude": {},
              "require": {
                "data": "warm"
              }
            },
            "forcemerge": {
              "max_num_segments": 1
            },
            "set_priority": {
              "priority": 50
            }
          }
        },
        "version": 1,
        "modified_date_in_millis": 1723060413343
      }
    },
    "my-index-000002": {
      "index": "my-index-000002",
      "managed": true,
      "policy": "hot-warm-cold-policy",
      "index_creation_date_millis": 1723060877731,
      "time_since_index_creation": "1.54h",
      "lifecycle_date_millis": 1723061477553,
      "age": "1.37h",
      "phase": "warm",
      "phase_time_millis": 1723062077623,
      "action": "allocate",
      "action_time_millis": 1723062077823,
      "step": "check-allocation",
      "step_time_millis": 1723062077823,
      "step_info": {
        "message": "Waiting for [2] shards to be allocated to nodes matching the given filters",
        "shards_left_to_allocate": 2,
        "all_shards_active": true,
        "number_of_replicas": 1
      },
      "phase_execution": {
        "policy": "hot-warm-cold-policy",
        "phase_definition": {
          "min_age": "5m",
          "actions": {
            "allocate": {
              "include": {},
              "exclude": {},
              "require": {
                "data": "warm"
              }
            },
            "forcemerge": {
              "max_num_segments": 1
            },
            "set_priority": {
              "priority": 50
            }
          }
        },
        "version": 1,
        "modified_date_in_millis": 1723060413343
      }
    },
    "my-index-000001": {
      "index": "my-index-000001",
      "managed": true,
      "policy": "hot-warm-cold-policy",
      "index_creation_date_millis": 1723060413733,
      "time_since_index_creation": "1.67h",
      "lifecycle_date_millis": 1723060877684,
      "age": "1.54h",
      "phase": "warm",
      "phase_time_millis": 1723061367504,
      "action": "allocate",
      "action_time_millis": 1723061367704,
      "step": "check-allocation",
      "step_time_millis": 1723061367904,
      "step_info": {
        "message": "Waiting for [2] shards to be allocated to nodes matching the given filters",
        "shards_left_to_allocate": 2,
        "all_shards_active": true,
        "number_of_replicas": 1
      },
      "phase_execution": {
        "policy": "hot-warm-cold-policy",
        "phase_definition": {
          "min_age": "5m",
          "actions": {
            "allocate": {
              "include": {},
              "exclude": {},
              "require": {
                "data": "warm"
              }
            },
            "forcemerge": {
              "max_num_segments": 1
            },
            "set_priority": {
              "priority": 50
            }
          }
        },
        "version": 1,
        "modified_date_in_millis": 1723060413343
      }
    }
  }
}

As you can see, my-index-000001 is just sitting in the warm phase. Did I do something wrong?

Can you run GET _cat/shards?v in Kibana Dev Tools and share the result for your test indices?

I'm assuming they will all be on the first node, the one with the data_hot role.

The issue here is that allocate uses custom attributes, not node roles.

Using something like this:

          "allocate": {
            "require": {
              "data": "warm"
            }

Would require you to have nodes with a custom attribute named data with the value warm, something like this in your elasticsearch.yml

node.attr.data: warm

This is not the same thing as using the built-in roles data_hot, data_warm and data_cold.
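
Spelled out for all three tiers, that approach would mean something along these lines across the nodes (the attribute name data is just whatever name the policy references, it is not built-in):

# node1
node.attr.data: hot

# node2
node.attr.data: warm

# node3
node.attr.data: cold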

To move data between nodes using the built-in roles you need to use the migrate action, if I'm not wrong; allocate requires you to use custom attributes on your nodes.
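
One quick way to see which custom attributes your nodes currently have is:

GET _cat/nodeattrs?v

If data does not show up there, the allocate step has nothing to match, which is why the explain output keeps showing "Waiting for [2] shards to be allocated".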

I normally build my policies using the Kibana UI; you could test it there and see the JSON payload it generates.

@leandrojmp

Thanks for your message! I've already torn down my cluster but your answer shed light on a lot of things!

That's because I had noticed the primary shard of my-index-000001 landed on node2 the moment it was created, and the replica shard on node3. This made me ask, "Wait... node2 and node3 are supposed to be data_warm and data_cold respectively... what's going on?"
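
For reference, the placement check was basically the _cat shards call you suggested, something like:

GET _cat/shards/my-index-*?v&h=index,shard,prirep,state,node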

So then I tore down my cluster. Then, in my index template, I added:

      "index.routing.allocation.require.temp": "hot",
      "index.number_of_replicas": 0,
      "index.number_of_shards": 1,

But the index couldn't be created... I got an error like this:

"type":"unavailable_shards_exception","reason":"[my-index-000001][0] primary shard is not active Timeout:

So this got me thinking: maybe node1 is underpowered? Maybe I need more than 3 nodes in my Elasticsearch cluster?

So my next experiment was to try this with a 5 node cluster.

But now you just raised an interesting point I wasn't aware of...the difference between allocate vs. migrate!

Still... I am very confused about when someone is supposed to use allocate with custom attributes vs. migrate with node.roles. I'm having trouble understanding when to use which.

Did you add a setting named node.attr.temp with the value hot on any of your nodes?

Because adding index.routing.allocation.require.temp in your template will force Elasticsearch to only allocate shards on nodes that have the custom attribute temp, in this case with the value hot.
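
If you did want to keep that require.temp setting, the hot node would need a matching attribute in its elasticsearch.yml, something like:

# node1
node.attr.temp: hot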

The main difference between allocate and migrate is that migrate uses the built-in data tier roles (data_hot, data_warm and data_cold), while allocate uses custom node attributes.

These custom node attributes need to be configured on each node in elasticsearch.yml. They are used when you need more advanced shard allocation awareness, for example telling Elasticsearch to allocate shards on different racks and things like that.

Using the built-in data roles is enough in most of the cases.
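
Under the hood, the migrate action works by updating the index's tier preference setting, so you can also confirm a move by checking that setting (filter_path just trims the response):

GET my-index-000001/_settings?filter_path=*.settings.index.routing.allocation.include._tier_preference

If I'm not wrong, in the warm phase it will be set to data_warm,data_hot and in the cold phase to data_cold,data_warm,data_hot, falling back to a hotter tier if the preferred one has no nodes.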

I also recommend that you create the policies first in the Kibana UI and then click the Show Request link to see how the policy would be created using the API.

That is so fascinating! A quick update: I am now using this policy:

# new policy
PUT _ilm/policy/a-hot-warm-cold-policy
{
  "policy": {
    "phases": {
      "hot": {
        "min_age": "0ms",
        "actions": {
          "rollover": {
            "max_age": "5m"
          }
        }
      },
      "warm": {
        "min_age": "5m",
        "actions": {
          "migrate": {},
          "allocate": {
            "number_of_replicas": 0
          },
          "forcemerge": {
            "max_num_segments": 1
          },
          "set_priority": {
            "priority": 50
          }
        }
      },
      "cold": {
        "min_age": "10m",
        "actions": {
          "migrate": {},
          "allocate": {
            "number_of_replicas": 0
          },
          "set_priority": {
            "priority": 0
          }
        }
      },
      "delete": {
        "min_age": "15m",
        "actions": {
          "delete": {}
        }
      }
    }
  }
}

# new index template
PUT _index_template/a_my_template
{
  "index_patterns": ["a-my-index-*"],
  "template": {
    "settings": {
      "index.routing.allocation.require.data": null,
      "index.routing.allocation.include._tier_preference": "data_hot",
      "index.number_of_replicas": 0,
      "index.number_of_shards": 1,
      "index.lifecycle.name": "a-hot-warm-cold-policy",
      "index.lifecycle.rollover_alias": "a-my-index-alias"
    }
  }
}

And now it seems like indices are migrating between the tiers. I'm doing some more tests to be sure.
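
(The "more tests" are basically just re-running the same explain and _cat shards calls against the new names:)

GET a-my-index-alias/_ilm/explain

GET _cat/shards/a-my-index-*?v&h=index,prirep,node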

Do you know if the "migrate": {} declaration needs to be explicitly typed into the policy?


And after a bit of reading, correct me if I'm wrong, but it seems like the migrate action is a feature released in Elasticsearch 7.10. Prior to 7.10, people used allocate. Introducing migrate in 7.10 was basically to formalize the process for the hot-warm-cold architecture?