Index Failover to Warm node not happening

Hi Team,

My index is not rolling over to the warm nodes as per the ILM policy; it simply keeps growing. Please see the attached details. The ILM policy is set to roll over at an index size of 50GB or an age of 30 days. Index stats and screenshots are attached; I could not find any issues.

Please help

{
"_shards": {
"total": 2,
"successful": 2,
"failed": 0
},
"stats": {
"uuid": "g6NKgG6VT8ucP80NChpCxQ",
"primaries": {
"docs": {
"count": 58067343,
"deleted": 0
},
"store": {
"size_in_bytes": 46527647876,
"reserved_in_bytes": 0
},
"indexing": {
"index_total": 2153862,
"index_time_in_millis": 930895,
"index_current": 0,
"index_failed": 0,
"delete_total": 0,
"delete_time_in_millis": 0,
"delete_current": 0,
"noop_update_total": 0,
"is_throttled": false,
"throttle_time_in_millis": 0
},
"get": {
"total": 0,
"time_in_millis": 0,
"exists_total": 0,
"exists_time_in_millis": 0,
"missing_total": 0,
"missing_time_in_millis": 0,
"current": 0
},
"search": {
"open_contexts": 0,
"query_total": 13,
"query_time_in_millis": 868,
"query_current": 0,
"fetch_total": 13,
"fetch_time_in_millis": 0,
"fetch_current": 0,
"scroll_total": 0,
"scroll_time_in_millis": 0,
"scroll_current": 0,
"suggest_total": 0,
"suggest_time_in_millis": 0,
"suggest_current": 0
},
"merges": {
"current": 2,
"current_docs": 7361664,
"current_size_in_bytes": 5884551813,
"total": 50,
"total_time_in_millis": 662880,
"total_docs": 4826037,
"total_size_in_bytes": 4741719006,
"total_stopped_time_in_millis": 18663,
"total_throttled_time_in_millis": 360156,
"total_auto_throttle_in_bytes": 10584561
},
"refresh": {
"total": 111,
"total_time_in_millis": 57264,
"external_total": 98,
"external_total_time_in_millis": 56530,
"listeners": 0
},
"flush": {
"total": 9,
"periodic": 9,
"total_time_in_millis": 10576
},
"warmer": {
"current": 0,
"total": 97,
"total_time_in_millis": 12
},
"query_cache": {
"memory_size_in_bytes": 0,
"total_count": 1312,
"hit_count": 0,
"miss_count": 1312,
"cache_size": 0,
"cache_count": 0,
"evictions": 0
},
"fielddata": {
"memory_size_in_bytes": 0,
"evictions": 0
},
"completion": {
"size_in_bytes": 0
},
"segments": {
"count": 56,
"memory_in_bytes": 2781120,
"terms_memory_in_bytes": 1718304,
"stored_fields_memory_in_bytes": 528336,
"term_vectors_memory_in_bytes": 0,
"norms_memory_in_bytes": 0,
"points_memory_in_bytes": 0,
"doc_values_memory_in_bytes": 534480,
"index_writer_memory_in_bytes": 23996252,
"version_map_memory_in_bytes": 0,
"fixed_bit_set_memory_in_bytes": 0,
"max_unsafe_auto_id_timestamp": 1599753387064,
"file_sizes": {}
},
"translog": {
"operations": 251207,
"size_in_bytes": 534692472,
"uncommitted_operations": 251207,
"uncommitted_size_in_bytes": 534692472,
"earliest_last_modified_age": 0
},
"request_cache": {
"memory_size_in_bytes": 0,
"evictions": 0,
"hit_count": 0,
"miss_count": 0
},
"recovery": {
"current_as_source": 0,
"current_as_target": 0,
"throttle_time_in_millis": 0
}
},
"total": {
"docs": {
"count": 116133741,
"deleted": 0
},
"store": {
"size_in_bytes": 92558575289,
"reserved_in_bytes": 0
},
"indexing": {
"index_total": 4307717,
"index_time_in_millis": 1839373,
"index_current": 1,
"index_failed": 0,
"delete_total": 0,
"delete_time_in_millis": 0,
"delete_current": 0,
"noop_update_total": 0,
"is_throttled": false,
"throttle_time_in_millis": 0
},
"get": {
"total": 0,
"time_in_millis": 0,
"exists_total": 0,
"exists_time_in_millis": 0,
"missing_total": 0,
"missing_time_in_millis": 0,
"current": 0
},
"search": {
"open_contexts": 0,
"query_total": 20,
"query_time_in_millis": 2074,
"query_current": 0,
"fetch_total": 20,
"fetch_time_in_millis": 0,
"fetch_current": 0,
"scroll_total": 0,
"scroll_time_in_millis": 0,
"scroll_current": 0,
"suggest_total": 0,
"suggest_time_in_millis": 0,
"suggest_current": 0
},
"merges": {
"current": 4,
"current_docs": 13496833,
"current_size_in_bytes": 11315727435,
"total": 98,
"total_time_in_millis": 1332893,
"total_docs": 9255286,
"total_size_in_bytes": 8925772873,
"total_stopped_time_in_millis": 54120,
"total_throttled_time_in_millis": 739319,
"total_auto_throttle_in_bytes": 23286034
},
"refresh": {
"total": 220,
"total_time_in_millis": 115113,
"external_total": 196,
"external_total_time_in_millis": 114757,
"listeners": 0
},
"flush": {
"total": 19,
"periodic": 18,
"total_time_in_millis": 15781
},
"warmer": {
"current": 0,
"total": 194,
"total_time_in_millis": 26
},
"query_cache": {
"memory_size_in_bytes": 0,
"total_count": 1959,
"hit_count": 0,
"miss_count": 1959,
"cache_size": 0,
"cache_count": 0,
"evictions": 0
},
"fielddata": {
"memory_size_in_bytes": 0,
"evictions": 0
},
"completion": {
"size_in_bytes": 0
},
"segments": {
"count": 109,
"memory_in_bytes": 5412300,
"terms_memory_in_bytes": 3374816,
"stored_fields_memory_in_bytes": 1054936,
"term_vectors_memory_in_bytes": 0,
"norms_memory_in_bytes": 0,
"points_memory_in_bytes": 0,
"doc_values_memory_in_bytes": 982548,
"index_writer_memory_in_bytes": 49080332,
"version_map_memory_in_bytes": 0,
"fixed_bit_set_memory_in_bytes": 0,
"max_unsafe_auto_id_timestamp": 1599753387064,
"file_sizes": {}
},
"translog": {
"operations": 434737,
"size_in_bytes": 925679915,
"uncommitted_operations": 434737,
"uncommitted_size_in_bytes": 925679915,
"earliest_last_modified_age": 0
},
"request_cache": {
"memory_size_in_bytes": 0,
"evictions": 0,
"hit_count": 0,
"miss_count": 0
},
"recovery": {
"current_as_source": 0,
"current_as_target": 0,
"throttle_time_in_millis": 0
}
}
}
}

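The specified size refers to the primary shard size. In the stats above, the primaries' store size is roughly 46.5 GB (still below the 50GB threshold), while the total including replicas is roughly 92.6 GB. Does the size you are looking at by any chance include the size of the replica shards as well?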

1 Like

Hi Christian,

My understanding was that it would fail over once the total size (including primary and replica shards) exceeded the configured size.

The failover to the warm node happened when the index crossed 100GB.

Thank you Christian

Given that you can change the number of replica shards at any time, the behavior could be very unpredictable if the full size were considered.

Thanks again for your help and support.

We are currently going with replica: 1 for all indices and are planning to archive to S3.