Index Failover to Warm node not happening

ajesh · September 10, 2020, 4:04pm

Hi Team,

My index is not rolling over to the warm nodes as per the ILM policy; it simply keeps growing. Please see the attachments. The ILM policy is set to roll over at an index size of 50GB or an age of 30 days. The index stats and screenshots are attached; I could not find any issues.

Please help

{
"_shards": {
"total": 2,
"successful": 2,
"failed": 0
},
"stats": {
"uuid": "g6NKgG6VT8ucP80NChpCxQ",
"primaries": {
"docs": {
"count": 58067343,
"deleted": 0
},
"store": {
"size_in_bytes": 46527647876,
"reserved_in_bytes": 0
},
"indexing": {
"index_total": 2153862,
"index_time_in_millis": 930895,
"index_current": 0,
"index_failed": 0,
"delete_total": 0,
"delete_time_in_millis": 0,
"delete_current": 0,
"noop_update_total": 0,
"is_throttled": false,
"throttle_time_in_millis": 0
},
"get": {
"total": 0,
"time_in_millis": 0,
"exists_total": 0,
"exists_time_in_millis": 0,
"missing_total": 0,
"missing_time_in_millis": 0,
"current": 0
},
"search": {
"open_contexts": 0,
"query_total": 13,
"query_time_in_millis": 868,
"query_current": 0,
"fetch_total": 13,
"fetch_time_in_millis": 0,
"fetch_current": 0,
"scroll_total": 0,
"scroll_time_in_millis": 0,
"scroll_current": 0,
"suggest_total": 0,
"suggest_time_in_millis": 0,
"suggest_current": 0
},
"merges": {
"current": 2,
"current_docs": 7361664,
"current_size_in_bytes": 5884551813,
"total": 50,
"total_time_in_millis": 662880,
"total_docs": 4826037,
"total_size_in_bytes": 4741719006,
"total_stopped_time_in_millis": 18663,
"total_throttled_time_in_millis": 360156,
"total_auto_throttle_in_bytes": 10584561
},
"refresh": {
"total": 111,
"total_time_in_millis": 57264,
"external_total": 98,
"external_total_time_in_millis": 56530,
"listeners": 0
},
"flush": {
"total": 9,
"periodic": 9,
"total_time_in_millis": 10576
},
"warmer": {
"current": 0,
"total": 97,
"total_time_in_millis": 12
},
"query_cache": {
"memory_size_in_bytes": 0,
"total_count": 1312,
"hit_count": 0,
"miss_count": 1312,
"cache_size": 0,
"cache_count": 0,
"evictions": 0
},
"fielddata": {
"memory_size_in_bytes": 0,
"evictions": 0
},
"completion": {
"size_in_bytes": 0
},
"segments": {
"count": 56,
"memory_in_bytes": 2781120,
"terms_memory_in_bytes": 1718304,
"stored_fields_memory_in_bytes": 528336,
"term_vectors_memory_in_bytes": 0,
"norms_memory_in_bytes": 0,
"points_memory_in_bytes": 0,
"doc_values_memory_in_bytes": 534480,
"index_writer_memory_in_bytes": 23996252,
"version_map_memory_in_bytes": 0,
"fixed_bit_set_memory_in_bytes": 0,
"max_unsafe_auto_id_timestamp": 1599753387064,
"file_sizes": {}
},
"translog": {
"operations": 251207,
"size_in_bytes": 534692472,
"uncommitted_operations": 251207,
"uncommitted_size_in_bytes": 534692472,
"earliest_last_modified_age": 0
},
"request_cache": {
"memory_size_in_bytes": 0,
"evictions": 0,
"hit_count": 0,
"miss_count": 0
},
"recovery": {
"current_as_source": 0,
"current_as_target": 0,
"throttle_time_in_millis": 0
}
},
"total": {
"docs": {
"count": 116133741,
"deleted": 0
},
"store": {
"size_in_bytes": 92558575289,
"reserved_in_bytes": 0
},
"indexing": {
"index_total": 4307717,
"index_time_in_millis": 1839373,
"index_current": 1,
"index_failed": 0,
"delete_total": 0,
"delete_time_in_millis": 0,
"delete_current": 0,
"noop_update_total": 0,
"is_throttled": false,
"throttle_time_in_millis": 0
},
"get": {
"total": 0,
"time_in_millis": 0,
"exists_total": 0,
"exists_time_in_millis": 0,
"missing_total": 0,
"missing_time_in_millis": 0,
"current": 0
},
"search": {
"open_contexts": 0,
"query_total": 20,
"query_time_in_millis": 2074,
"query_current": 0,
"fetch_total": 20,
"fetch_time_in_millis": 0,
"fetch_current": 0,
"scroll_total": 0,
"scroll_time_in_millis": 0,
"scroll_current": 0,
"suggest_total": 0,
"suggest_time_in_millis": 0,
"suggest_current": 0
},
"merges": {
"current": 4,
"current_docs": 13496833,
"current_size_in_bytes": 11315727435,
"total": 98,
"total_time_in_millis": 1332893,
"total_docs": 9255286,
"total_size_in_bytes": 8925772873,
"total_stopped_time_in_millis": 54120,
"total_throttled_time_in_millis": 739319,
"total_auto_throttle_in_bytes": 23286034
},
"refresh": {
"total": 220,
"total_time_in_millis": 115113,
"external_total": 196,
"external_total_time_in_millis": 114757,
"listeners": 0
},
"flush": {
"total": 19,
"periodic": 18,
"total_time_in_millis": 15781
},
"warmer": {
"current": 0,
"total": 194,
"total_time_in_millis": 26
},
"query_cache": {
"memory_size_in_bytes": 0,
"total_count": 1959,
"hit_count": 0,
"miss_count": 1959,
"cache_size": 0,
"cache_count": 0,
"evictions": 0
},
"fielddata": {
"memory_size_in_bytes": 0,
"evictions": 0
},
"completion": {
"size_in_bytes": 0
},
"segments": {
"count": 109,
"memory_in_bytes": 5412300,
"terms_memory_in_bytes": 3374816,
"stored_fields_memory_in_bytes": 1054936,
"term_vectors_memory_in_bytes": 0,
"norms_memory_in_bytes": 0,
"points_memory_in_bytes": 0,
"doc_values_memory_in_bytes": 982548,
"index_writer_memory_in_bytes": 49080332,
"version_map_memory_in_bytes": 0,
"fixed_bit_set_memory_in_bytes": 0,
"max_unsafe_auto_id_timestamp": 1599753387064,
"file_sizes": {}
},
"translog": {
"operations": 434737,
"size_in_bytes": 925679915,
"uncommitted_operations": 434737,
"uncommitted_size_in_bytes": 925679915,
"earliest_last_modified_age": 0
},
"request_cache": {
"memory_size_in_bytes": 0,
"evictions": 0,
"hit_count": 0,
"miss_count": 0
},
"recovery": {
"current_as_source": 0,
"current_as_target": 0,
"throttle_time_in_millis": 0
}
}
}
}

Christian_Dahlqvist · September 12, 2020, 9:02am

The specified size refers to primary shard size. Does the size you are looking at by any chance include the size of replica shards as well?

ajesh · September 12, 2020, 4:56pm

Hi Christian,

My understanding was that it would fail over when the total size (including primary and replica shards) exceeded the set size.

Failover to the warm node happened when it crossed 100GB.

Thank you Christian

Christian_Dahlqvist · September 12, 2020, 5:07pm

Given that you can change the number of replica shards at any time, the behavior could be very unpredictable if the full size were considered.

ajesh · September 13, 2020, 3:45pm

Thanks again for your help and support.

We are currently going with replica: 1 for all indices and are planning to archive to S3.

system · October 11, 2020, 3:45pm

This topic was automatically closed 28 days after the last reply. New replies are no longer allowed.