The index is large (~1.4 TB), and it seems to be a performance/scalability issue. However, what I do not understand is that some snapshots did succeed on the same ES nodes, and S3 should be reliable enough to store this amount of data.
The snapshot status is shown below:
{
"snapshots" : [
{
"snapshot" : "exabeam-2021.08.10",
"repository" : "exabeam_snapshot_repo",
"uuid" : "fRjaWFFfQDK7bHbJRR0yHw",
"state" : "SUCCESS",
"include_global_state" : false,
"shards_stats" : {
"initializing" : 0,
"started" : 0,
"finalizing" : 0,
"done" : 69,
"failed" : 21,
"total" : 90
},
"stats" : {
"incremental" : {
"file_count" : 1339,
"size_in_bytes" : 1407627500247
},
"total" : {
"file_count" : 1339,
"size_in_bytes" : 1407627500247
},
"start_time_in_millis" : 1634974652508,
"time_in_millis" : 11770904,
"number_of_files" : 1339,
"processed_files" : 1339,
"total_size_in_bytes" : 1407627500247,
"processed_size_in_bytes" : 1407627500247
},
"indices" : {
"exabeam-2021.08.10" : {
"shards_stats" : {
"initializing" : 0,
"started" : 0,
"finalizing" : 0,
"done" : 69,
"failed" : 21,
"total" : 90
},
"stats" : {
"incremental" : {
"file_count" : 1339,
"size_in_bytes" : 1407627500247
},
"total" : {
"file_count" : 1339,
"size_in_bytes" : 1407627500247
},
"start_time_in_millis" : 1634974652508,
"time_in_millis" : 11770904,
"number_of_files" : 1339,
"processed_files" : 1339,
"total_size_in_bytes" : 1407627500247,
"processed_size_in_bytes" : 1407627500247
},
"shards" : {
"0" : {
"stage" : "DONE",
"stats" : {
"incremental" : {
"file_count" : 15,
"size_in_bytes" : 25894495780
},
"total" : {
"file_count" : 15,
"size_in_bytes" : 25894495780
},
"start_time_in_millis" : 1634974652523,
"time_in_millis" : 13683609,
"number_of_files" : 15,
"processed_files" : 15,
"total_size_in_bytes" : 25894495780,
"processed_size_in_bytes" : 25894495780
}
},
"1" : {
"stage" : "DONE",
"stats" : {
"incremental" : {
"file_count" : 18,
"size_in_bytes" : 25894217032
},
"total" : {
"file_count" : 18,
"size_in_bytes" : 25894217032
},
"start_time_in_millis" : 1634974652530,
"time_in_millis" : 13699525,
"number_of_files" : 18,
"processed_files" : 18,
"total_size_in_bytes" : 25894217032,
"processed_size_in_bytes" : 25894217032
}
},
"2" : {
"stage" : "FAILURE",
"stats" : {
"incremental" : {
"file_count" : 0,
"size_in_bytes" : 0
},
"total" : {
"file_count" : 0,
"size_in_bytes" : 0
},
"start_time_in_millis" : 0,
"time_in_millis" : 0,
"number_of_files" : 0,
"processed_files" : 0,
"total_size_in_bytes" : 0,
"processed_size_in_bytes" : 0
},
"reason" : "IndexShardSnapshotFailedException[com.amazonaws.SdkClientException: Unable to execute HTTP request: s3archivingtest.s3.us-west-2.amazonaws.com]; nested: SdkClientException[Unable to execute HTTP request: s3archivingtest.s3.us-west-2.amazonaws.com]; nested: UnknownHostException[s3archivingtest.s3.us-west-2.amazonaws.com]; "
},
"3" : {
"stage" : "DONE",
"stats" : {
"incremental" : {
"file_count" : 18,
"size_in_bytes" : 25908855448
},
"total" : {
"file_count" : 18,
"size_in_bytes" : 25908855448
},
"start_time_in_millis" : 1634974652512,
"time_in_millis" : 13696316,
"number_of_files" : 18,
"processed_files" : 18,
"total_size_in_bytes" : 25908855448,
"processed_size_in_bytes" : 25908855448
}
},
...