Hi Guys,
I am using SLM to create snapshots, but it is consistently failing to snapshot one of the shards. The shard is 120GB in size, but I have been able to snapshot shards this large for other indices successfully. I can't get any useful information from the failure details, which are shown below.
"details" : """{"type":"snapshot_exception","reason":"[snapshot_1:snap-2021.02.05-jkhokpgqs3o-fmzyuowvyw] failed to create snapshot successfully, 1 out of 68 total shards failed","stack_trace":"SnapshotException[[snapshot_1:snap-2021.02.05-jkhokpgqs3o-fmzyuowvyw] failed to create snapshot successfully, 1 out of 68 total shards failed]
at org.elasticsearch.xpack.slm.SnapshotLifecycleTask$1.onResponse(SnapshotLifecycleTask.java:110)
at org.elasticsearch.xpack.slm.SnapshotLifecycleTask$1.onResponse(SnapshotLifecycleTask.java:92)
at org.elasticsearch.action.support.ContextPreservingActionListener.onResponse(ContextPreservingActionListener.java:43)
at org.elasticsearch.action.support.TransportAction$1.onResponse(TransportAction.java:89)
at org.elasticsearch.action.support.TransportAction$1.onResponse(TransportAction.java:83)
at org.elasticsearch.action.support.ContextPreservingActionListener.onResponse(ContextPreservingActionListener.java:43)
at org.elasticsearch.action.ActionListener$2.onResponse(ActionListener.java:89)
at org.elasticsearch.action.ActionListener$4.onResponse(ActionListener.java:163)
at org.elasticsearch.action.ActionListener$4.onResponse(ActionListener.java:163)
at org.elasticsearch.action.ActionListener.onResponse(ActionListener.java:212)
at org.elasticsearch.snapshots.SnapshotsService.completeListenersIgnoringException(SnapshotsService.java:2610)
at org.elasticsearch.snapshots.SnapshotsService.lambda$finalizeSnapshotEntry$34(SnapshotsService.java:1557)
at org.elasticsearch.action.ActionListener$1.onResponse(ActionListener.java:63)
at org.elasticsearch.repositories.blobstore.BlobStoreRepository.lambda$finalizeSnapshot$37(BlobStoreRepository.java:1118)
at org.elasticsearch.action.ActionListener$1.onResponse(ActionListener.java:63)
at org.elasticsearch.action.ActionRunnable.lambda$supply$0(ActionRunnable.java:58)
at org.elasticsearch.action.ActionRunnable$2.doRun(ActionRunnable.java:73)
at org.elasticsearch.common.util.concurrent.ThreadContext$ContextPreservingAbstractRunnable.doRun(ThreadContext.java:743)
at org.elasticsearch.common.util.concurrent.AbstractRunnable.run(AbstractRunnable.java:37)
at java.base/java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1130)
at java.base/java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:630)
at java.base/java.lang.Thread.run(Thread.java:832)
Suppressed: [logs-2020/6ufyGTYUW9a0yoxa-8UQkg][[logs-2020][0]] IndexShardSnapshotFailedException[UncategorizedExecutionException[Failed execution]; nested: ExecutionException[java.io.IOException: Input/output error]; nested: IOException[Input/output error]]
at org.elasticsearch.snapshots.SnapshotShardFailure.<init>(SnapshotShardFailure.java:77)
at org.elasticsearch.snapshots.SnapshotShardFailure.<init>(SnapshotShardFailure.java:65)
at org.elasticsearch.snapshots.SnapshotsService.finalizeSnapshotEntry(SnapshotsService.java:1520)
at org.elasticsearch.snapshots.SnapshotsService.access$2100(SnapshotsService.java:127)
at org.elasticsearch.snapshots.SnapshotsService$7.onResponse(SnapshotsService.java:1468)
at org.elasticsearch.snapshots.SnapshotsService$7.onResponse(SnapshotsService.java:1465)
at org.elasticsearch.repositories.blobstore.BlobStoreRepository.getRepositoryData(BlobStoreRepository.java:1310)
at org.elasticsearch.snapshots.SnapshotsService.endSnapshot(SnapshotsService.java:1465)
at org.elasticsearch.snapshots.SnapshotsService.access$900(SnapshotsService.java:127)
at org.elasticsearch.snapshots.SnapshotsService$16.clusterStateProcessed(SnapshotsService.java:3105)
at org.elasticsearch.cluster.service.MasterService$SafeClusterStateTaskListener.clusterStateProcessed(MasterService.java:534)
at org.elasticsearch.cluster.service.MasterService$TaskOutputs.lambda$processedDifferentClusterState$1(MasterService.java:421)
at java.base/java.util.ArrayList.forEach(ArrayList.java:1511)
at org.elasticsearch.cluster.service.MasterService$TaskOutputs.processedDifferentClusterState(MasterService.java:421)
at org.elasticsearch.cluster.service.MasterService.onPublicationSuccess(MasterService.java:281)
at org.elasticsearch.cluster.service.MasterService.publish(MasterService.java:273)
at org.elasticsearch.cluster.service.MasterService.runTasks(MasterService.java:250)
at org.elasticsearch.cluster.service.MasterService.access$000(MasterService.java:73)
at org.elasticsearch.cluster.service.MasterService$Batcher.run(MasterService.java:151)
at org.elasticsearch.cluster.service.TaskBatcher.runIfNotProcessed(TaskBatcher.java:150)
at org.elasticsearch.cluster.service.TaskBatcher$BatchedTask.run(TaskBatcher.java:188)
at org.elasticsearch.common.util.concurrent.ThreadContext$ContextPreservingRunnable.run(ThreadContext.java:684)
at org.elasticsearch.common.util.concurrent.PrioritizedEsThreadPoolExecutor$TieBreakingPrioritizedRunnable.runAndClean(PrioritizedEsThreadPoolExecutor.java:252)
at org.elasticsearch.common.util.concurrent.PrioritizedEsThreadPoolExecutor$TieBreakingPrioritizedRunnable.run(PrioritizedEsThreadPoolExecutor.java:215)
... 3 more
","suppressed":[{"type":"index_shard_snapshot_failed_exception","reason":"UncategorizedExecutionException[Failed execution]; nested: ExecutionException[java.io.IOException: Input/output error]; nested: IOException[Input/output error]","index_uuid":"6ufyGTYUW9a0yoxa-8UQkg","shard":"0","index":"logs-2020","stack_trace":"[logs-2020/6ufyGTYUW9a0yoxa-8UQkg][[logs-2020][0]] IndexShardSnapshotFailedException[UncategorizedExecutionException[Failed execution]; nested: ExecutionException[java.io.IOException: Input/output error]; nested: IOException[Input/output error]]
at org.elasticsearch.snapshots.SnapshotShardFailure.<init>(SnapshotShardFailure.java:77)
at org.elasticsearch.snapshots.SnapshotShardFailure.<init>(SnapshotShardFailure.java:65)
at org.elasticsearch.snapshots.SnapshotsService.finalizeSnapshotEntry(SnapshotsService.java:1520)
at org.elasticsearch.snapshots.SnapshotsService.access$2100(SnapshotsService.java:127)
at org.elasticsearch.snapshots.SnapshotsService$7.onResponse(SnapshotsService.java:1468)
at org.elasticsearch.snapshots.SnapshotsService$7.onResponse(SnapshotsService.java:1465)
at org.elasticsearch.repositories.blobstore.BlobStoreRepository.getRepositoryData(BlobStoreRepository.java:1310)
at org.elasticsearch.snapshots.SnapshotsService.endSnapshot(SnapshotsService.java:1465)
at org.elasticsearch.snapshots.SnapshotsService.access$900(SnapshotsService.java:127)
at org.elasticsearch.snapshots.SnapshotsService$16.clusterStateProcessed(SnapshotsService.java:3105)
at org.elasticsearch.cluster.service.MasterService$SafeClusterStateTaskListener.clusterStateProcessed(MasterService.java:534)
at org.elasticsearch.cluster.service.MasterService$TaskOutputs.lambda$processedDifferentClusterState$1(MasterService.java:421)
at java.base/java.util.ArrayList.forEach(ArrayList.java:1511)
at org.elasticsearch.cluster.service.MasterService$TaskOutputs.processedDifferentClusterState(MasterService.java:421)
at org.elasticsearch.cluster.service.MasterService.onPublicationSuccess(MasterService.java:281)
at org.elasticsearch.cluster.service.MasterService.publish(MasterService.java:273)
at org.elasticsearch.cluster.service.MasterService.runTasks(MasterService.java:250)
at org.elasticsearch.cluster.service.MasterService.access$000(MasterService.java:73)
at org.elasticsearch.cluster.service.MasterService$Batcher.run(MasterService.java:151)
at org.elasticsearch.cluster.service.TaskBatcher.runIfNotProcessed(TaskBatcher.java:150)
at org.elasticsearch.cluster.service.TaskBatcher$BatchedTask.run(TaskBatcher.java:188)
at org.elasticsearch.common.util.concurrent.ThreadContext$ContextPreservingRunnable.run(ThreadContext.java:684)
at org.elasticsearch.common.util.concurrent.PrioritizedEsThreadPoolExecutor$TieBreakingPrioritizedRunnable.runAndClean(PrioritizedEsThreadPoolExecutor.java:252)
at org.elasticsearch.common.util.concurrent.PrioritizedEsThreadPoolExecutor$TieBreakingPrioritizedRunnable.run(PrioritizedEsThreadPoolExecutor.java:215)
at java.base/java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1130)
at java.base/java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:630)
at java.base/java.lang.Thread.run(Thread.java:832)\n"}]}"""