Failed to snapshot shard# 2

Our configuration:

  • all nodes are hosted on a Jelastic cloud platform
  • all nodes are Docker containers
  • 3 ES nodes, v7.17.5
  • 1 Kibana node, v7.17.5
  • 1 storage node (NFS) with enough space to store the backup data
  • the backup folder is mounted on each ES node and points to the folder /data on the storage node

Problem:

  • for about 4 weeks, we have been getting the error "failed to snapshot shard" whenever a snapshot is created; the failure does not always occur on the same indices or the same shards
  • even after removing and re-creating the repository and taking new snapshots, we still see the same issue
"type": "server", "timestamp": "2022-12-22T22:11:39,128Z", "level": "WARN", "component": "o.e.s.SnapshotShardsService", "cluster.name": "cpdlc-cluster-isp", "node.name": "node107859-isp-cpdlc-cockpit.jcloud.ik-server.com", "message": "[[.monitoring-logstash-7-2022.12.19][0]][repo-7.17.5:daily-snap-2022.12.22-07kclfbssryr0ja7vo32fq/pDwN98avRResK35lu5JXQQ] failed to snapshot shard", "cluster.uuid": "-tz_xsBwSYa-_1pafbz-Gg", "node.id": "6K5bVet6SVuiZzpEyAuF_g" , 
"stacktrace": ["org.elasticsearch.common.util.concurrent.UncategorizedExecutionException: Failed execution",
"at org.elasticsearch.common.util.concurrent.FutureUtils.rethrowExecutionException(FutureUtils.java:80) ~[elasticsearch-7.17.5.jar:7.17.5]",
"at org.elasticsearch.common.util.concurrent.FutureUtils.get(FutureUtils.java:72) ~[elasticsearch-7.17.5.jar:7.17.5]",
"at org.elasticsearch.common.util.concurrent.ListenableFuture.notifyListenerDirectly(ListenableFuture.java:112) [elasticsearch-7.17.5.jar:7.17.5]",
"at org.elasticsearch.common.util.concurrent.ListenableFuture.done(ListenableFuture.java:100) [elasticsearch-7.17.5.jar:7.17.5]",
"at org.elasticsearch.common.util.concurrent.BaseFuture.setException(BaseFuture.java:149) [elasticsearch-7.17.5.jar:7.17.5]",
"at org.elasticsearch.common.util.concurrent.ListenableFuture.onFailure(ListenableFuture.java:147) [elasticsearch-7.17.5.jar:7.17.5]",
"at org.elasticsearch.action.StepListener.innerOnFailure(StepListener.java:57) [elasticsearch-7.17.5.jar:7.17.5]",
"at org.elasticsearch.action.NotifyOnceListener.onFailure(NotifyOnceListener.java:36) [elasticsearch-7.17.5.jar:7.17.5]",
"at org.elasticsearch.action.ActionListener$Delegating.onFailure(ActionListener.java:66) [elasticsearch-7.17.5.jar:7.17.5]",
"at org.elasticsearch.action.support.GroupedActionListener.onFailure(GroupedActionListener.java:72) [elasticsearch-7.17.5.jar:7.17.5]",
"at org.elasticsearch.repositories.blobstore.BlobStoreRepository.lambda$fileQueueListener$82(BlobStoreRepository.java:3138) [elasticsearch-7.17.5.jar:7.17.5]",
"at org.elasticsearch.action.ActionListener$DelegatingActionListener.onFailure(ActionListener.java:192) [elasticsearch-7.17.5.jar:7.17.5]",
"at org.elasticsearch.action.ActionRunnable.onFailure(ActionRunnable.java:77) [elasticsearch-7.17.5.jar:7.17.5]",
"at org.elasticsearch.common.util.concurrent.ThreadContext$ContextPreservingAbstractRunnable.onFailure(ThreadContext.java:765) [elasticsearch-7.17.5.jar:7.17.5]",
"at org.elasticsearch.common.util.concurrent.AbstractRunnable.run(AbstractRunnable.java:28) [elasticsearch-7.17.5.jar:7.17.5]",
"at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1136) [?:?]",
"at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:635) [?:?]",
"at java.lang.Thread.run(Thread.java:833) [?:?]",
"Caused by: java.util.concurrent.ExecutionException: java.io.IOException: Input/output error",
"at org.elasticsearch.common.util.concurrent.BaseFuture$Sync.getValue(BaseFuture.java:257) ~[elasticsearch-7.17.5.jar:7.17.5]",
"at org.elasticsearch.common.util.concurrent.BaseFuture$Sync.get(BaseFuture.java:231) ~[elasticsearch-7.17.5.jar:7.17.5]",
"at org.elasticsearch.common.util.concurrent.BaseFuture.get(BaseFuture.java:53) ~[elasticsearch-7.17.5.jar:7.17.5]",
"at org.elasticsearch.common.util.concurrent.FutureUtils.get(FutureUtils.java:65) ~[elasticsearch-7.17.5.jar:7.17.5]",
"... 16 more",
"Caused by: java.io.IOException: Input/output error",
"at java.io.FileDescriptor.close0(Native Method) ~[?:?]",
"at java.io.FileDescriptor.close(FileDescriptor.java:297) ~[?:?]",
"at java.io.FileDescriptor$1.close(FileDescriptor.java:88) ~[?:?]",
"at sun.nio.ch.FileChannelImpl$Closer.run(FileChannelImpl.java:115) ~[?:?]",
"at jdk.internal.ref.CleanerImpl$PhantomCleanableRef.performCleanup(CleanerImpl.java:178) ~[?:?]",
"at jdk.internal.ref.PhantomCleanable.clean(PhantomCleanable.java:133) ~[?:?]",
"at sun.nio.ch.FileChannelImpl.implCloseChannel(FileChannelImpl.java:207) ~[?:?]",
"at java.nio.channels.spi.AbstractInterruptibleChannel.close(AbstractInterruptibleChannel.java:112) ~[?:?]",
"at sun.nio.ch.ChannelOutputStream.close(ChannelOutputStream.java:127) ~[?:?]",
"at org.elasticsearch.core.internal.io.IOUtils.close(IOUtils.java:74) ~[elasticsearch-core-7.17.5.jar:7.17.5]",
"at org.elasticsearch.core.internal.io.IOUtils.close(IOUtils.java:116) ~[elasticsearch-core-7.17.5.jar:7.17.5]",
"at org.elasticsearch.core.internal.io.IOUtils.close(IOUtils.java:87) ~[elasticsearch-core-7.17.5.jar:7.17.5]",
"at org.elasticsearch.core.internal.io.Streams.copy(Streams.java:52) ~[elasticsearch-core-7.17.5.jar:7.17.5]",
"at org.elasticsearch.core.internal.io.Streams.copy(Streams.java:68) ~[elasticsearch-core-7.17.5.jar:7.17.5]",
"at org.elasticsearch.common.blobstore.fs.FsBlobContainer.writeToPath(FsBlobContainer.java:317) ~[elasticsearch-7.17.5.jar:7.17.5]",
"at org.elasticsearch.common.blobstore.fs.FsBlobContainer.writeBlob(FsBlobContainer.java:221) ~[elasticsearch-7.17.5.jar:7.17.5]",
"at org.elasticsearch.repositories.blobstore.BlobStoreRepository.snapshotFile(BlobStoreRepository.java:3493) ~[elasticsearch-7.17.5.jar:7.17.5]",
"at org.elasticsearch.repositories.blobstore.BlobStoreRepository.lambda$executeOneFileSnapshot$78(BlobStoreRepository.java:2938) ~[elasticsearch-7.17.5.jar:7.17.5]",
"at org.elasticsearch.action.ActionRunnable$2.doRun(ActionRunnable.java:62) ~[elasticsearch-7.17.5.jar:7.17.5]",
"at org.elasticsearch.common.util.concurrent.ThreadContext$ContextPreservingAbstractRunnable.doRun(ThreadContext.java:777) ~[elasticsearch-7.17.5.jar:7.17.5]",
"at org.elasticsearch.common.util.concurrent.AbstractRunnable.run(AbstractRunnable.java:26) ~[elasticsearch-7.17.5.jar:7.17.5]",
"... 3 more",
"Suppressed: java.io.IOException: Input/output error",
"\tat sun.nio.ch.FileDispatcherImpl.write0(Native Method) ~[?:?]",
"\tat sun.nio.ch.FileDispatcherImpl.write(FileDispatcherImpl.java:62) ~[?:?]",
"\tat sun.nio.ch.IOUtil.writeFromNativeBuffer(IOUtil.java:137) ~[?:?]",
"\tat sun.nio.ch.IOUtil.write(IOUtil.java:102) ~[?:?]",
"\tat sun.nio.ch.IOUtil.write(IOUtil.java:72) ~[?:?]",
"\tat sun.nio.ch.FileChannelImpl.write(FileChannelImpl.java:288) ~[?:?]",
"\tat sun.nio.ch.ChannelOutputStream.writeFullyImpl(ChannelOutputStream.java:60) ~[?:?]",
"\tat sun.nio.ch.ChannelOutputStream.writeFully(ChannelOutputStream.java:82) ~[?:?]",
"\tat sun.nio.ch.ChannelOutputStream.write(ChannelOutputStream.java:122) ~[?:?]",
"\tat org.elasticsearch.core.internal.io.Streams.copy(Streams.java:42) ~[elasticsearch-core-7.17.5.jar:7.17.5]",
"\tat org.elasticsearch.core.internal.io.Streams.copy(Streams.java:68) ~[elasticsearch-core-7.17.5.jar:7.17.5]",
"\tat org.elasticsearch.common.blobstore.fs.FsBlobContainer.writeToPath(FsBlobContainer.java:317) ~[elasticsearch-7.17.5.jar:7.17.5]",
"\tat org.elasticsearch.common.blobstore.fs.FsBlobContainer.writeBlob(FsBlobContainer.java:221) ~[elasticsearch-7.17.5.jar:7.17.5]",
"\tat org.elasticsearch.repositories.blobstore.BlobStoreRepository.snapshotFile(BlobStoreRepository.java:3493) ~[elasticsearch-7.17.5.jar:7.17.5]",
"\tat org.elasticsearch.repositories.blobstore.BlobStoreRepository.lambda$executeOneFileSnapshot$78(BlobStoreRepository.java:2938) ~[elasticsearch-7.17.5.jar:7.17.5]",
"\tat org.elasticsearch.action.ActionRunnable$2.doRun(ActionRunnable.java:62) ~[elasticsearch-7.17.5.jar:7.17.5]",
"\tat org.elasticsearch.common.util.concurrent.ThreadContext$ContextPreservingAbstractRunnable.doRun(ThreadContext.java:777) [elasticsearch-7.17.5.jar:7.17.5]",
"\tat org.elasticsearch.common.util.concurrent.AbstractRunnable.run(AbstractRunnable.java:26) [elasticsearch-7.17.5.jar:7.17.5]",
"\tat java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1136) [?:?]",
"\tat java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:635) [?:?]",
"\tat java.lang.Thread.run(Thread.java:833) [?:?]"] }

This topic was automatically closed 28 days after the last reply. New replies are no longer allowed.