Hi Team,
We got below exceptions multiple times in our cluster (2 nodes) setup. After some times it gets auto recovered and ES is running fine. We want to understand what is the exact cause and is there any data loss scenario due to this.
org.elasticsearch.action.FailedNodeException: Failed node [QyoH4l9vTPyn8pwE32G6oA]
at org.elasticsearch.action.support.nodes.TransportNodesAction$AsyncAction.onFailure(TransportNodesAction.java:247) [elasticsearch-5.2.2.jar:5.2.2]
at org.elasticsearch.action.support.nodes.TransportNodesAction$AsyncAction.access$300(TransportNodesAction.java:160) [elasticsearch-5.2.2.jar:5.2.2]
at org.elasticsearch.action.support.nodes.TransportNodesAction$AsyncAction$1.handleException(TransportNodesAction.java:219) [elasticsearch-5.2.2.jar:5.2.2]
at org.elasticsearch.transport.TransportService$ContextRestoreResponseHandler.handleException(TransportService.java:1024) [elasticsearch-5.2.2.jar:5.2.2]
at ......
at org.elasticsearch.common.util.concurrent.ThreadContext$ContextPreservingAbstractRunnable.onFailure(ThreadContext.java:581) [elasticsearch-5.2.2.jar:5.2.2]
at org.elasticsearch.common.util.concurrent.AbstractRunnable.run(AbstractRunnable.java:39) [elasticsearch-5.2.2.jar:5.2.2]
at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149) [?:1.8.0_152]
at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624) [?:1.8.0_152]
at java.lang.Thread.run(Thread.java:748) [?:1.8.0_152]
Caused by: org.elasticsearch.transport.RemoteTransportException: [QyoH4l9][10.133.168.249:9300][internal:gateway/local/started_shards[n]]
Caused by: org.elasticsearch.ElasticsearchException: failed to load started shards
at org.elasticsearch.gateway.TransportNodesListGatewayStartedShards.nodeOperation(TransportNodesListGatewayStartedShards.java:171) ~[elasticsearch-5.2.2.jar:5.2.2]
at org.elasticsearch.gateway.TransportNodesListGatewayStartedShards.nodeOperation(TransportNodesListGatewayStartedShards.java:61) ~[elasticsearch-5.2.2.jar:5.2.2]
at org.elasticsearch.action.support.nodes.TransportNodesAction.nodeOperation(TransportNodesAction.java:145) ~[elasticsearch-5.2.2.jar:5.2.2]
........~[elasticsearch-5.2.2.jar:5.2.2]
at org.elasticsearch.common.util.concurrent.ThreadContext$ContextPreservingAbstractRunnable.doRun(ThreadContext.java:596) ~[elasticsearch-5.2.2.jar:5.2.2]
at org.elasticsearch.common.util.concurrent.AbstractRunnable.run(AbstractRunnable.java:37) ~[elasticsearch-5.2.2.jar:5.2.2]
... 3 more
Caused by: org.elasticsearch.ElasticsearchException: java.io.IOException: failed to read [id:149, legacy:false, file:C:\NEW\truesightpserver\modules\elasticsearch\data\nodes\0\indices\1CueDM1RTCanv6L_3a902w_state\state-149.st]
at org.elasticsearch.ExceptionsHelper.maybeThrowRuntimeAndSuppress(ExceptionsHelper.java:150) ~[elasticsearch-5.2.2.jar:5.2.2]
at org.elasticsearch.gateway.MetaDataStateFormat.loadLatestState(MetaDataStateFormat.java:334) ~[elasticsearch-5.2.2.jar:5.2.2]
at org.elasticsearch.gateway.TransportNodesListGatewayStartedShards.nodeOperation(Tr.......
at org.elasticsearch.action.support.nodes.TransportNodesAction$NodeTransportHandler.messageReceived(TransportNodesAction.java:266) ~[elasticsearch-5.2.2.jar:5.2.2]
at org.elasticsearch.transport.RequestHandlerRegistry.processMessageReceived(RequestHandlerRegistry.java:69) ~[elasticsearch-5.2.2.jar:5.2.2]
at org.elasticsearch.transport.TransportService$7.doRun(TransportService.java:610) ~[elasticsearch-5.2.2.jar:5.2.2]
at org.elasticsearch.common.util.concurrent.ThreadContext$ContextPreservingAbstractRunnable.doRun(ThreadContext.java:596) ~[elasticsearch-5.2.2.jar:5.2.2]
at org.elasticsearch.common.util.concurrent.AbstractRunnable.run(AbstractRunnable.java:37) ~[elasticsearch-5.2.2.jar:5.2.2]
... 3 more
Caused by: java.io.IOException: failed to read [id:149, legacy:false, file:C:\NEW\truesightpserver\modules\elasticsearch\data\nodes\0\indices\1CueDM1RTCanv6L_3a902w_state\state-149.st]
at org.elasticsearch.gateway.MetaDataStateFormat.loadLatestState(MetaDataStateFormat.java:327) ~[elasticsearch-5.2.2.jar:5.2.2]
at org.elasticsearch.gateway.TransportNodesListGatewayStartedShards.nodeOperation(TransportNodesListGatewayStartedShards.java:127) ~[elasticsearch-5.2.2.jar:5.2.2]
at org.elasticsearch.common.util.concurrent.AbstractRunnable.run(AbstractRunnable.java:37) ~[elasticsearch-5.2.2.jar:5.2.2]
... 3 more
Caused by: java.nio.file.NoSuchFileException: C:\NEW\truesightpserver\modules\elasticsearch\data\nodes\0\indices\1CueDM1RTCanv6L_3a902w_state\state-149.st
at sun.nio.fs.WindowsException.translateToIOException(WindowsException.java:79) ~[?:1.8.0_152]
at sun.nio.fs.WindowsException.rethrowAsIOException(WindowsException.java:97) ~[?:1.8.0_152]
at sun.nio.fs.WindowsException.rethrowAsIOException(WindowsException.java:102) ~[?:1.8.0_152]
at sun.nio.fs.WindowsFileSystemProvider.newByteChannel(WindowsFileSystemProvider.java:230) ~[?:1.8.0_152]
at java.nio.file.Files.newByteChannel(Files.java:361) ~[?:1.8.0_152]
at java.nio.file.Files.newByteChannel(Files.java:407) ~[?:1.8.0_152]
at org.apache.lucene.store.SimpleFSDirectory.openInput(SimpleFSDirectory.java:77) ~[lucene-core-6.4.1.jar:6.4.1 72f75b2503fa0aa4f0aff76d439874feb923bb0e - jpountz - 2017-02-01 14:43:32]
at org.elasticsearch.gateway.MetaDataStateFormat.read(MetaDataStateFormat.java:187) ~[elasticsearch-5.2.2.jar:5.2.2]
at org.elasticsearch.gateway.MetaDataStateFormat.loadLatestState(MetaDataStateFormat.java:322) ~[elasticsearch-5.2.2.jar:5.2.2]
at org.elasticsearch.gateway.TransportNodesListGatewayStartedShards.nodeOperation(TransportNodesListGatewayStartedShards.java:127) ~[elasticsearch-5.2.2.jar:5.2.2]
at org.elasticsearch.gateway.TransportNodesListGatewayStartedShards.nodeOperation(TransportNodesListGatewayStartedShards.java:61) ~[elasticsearch-5.2.2.jar:5.2.2]
at org.elasticsearch.action.support.nodes.TransportNodesAction.nodeOperation(TransportNodesAction.java:145) ~[elasticsearch-5.2.2.jar:5.2.2]
at org.elasticsearch.action.support.nodes.TransportNodesAction$NodeTransportHandler.messageReceived(TransportNodesAction.java:270) ~[elasticsearch-5.2.2.jar:5.2.2]
at org.elasticsearch.action.support.nodes.TransportNodesAction$NodeTransportHandler.messageReceived(TransportNodesAction.java:266) ~[elasticsearch-5.2.2.jar:5.2.2]
at org.elasticsearch.transport.RequestHandlerRegistry.processMessageReceived(RequestHandlerRegistry.java:69) ~[elasticsearch-5.2.2.jar:5.2.2]
at org.elasticsearch.transport.TransportService$7.doRun(TransportService.java:610) ~[elasticsearch-5.2.2.jar:5.2.2]
at org.elasticsearch.common.util.concurrent.ThreadContext$ContextPreservingAbstractRunnable.doRun(ThreadContext.java:596) ~[elasticsearch-5.2.2.jar:5.2.2]
at org.elasticsearch.common.util.concurrent.AbstractRunnable.run(AbstractRunnable.java:37) ~[elasticsearch-5.2.2.jar:5.2.2]
... 3 more