Hello!
I am currently running Elasticsearch 7.9.1 on the AWS Elasticsearch Service.
I successfully restored a snapshot that I had made on my self-managed 7.2 instance.
The cluster works well, can easily ingest the required data and responds to my application's searches quickly. However, from time to time, all APIs stop responding and I get the stack trace shown below.
This lasts for about 30 minutes, after which all searches work again. I couldn't correlate the errors with any metrics on the machine (RAM usage is low, CPU usage is low, request rate is low, etc.).
[2020-12-03T08:38:52,511][WARN ][r.suppressed ] [efccc5336853afe6bfe7d31004a01b98] path: __PATH__ params: {index=censored_index_name}
org.elasticsearch.action.search.SearchPhaseExecutionException: all shards failed
at org.elasticsearch.action.search.AbstractSearchAsyncAction.onPhaseFailure(AbstractSearchAsyncAction.java:551) [elasticsearch-7.9.1.jar:7.9.1]
at org.elasticsearch.action.search.AbstractSearchAsyncAction.executeNextPhase(AbstractSearchAsyncAction.java:309) [elasticsearch-7.9.1.jar:7.9.1]
at org.elasticsearch.action.search.AbstractSearchAsyncAction.onPhaseDone(AbstractSearchAsyncAction.java:582) [elasticsearch-7.9.1.jar:7.9.1]
at org.elasticsearch.action.search.AbstractSearchAsyncAction.onShardFailure(AbstractSearchAsyncAction.java:393) [elasticsearch-7.9.1.jar:7.9.1]
at org.elasticsearch.action.search.AbstractSearchAsyncAction.access$100(AbstractSearchAsyncAction.java:68) [elasticsearch-7.9.1.jar:7.9.1]
at org.elasticsearch.action.search.AbstractSearchAsyncAction$1.onFailure(AbstractSearchAsyncAction.java:245) [elasticsearch-7.9.1.jar:7.9.1]
at org.elasticsearch.action.search.SearchExecutionStatsCollector.onFailure(SearchExecutionStatsCollector.java:73) [elasticsearch-7.9.1.jar:7.9.1]
at org.elasticsearch.action.ActionListenerResponseHandler.handleException(ActionListenerResponseHandler.java:59) [elasticsearch-7.9.1.jar:7.9.1]
at org.elasticsearch.action.search.SearchTransportService$ConnectionCountingHandler.handleException(SearchTransportService.java:403) [elasticsearch-7.9.1.jar:7.9.1]
at org.elasticsearch.transport.TransportService$6.handleException(TransportService.java:638) [elasticsearch-7.9.1.jar:7.9.1]
__AMAZON_INTERNAL__
at org.elasticsearch.transport.TransportService$ContextRestoreResponseHandler.handleException(TransportService.java:1172) [elasticsearch-7.9.1.jar:7.9.1]
at org.elasticsearch.transport.TransportService$DirectResponseChannel.processException(TransportService.java:1281) [elasticsearch-7.9.1.jar:7.9.1]
at org.elasticsearch.transport.TransportService$DirectResponseChannel.sendResponse(TransportService.java:1255) [elasticsearch-7.9.1.jar:7.9.1]
at org.elasticsearch.transport.TransportService.sendLocalRequest(TransportService.java:828) [elasticsearch-7.9.1.jar:7.9.1]
at org.elasticsearch.transport.TransportService.access$100(TransportService.java:76) [elasticsearch-7.9.1.jar:7.9.1]
at org.elasticsearch.transport.TransportService$3.sendRequest(TransportService.java:130) [elasticsearch-7.9.1.jar:7.9.1]
at org.elasticsearch.transport.TransportService.sendRequestInternal(TransportService.java:738) [elasticsearch-7.9.1.jar:7.9.1]
__AMAZON_INTERNAL__
__AMAZON_INTERNAL__
__AMAZON_INTERNAL__
__AMAZON_INTERNAL__
at org.elasticsearch.transport.TransportService.sendRequest(TransportService.java:652) [elasticsearch-7.9.1.jar:7.9.1]
at org.elasticsearch.transport.TransportService.sendChildRequest(TransportService.java:703) [elasticsearch-7.9.1.jar:7.9.1]
at org.elasticsearch.transport.TransportService.sendChildRequest(TransportService.java:695) [elasticsearch-7.9.1.jar:7.9.1]
at org.elasticsearch.action.search.SearchTransportService.sendExecuteQuery(SearchTransportService.java:138) [elasticsearch-7.9.1.jar:7.9.1]
at org.elasticsearch.action.search.SearchQueryThenFetchAsyncAction.executePhaseOnShard(SearchQueryThenFetchAsyncAction.java:79) [elasticsearch-7.9.1.jar:7.9.1]
at org.elasticsearch.action.search.AbstractSearchAsyncAction.lambda$performPhaseOnShard$3(AbstractSearchAsyncAction.java:231) [elasticsearch-7.9.1.jar:7.9.1]
at org.elasticsearch.action.search.AbstractSearchAsyncAction.performPhaseOnShard(AbstractSearchAsyncAction.java:266) [elasticsearch-7.9.1.jar:7.9.1]
at org.elasticsearch.action.search.AbstractSearchAsyncAction.onShardFailure(AbstractSearchAsyncAction.java:400) [elasticsearch-7.9.1.jar:7.9.1]
at org.elasticsearch.action.search.AbstractSearchAsyncAction.access$100(AbstractSearchAsyncAction.java:68) [elasticsearch-7.9.1.jar:7.9.1]
at org.elasticsearch.action.search.AbstractSearchAsyncAction$1.onFailure(AbstractSearchAsyncAction.java:245) [elasticsearch-7.9.1.jar:7.9.1]
at org.elasticsearch.action.search.SearchExecutionStatsCollector.onFailure(SearchExecutionStatsCollector.java:73) [elasticsearch-7.9.1.jar:7.9.1]
at org.elasticsearch.action.ActionListenerResponseHandler.handleException(ActionListenerResponseHandler.java:59) [elasticsearch-7.9.1.jar:7.9.1]
at org.elasticsearch.action.search.SearchTransportService$ConnectionCountingHandler.handleException(SearchTransportService.java:403) [elasticsearch-7.9.1.jar:7.9.1]
at org.elasticsearch.transport.TransportService$6.handleException(TransportService.java:638) [elasticsearch-7.9.1.jar:7.9.1]
__AMAZON_INTERNAL__
at org.elasticsearch.transport.TransportService$ContextRestoreResponseHandler.handleException(TransportService.java:1172) [elasticsearch-7.9.1.jar:7.9.1]
at org.elasticsearch.transport.InboundHandler.lambda$handleException$2(InboundHandler.java:235) [elasticsearch-7.9.1.jar:7.9.1]
at org.elasticsearch.common.util.concurrent.EsExecutors$DirectExecutorService.execute(EsExecutors.java:255) [elasticsearch-7.9.1.jar:7.9.1]
at org.elasticsearch.transport.InboundHandler.handleException(InboundHandler.java:233) [elasticsearch-7.9.1.jar:7.9.1]
at org.elasticsearch.transport.InboundHandler.handlerResponseError(InboundHandler.java:225) [elasticsearch-7.9.1.jar:7.9.1]
at org.elasticsearch.transport.InboundHandler.messageReceived(InboundHandler.java:115) [elasticsearch-7.9.1.jar:7.9.1]
at org.elasticsearch.transport.InboundHandler.inboundMessage(InboundHandler.java:78) [elasticsearch-7.9.1.jar:7.9.1]
at org.elasticsearch.transport.TcpTransport.inboundMessage(TcpTransport.java:692) [elasticsearch-7.9.1.jar:7.9.1]
at org.elasticsearch.transport.InboundPipeline.forwardFragments(InboundPipeline.java:142) [elasticsearch-7.9.1.jar:7.9.1]
at org.elasticsearch.transport.InboundPipeline.doHandleBytes(InboundPipeline.java:117) [elasticsearch-7.9.1.jar:7.9.1]
at org.elasticsearch.transport.InboundPipeline.handleBytes(InboundPipeline.java:82) [elasticsearch-7.9.1.jar:7.9.1]
at org.elasticsearch.transport.netty4.Netty4MessageChannelHandler.channelRead(Netty4MessageChannelHandler.java:76) [transport-netty4-client-7.9.1.jar:7.9.1]
at io.netty.channel.AbstractChannelHandlerContext.invokeChannelRead(AbstractChannelHandlerContext.java:379) [netty-transport-4.1.49.Final.jar:4.1.49.Final]
at io.netty.channel.AbstractChannelHandlerContext.invokeChannelRead(AbstractChannelHandlerContext.java:365) [netty-transport-4.1.49.Final.jar:4.1.49.Final]
at io.netty.channel.AbstractChannelHandlerContext.fireChannelRead(AbstractChannelHandlerContext.java:357) [netty-transport-4.1.49.Final.jar:4.1.49.Final]
at io.netty.handler.logging.LoggingHandler.channelRead(LoggingHandler.java:271) [netty-handler-4.1.49.Final.jar:4.1.49.Final]
at io.netty.channel.AbstractChannelHandlerContext.invokeChannelRead(AbstractChannelHandlerContext.java:379) [netty-transport-4.1.49.Final.jar:4.1.49.Final]
at io.netty.channel.AbstractChannelHandlerContext.invokeChannelRead(AbstractChannelHandlerContext.java:365) [netty-transport-4.1.49.Final.jar:4.1.49.Final]
at io.netty.channel.AbstractChannelHandlerContext.fireChannelRead(AbstractChannelHandlerContext.java:357) [netty-transport-4.1.49.Final.jar:4.1.49.Final]
at io.netty.handler.ssl.SslHandler.unwrap(SslHandler.java:1518) [netty-handler-4.1.49.Final.jar:4.1.49.Final]
at io.netty.handler.ssl.SslHandler.decodeJdkCompatible(SslHandler.java:1267) [netty-handler-4.1.49.Final.jar:4.1.49.Final]
at io.netty.handler.ssl.SslHandler.decode(SslHandler.java:1314) [netty-handler-4.1.49.Final.jar:4.1.49.Final]
at io.netty.handler.codec.ByteToMessageDecoder.decodeRemovalReentryProtection(ByteToMessageDecoder.java:501) [netty-codec-4.1.49.Final.jar:4.1.49.Final]
at io.netty.handler.codec.ByteToMessageDecoder.callDecode(ByteToMessageDecoder.java:440) [netty-codec-4.1.49.Final.jar:4.1.49.Final]
at io.netty.handler.codec.ByteToMessageDecoder.channelRead(ByteToMessageDecoder.java:276) [netty-codec-4.1.49.Final.jar:4.1.49.Final]
at io.netty.channel.AbstractChannelHandlerContext.invokeChannelRead(AbstractChannelHandlerContext.java:379) [netty-transport-4.1.49.Final.jar:4.1.49.Final]
at io.netty.channel.AbstractChannelHandlerContext.invokeChannelRead(AbstractChannelHandlerContext.java:365) [netty-transport-4.1.49.Final.jar:4.1.49.Final]
at io.netty.channel.AbstractChannelHandlerContext.fireChannelRead(AbstractChannelHandlerContext.java:357) [netty-transport-4.1.49.Final.jar:4.1.49.Final]
at io.netty.channel.DefaultChannelPipeline$HeadContext.channelRead(DefaultChannelPipeline.java:1410) [netty-transport-4.1.49.Final.jar:4.1.49.Final]
at io.netty.channel.AbstractChannelHandlerContext.invokeChannelRead(AbstractChannelHandlerContext.java:379) [netty-transport-4.1.49.Final.jar:4.1.49.Final]
at io.netty.channel.AbstractChannelHandlerContext.invokeChannelRead(AbstractChannelHandlerContext.java:365) [netty-transport-4.1.49.Final.jar:4.1.49.Final]
at io.netty.channel.DefaultChannelPipeline.fireChannelRead(DefaultChannelPipeline.java:919) [netty-transport-4.1.49.Final.jar:4.1.49.Final]
at io.netty.channel.nio.AbstractNioByteChannel$NioByteUnsafe.read(AbstractNioByteChannel.java:163) [netty-transport-4.1.49.Final.jar:4.1.49.Final]
at io.netty.channel.nio.NioEventLoop.processSelectedKey(NioEventLoop.java:714) [netty-transport-4.1.49.Final.jar:4.1.49.Final]
at io.netty.channel.nio.NioEventLoop.processSelectedKeysPlain(NioEventLoop.java:615) [netty-transport-4.1.49.Final.jar:4.1.49.Final]
at io.netty.channel.nio.NioEventLoop.processSelectedKeys(NioEventLoop.java:578) [netty-transport-4.1.49.Final.jar:4.1.49.Final]
at io.netty.channel.nio.NioEventLoop.run(NioEventLoop.java:493) [netty-transport-4.1.49.Final.jar:4.1.49.Final]
at io.netty.util.concurrent.SingleThreadEventExecutor$4.run(SingleThreadEventExecutor.java:989) [netty-common-4.1.49.Final.jar:4.1.49.Final]
at io.netty.util.internal.ThreadExecutorMap$2.run(ThreadExecutorMap.java:74) [netty-common-4.1.49.Final.jar:4.1.49.Final]
at java.lang.Thread.run(Thread.java:834) [?:?]
Caused by: org.elasticsearch.ElasticsearchException: java.io.OptionalDataException
__AMAZON_INTERNAL__
__AMAZON_INTERNAL__
__AMAZON_INTERNAL__
__AMAZON_INTERNAL__
__AMAZON_INTERNAL__
__AMAZON_INTERNAL__
__AMAZON_INTERNAL__
at org.elasticsearch.transport.RequestHandlerRegistry.processMessageReceived(RequestHandlerRegistry.java:72) ~[elasticsearch-7.9.1.jar:7.9.1]
at org.elasticsearch.transport.TransportService.sendLocalRequest(TransportService.java:794) ~[elasticsearch-7.9.1.jar:7.9.1]
... 62 more
Caused by: java.io.OptionalDataException
at java.io.ObjectInputStream.readObject0(ObjectInputStream.java:1669) ~[?:?]
at java.io.ObjectInputStream.readObject(ObjectInputStream.java:464) ~[?:?]
at java.io.ObjectInputStream.readObject(ObjectInputStream.java:422) ~[?:?]
at java.util.HashSet.readObject(HashSet.java:341) ~[?:?]
at jdk.internal.reflect.GeneratedMethodAccessor70.invoke(Unknown Source) ~[?:?]
at jdk.internal.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43) ~[?:?]
at java.lang.reflect.Method.invoke(Method.java:566) ~[?:?]
at java.io.ObjectStreamClass.invokeReadObject(ObjectStreamClass.java:1160) ~[?:?]
at java.io.ObjectInputStream.readSerialData(ObjectInputStream.java:2271) ~[?:?]
at java.io.ObjectInputStream.readOrdinaryObject(ObjectInputStream.java:2142) ~[?:?]
at java.io.ObjectInputStream.readObject0(ObjectInputStream.java:1646) ~[?:?]
at java.io.ObjectInputStream.defaultReadFields(ObjectInputStream.java:2410) ~[?:?]
at java.io.ObjectInputStream.readSerialData(ObjectInputStream.java:2304) ~[?:?]
at java.io.ObjectInputStream.readOrdinaryObject(ObjectInputStream.java:2142) ~[?:?]
at java.io.ObjectInputStream.readObject0(ObjectInputStream.java:1646) ~[?:?]
at java.io.ObjectInputStream.readObject(ObjectInputStream.java:464) ~[?:?]
at java.io.ObjectInputStream.readObject(ObjectInputStream.java:422) ~[?:?]
__AMAZON_INTERNAL__
__AMAZON_INTERNAL__
__AMAZON_INTERNAL__
__AMAZON_INTERNAL__
__AMAZON_INTERNAL__
__AMAZON_INTERNAL__
__AMAZON_INTERNAL__
at org.elasticsearch.transport.RequestHandlerRegistry.processMessageReceived(RequestHandlerRegistry.java:72) ~[elasticsearch-7.9.1.jar:7.9.1]
at org.elasticsearch.transport.TransportService.sendLocalRequest(TransportService.java:794) ~[elasticsearch-7.9.1.jar:7.9.1]
... 62 more