More logs from a different occasion when the same issue happened again. Out of roughly 50k documents in one particular index, only about 250 remained; the rest were lost.
{
"@timestamp": "2024-11-07T08:12:58.303Z",
"log.level": "WARN",
"message": "path: /suggestedconnectionsdb/_search, params: {typed_keys=true, index=suggestedconnectionsdb}, status: 503",
"ecs.version": "1.2.0",
"service.name": "ES_ECS",
"event.dataset": "elasticsearch.server",
"process.thread.name": "elasticsearch[e6f71666ef1c][search][T#15]",
"log.logger": "rest.suppressed",
"elasticsearch.cluster.uuid": "rFFNxXCdSuWNql4Ue33KCQ",
"elasticsearch.node.id": "kn5WbDj1SNmnnzaEdYtAQA",
"elasticsearch.node.name": "e6f71666ef1c",
"elasticsearch.cluster.name": "docker-cluster",
"error.type": "org.elasticsearch.action.search.SearchPhaseExecutionException",
"error.message": "all shards failed",
"error.stack_trace": "Failed to execute phase [query], all shards failed; shardFailures {[kn5WbDj1SNmnnzaEdYtAQA][suggestedconnectionsdb][0]: org.elasticsearch.action.NoShardAvailableActionException: [e6f71666ef1c][172.18.0.2:9300][indices:data/read/search[phase/query]]\n}\n\tat org.elasticsearch.server@8.13.2/org.elasticsearch.action.search.AbstractSearchAsyncAction.onPhaseFailure(AbstractSearchAsyncAction.java:712)\n\tat org.elasticsearch.server@8.13.2/org.elasticsearch.action.search.AbstractSearchAsyncAction.executeNextPhase(AbstractSearchAsyncAction.java:404)\n\tat org.elasticsearch.server@8.13.2/org.elasticsearch.action.search.AbstractSearchAsyncAction.onPhaseDone(AbstractSearchAsyncAction.java:744)\n\tat org.elasticsearch.server@8.13.2/org.elasticsearch.action.search.AbstractSearchAsyncAction.onShardFailure(AbstractSearchAsyncAction.java:497)\n\tat org.elasticsearch.server@8.13.2/org.elasticsearch.action.search.AbstractSearchAsyncAction$1.onFailure(AbstractSearchAsyncAction.java:335)\n\tat org.elasticsearch.server@8.13.2/org.elasticsearch.action.ActionListenerImplementations.safeAcceptException(ActionListenerImplementations.java:62)\n\tat org.elasticsearch.server@8.13.2/org.elasticsearch.action.ActionListenerImplementations.safeOnFailure(ActionListenerImplementations.java:73)\n\tat org.elasticsearch.server@8.13.2/org.elasticsearch.action.DelegatingActionListener.onFailure(DelegatingActionListener.java:31)\n\tat org.elasticsearch.server@8.13.2/org.elasticsearch.action.ActionListenerResponseHandler.handleException(ActionListenerResponseHandler.java:53)\n\tat org.elasticsearch.server@8.13.2/org.elasticsearch.action.search.SearchTransportService$ConnectionCountingHandler.handleException(SearchTransportService.java:634)\n\tat org.elasticsearch.server@8.13.2/org.elasticsearch.transport.TransportService$UnregisterChildTransportResponseHandler.handleException(TransportService.java:1751)\n\tat org.elasticsearch.server@8.13.2/org.elasticsearch.transport.TransportService$ContextRestoreResponseHandler.handleException(TransportService.java:1475)\n\tat org.elasticsearch.server@8.13.2/org.elasticsearch.transport.TransportService$DirectResponseChannel.processException(TransportService.java:1609)\n\tat org.elasticsearch.server@8.13.2/org.elasticsearch.transport.TransportService$DirectResponseChannel.sendResponse(TransportService.java:1584)\n\tat org.elasticsearch.server@8.13.2/org.elasticsearch.transport.TaskTransportChannel.sendResponse(TaskTransportChannel.java:44)\n\tat org.elasticsearch.server@8.13.2/org.elasticsearch.action.support.ChannelActionListener.onFailure(ChannelActionListener.java:44)\n\tat org.elasticsearch.server@8.13.2/org.elasticsearch.action.ActionRunnable.onFailure(ActionRunnable.java:146)\n\tat org.elasticsearch.server@8.13.2/org.elasticsearch.common.util.concurrent.AbstractRunnable.run(AbstractRunnable.java:28)\n\tat org.elasticsearch.server@8.13.2/org.elasticsearch.common.util.concurrent.TimedRunnable.doRun(TimedRunnable.java:33)\n\tat org.elasticsearch.server@8.13.2/org.elasticsearch.common.util.concurrent.ThreadContext$ContextPreservingAbstractRunnable.doRun(ThreadContext.java:984)\n\tat org.elasticsearch.server@8.13.2/org.elasticsearch.common.util.concurrent.AbstractRunnable.run(AbstractRunnable.java:26)\n\tat java.base/java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1144)\n\tat java.base/java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:642)\n\tat java.base/java.lang.Thread.run(Thread.java:1583)\nCaused by: 
org.elasticsearch.action.NoShardAvailableActionException: [e6f71666ef1c][172.18.0.2:9300][indices:data/read/search[phase/query]]\n\tat org.elasticsearch.server@8.13.2/org.elasticsearch.action.NoShardAvailableActionException.forOnShardFailureWrapper(NoShardAvailableActionException.java:28)\n\tat org.elasticsearch.server@8.13.2/org.elasticsearch.action.search.AbstractSearchAsyncAction.onShardFailure(AbstractSearchAsyncAction.java:532)\n\tat org.elasticsearch.server@8.13.2/org.elasticsearch.action.search.AbstractSearchAsyncAction.onShardFailure(AbstractSearchAsyncAction.java:479)\n\t... 20 more\n"
}
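In case it helps anyone hitting the same "all shards failed" / NoShardAvailableActionException response, this is a sketch of the first checks I would run (assuming the cluster is reachable on localhost:9200 without auth; adjust host and credentials for your setup) to see whether the primary of suggestedconnectionsdb is actually unassigned and why:

# List the shards of the affected index and their state (STARTED / INITIALIZING / UNASSIGNED)
curl -s 'localhost:9200/_cat/shards/suggestedconnectionsdb?v&h=index,shard,prirep,state,docs,unassigned.reason'

# Ask the allocation explain API why shard 0 (the one named in the stack trace) is not allocated
curl -s -X GET 'localhost:9200/_cluster/allocation/explain?pretty' \
  -H 'Content-Type: application/json' \
  -d '{"index": "suggestedconnectionsdb", "shard": 0, "primary": true}'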
...
{
"@timestamp": "2024-11-08T09:49:13.918Z",
"log.level": "WARN",
"message": "failed to retrieve shard stats from node [kn5WbDj1SNmnnzaEdYtAQA]",
"ecs.version": "1.2.0",
"service.name": "ES_ECS",
"event.dataset": "elasticsearch.server",
"process.thread.name": "elasticsearch[e6f71666ef1c][management][T#2]",
"log.logger": "org.elasticsearch.cluster.InternalClusterInfoService",
"elasticsearch.cluster.uuid": "rFFNxXCdSuWNql4Ue33KCQ",
"elasticsearch.node.id": "kn5WbDj1SNmnnzaEdYtAQA",
"elasticsearch.node.name": "e6f71666ef1c",
"elasticsearch.cluster.name": "docker-cluster",
"error.type": "org.elasticsearch.transport.ReceiveTimeoutTransportException",
"error.message": "[e6f71666ef1c][172.18.0.2:9300][indices:monitor/stats[n]] request_id [4056464] timed out after [15013ms]",
"error.stack_trace": "org.elasticsearch.transport.ReceiveTimeoutTransportException: [e6f71666ef1c][172.18.0.2:9300][indices:monitor/stats[n]] request_id [4056464] timed out after [15013ms]\n"
},
{
"@timestamp": "2024-11-08T09:49:13.965Z",
"log.level": "WARN",
"message": "Received response for a request that has timed out, sent [31.2s/31228ms] ago, timed out [16.2s/16215ms] ago, action [indices:monitor/stats[n]], node [{e6f71666ef1c}{kn5WbDj1SNmnnzaEdYtAQA}{1LHiL08JSySUnm2HqO-rGg}{e6f71666ef1c}{172.18.0.2}{172.18.0.2:9300}{cdfhilmrstw}{8.13.2}{7000099-8503000}{ml.allocated_processors=32, ml.allocated_processors_double=32.0, ml.max_jvm_size=11811160064, ml.config_version=12.0.0, xpack.installed=true, transform.config_version=10.0.0, ml.machine_memory=23622320128}], id [4056464]",
"ecs.version": "1.2.0",
"service.name": "ES_ECS",
"event.dataset": "elasticsearch.server",
"process.thread.name": "elasticsearch[e6f71666ef1c][management][T#5]",
"log.logger": "org.elasticsearch.transport.TransportService",
"elasticsearch.cluster.uuid": "rFFNxXCdSuWNql4Ue33KCQ",
"elasticsearch.node.id": "kn5WbDj1SNmnnzaEdYtAQA",
"elasticsearch.node.name": "e6f71666ef1c",
"elasticsearch.cluster.name": "docker-cluster"
},
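The two entries above (the stats request timing out after ~15s and the late response arriving ~31s later) suggest the node's thread pools were not keeping up at the time. A hedged sketch of how to confirm that, again assuming localhost:9200:

# Queued/rejected tasks per thread pool on each node; a growing queue or non-zero
# rejected count on the management pool matches the indices:monitor/stats timeouts above
curl -s 'localhost:9200/_cat/thread_pool/management,search?v&h=node_name,name,active,queue,rejected,completed'

# Any long-running stats tasks still holding those pools
curl -s 'localhost:9200/_tasks?detailed=true&actions=indices:monitor/stats*&pretty'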
{
"@timestamp": "2024-11-08T09:51:20.511Z",
"log.level": "WARN",
"message": "health check of [/usr/share/elasticsearch/data] took [6606ms] which is above the warn threshold of [5s]",
"ecs.version": "1.2.0",
"service.name": "ES_ECS",
"event.dataset": "elasticsearch.server",
"process.thread.name": "elasticsearch[e6f71666ef1c][generic][T#95]",
"log.logger": "org.elasticsearch.monitor.fs.FsHealthService",
"elasticsearch.cluster.uuid": "rFFNxXCdSuWNql4Ue33KCQ",
"elasticsearch.node.id": "kn5WbDj1SNmnnzaEdYtAQA",
"elasticsearch.node.name": "e6f71666ef1c",
"elasticsearch.cluster.name": "docker-cluster"
}
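Since the FsHealthService warning says a simple health check of the data path took 6.6s (warn threshold 5s), the disk itself looks suspect. Not part of the original troubleshooting session, just what I would check next under the same localhost:9200 assumption, plus a count to quantify how many documents are actually left in the index:

# Per-node filesystem stats: available bytes and I/O counters for the data path
curl -s 'localhost:9200/_nodes/stats/fs?human&pretty'

# Index-level health and the surviving document count
curl -s 'localhost:9200/_cluster/health/suggestedconnectionsdb?pretty'
curl -s 'localhost:9200/suggestedconnectionsdb/_count?pretty'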