I am running Elasticsearch 7.10.2 in Kubernetes and I keep getting ShardLockObtainFailedException at seemingly random moments, most often while search or indexing operations are in progress.
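For context, this is roughly how I watch the cluster while the errors happen. It is only a sketch: the in-cluster hostname elasticsearch:9200 and the absence of authentication are simplifications of my setup.

```python
import requests

ES = "http://elasticsearch:9200"  # assumed in-cluster service name, no auth; adjust for your setup

# Overall cluster status and the number of unassigned shards
health = requests.get(f"{ES}/_cluster/health").json()
print(health["status"], health["unassigned_shards"])

# Per-shard view of the affected index, to spot UNASSIGNED/INITIALIZING copies
for s in requests.get(f"{ES}/_cat/shards/br-feed", params={"format": "json"}).json():
    print(s["shard"], s["prirep"], s["state"], s["node"])
```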
The logs from the data node and the master node follow.
Data node log:
{"type": "server", "timestamp": "2021-03-17T10:18:34,538Z", "level": "INFO", "component": "o.e.c.s.ClusterApplierService", "cluster.name": "elasticsearch", "node.name": "elasticsearch-1", "message": "master node changed {previous [{elasticsearch-master-0}{DzQuZ6dnSomU-jysddv5wA}{ldtkOwBlQg-NC8u7Vajufw}{172.18.0.184}{172.18.0.184:9300}{lm}{ml.machine_memory=4294967296, ml.max_open_jobs=20, xpack.installed=true, transform.node=false}], current []}, term: 2, version: 12618, reason: becoming candidate: onLeaderFailure", "cluster.uuid": "tF-zsp0sRwWBx0xTclss-g", "node.id": "QKWSHwjvSOel2wGH-D9Pgw" }
{"type": "server", "timestamp": "2021-03-17T10:18:34,571Z", "level": "INFO", "component": "o.e.c.s.ClusterApplierService", "cluster.name": "elasticsearch", "node.name": "elasticsearch-1", "message": "master node changed {previous [], current [{elasticsearch-master-0}{DzQuZ6dnSomU-jysddv5wA}{ldtkOwBlQg-NC8u7Vajufw}{172.18.0.184}{172.18.0.184:9300}{lm}{ml.machine_memory=4294967296, ml.max_open_jobs=20, xpack.installed=true, transform.node=false}]}, term: 2, version: 12620, reason: ApplyCommitRequest{term=2, version=12620, sourceNode={elasticsearch-master-0}{DzQuZ6dnSomU-jysddv5wA}{ldtkOwBlQg-NC8u7Vajufw}{172.18.0.184}{172.18.0.184:9300}{lm}{ml.machine_memory=4294967296, ml.max_open_jobs=20, xpack.installed=true, transform.node=false}}", "cluster.uuid": "tF-zsp0sRwWBx0xTclss-g", "node.id": "QKWSHwjvSOel2wGH-D9Pgw" }
{"type": "server", "timestamp": "2021-03-17T10:18:39,654Z", "level": "INFO", "component": "o.e.i.s.TransportNodesListShardStoreMetadata", "cluster.name": "elasticsearch", "node.name": "elasticsearch-1", "message": "[br-feed][2]: failed to obtain shard lock", "cluster.uuid": "tF-zsp0sRwWBx0xTclss-g", "node.id": "QKWSHwjvSOel2wGH-D9Pgw" ,
"stacktrace": ["org.elasticsearch.env.ShardLockObtainFailedException: [br-feed][2]: obtaining shard lock for [read metadata snapshot] timed out after [5000ms], lock already held for [closing shard] with age [5007ms]",
"at org.elasticsearch.env.NodeEnvironment$InternalShardLock.acquire(NodeEnvironment.java:781) ~[elasticsearch-7.10.2.jar:7.10.2]",
"at org.elasticsearch.env.NodeEnvironment.shardLock(NodeEnvironment.java:687) ~[elasticsearch-7.10.2.jar:7.10.2]",
"at org.elasticsearch.index.store.Store.readMetadataSnapshot(Store.java:451) [elasticsearch-7.10.2.jar:7.10.2]",
"at org.elasticsearch.indices.store.TransportNodesListShardStoreMetadata.listStoreMetadata(TransportNodesListShardStoreMetadata.java:173) [elasticsearch-7.10.2.jar:7.10.2]",
"at org.elasticsearch.indices.store.TransportNodesListShardStoreMetadata.nodeOperation(TransportNodesListShardStoreMetadata.java:117) [elasticsearch-7.10.2.jar:7.10.2]",
"at org.elasticsearch.indices.store.TransportNodesListShardStoreMetadata.nodeOperation(TransportNodesListShardStoreMetadata.java:67) [elasticsearch-7.10.2.jar:7.10.2]",
"at org.elasticsearch.action.support.nodes.TransportNodesAction.nodeOperation(TransportNodesAction.java:158) [elasticsearch-7.10.2.jar:7.10.2]",
"at org.elasticsearch.action.support.nodes.TransportNodesAction$NodeTransportHandler.messageReceived(TransportNodesAction.java:271) [elasticsearch-7.10.2.jar:7.10.2]",
"at org.elasticsearch.action.support.nodes.TransportNodesAction$NodeTransportHandler.messageReceived(TransportNodesAction.java:267) [elasticsearch-7.10.2.jar:7.10.2]",
"at org.elasticsearch.xpack.security.transport.SecurityServerTransportInterceptor$ProfileSecuredRequestHandler$1.doRun(SecurityServerTransportInterceptor.java:257) [x-pack-security-7.10.2.jar:7.10.2]",
"at org.elasticsearch.common.util.concurrent.AbstractRunnable.run(AbstractRunnable.java:37) [elasticsearch-7.10.2.jar:7.10.2]",
"at org.elasticsearch.xpack.security.transport.SecurityServerTransportInterceptor$ProfileSecuredRequestHandler.messageReceived(SecurityServerTransportInterceptor.java:315) [x-pack-security-7.10.2.jar:7.10.2]",
"at org.elasticsearch.transport.RequestHandlerRegistry.processMessageReceived(RequestHandlerRegistry.java:72) [elasticsearch-7.10.2.jar:7.10.2]",
"at org.elasticsearch.transport.InboundHandler$RequestHandler.doRun(InboundHandler.java:305) [elasticsearch-7.10.2.jar:7.10.2]",
"at org.elasticsearch.common.util.concurrent.ThreadContext$ContextPreservingAbstractRunnable.doRun(ThreadContext.java:743) [elasticsearch-7.10.2.jar:7.10.2]",
"at org.elasticsearch.common.util.concurrent.AbstractRunnable.run(AbstractRunnable.java:37) [elasticsearch-7.10.2.jar:7.10.2]",
"at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1130) [?:?]",
"at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:630) [?:?]",
"at java.lang.Thread.run(Thread.java:832) [?:?]"] }
Master node log:
"type": "server", "timestamp": "2021-03-17T10:19:42,545Z", "level": "WARN", "component": "o.e.c.r.a.AllocationService", "cluster.name": "elasticsearch", "node.name": "elasticsearch-master-0", "message": "failing shard [failed shard, shard [br-feed][2], node[QKWSHwjvSOel2wGH-D9Pgw], [R], recovery_source[peer recovery], s[INITIALIZING], a[id=tAWumu9bRqafyQc5Uc0iBA], unassigned_info[[reason=NODE_LEFT], at[2021-03-17T10:18:32.517Z], delayed=false, details[node_left [QKWSHwjvSOel2wGH-D9Pgw]], allocation_status[no_attempt]], expected_shard_size[176214948], message [failed to create shard], failure [IOException[failed to obtain in-memory shard lock]; nested: ShardLockObtainFailedException[[br-feed][2]: obtaining shard lock for [starting shard] timed out after [5000ms], lock already held for [closing shard] with age [62891ms]]; ], markAsStale [true]]", "cluster.uuid": "tF-zsp0sRwWBx0xTclss-g", "node.id": "DzQuZ6dnSomU-jysddv5wA" ,
"stacktrace": ["java.io.IOException: failed to obtain in-memory shard lock",
"at org.elasticsearch.index.IndexService.createShard(IndexService.java:490) ~[elasticsearch-7.10.2.jar:7.10.2]",
"at org.elasticsearch.indices.IndicesService.createShard(IndicesService.java:766) ~[elasticsearch-7.10.2.jar:7.10.2]",
"at org.elasticsearch.indices.IndicesService.createShard(IndicesService.java:177) ~[elasticsearch-7.10.2.jar:7.10.2]",
"at org.elasticsearch.indices.cluster.IndicesClusterStateService.createShard(IndicesClusterStateService.java:593) ~[elasticsearch-7.10.2.jar:7.10.2]",
"at org.elasticsearch.indices.cluster.IndicesClusterStateService.createOrUpdateShards(IndicesClusterStateService.java:570) ~[elasticsearch-7.10.2.jar:7.10.2]",
"at org.elasticsearch.indices.cluster.IndicesClusterStateService.applyClusterState(IndicesClusterStateService.java:248) ~[elasticsearch-7.10.2.jar:7.10.2]",
"at org.elasticsearch.cluster.service.ClusterApplierService.callClusterStateAppliers(ClusterApplierService.java:510) ~[elasticsearch-7.10.2.jar:7.10.2]",
"at org.elasticsearch.cluster.service.ClusterApplierService.callClusterStateAppliers(ClusterApplierService.java:500) ~[elasticsearch-7.10.2.jar:7.10.2]",
"at org.elasticsearch.cluster.service.ClusterApplierService.applyChanges(ClusterApplierService.java:471) ~[elasticsearch-7.10.2.jar:7.10.2]",
"at org.elasticsearch.cluster.service.ClusterApplierService.runTask(ClusterApplierService.java:418) ~[elasticsearch-7.10.2.jar:7.10.2]",
"at org.elasticsearch.cluster.service.ClusterApplierService.access$000(ClusterApplierService.java:68) ~[elasticsearch-7.10.2.jar:7.10.2]",
"at org.elasticsearch.cluster.service.ClusterApplierService$UpdateTask.run(ClusterApplierService.java:162) ~[elasticsearch-7.10.2.jar:7.10.2]",
"at org.elasticsearch.common.util.concurrent.ThreadContext$ContextPreservingRunnable.run(ThreadContext.java:684) [elasticsearch-7.10.2.jar:7.10.2]",
"at org.elasticsearch.common.util.concurrent.PrioritizedEsThreadPoolExecutor$TieBreakingPrioritizedRunnable.runAndClean(PrioritizedEsThreadPoolExecutor.java:252) [elasticsearch-7.10.2.jar:7.10.2]",
"at org.elasticsearch.common.util.concurrent.PrioritizedEsThreadPoolExecutor$TieBreakingPrioritizedRunnable.run(PrioritizedEsThreadPoolExecutor.java:215) [elasticsearch-7.10.2.jar:7.10.2]",
"at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1130) [?:?]",
"at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:630) [?:?]",
"at java.lang.Thread.run(Thread.java:832) [?:?]",
"Caused by: org.elasticsearch.env.ShardLockObtainFailedException: [br-feed][2]: obtaining shard lock for [starting shard] timed out after [5000ms], lock already held for [closing shard] with age [62891ms]",
"at org.elasticsearch.env.NodeEnvironment$InternalShardLock.acquire(NodeEnvironment.java:781) ~[elasticsearch-7.10.2.jar:7.10.2]",
"at org.elasticsearch.env.NodeEnvironment.shardLock(NodeEnvironment.java:687) ~[elasticsearch-7.10.2.jar:7.10.2]",
"at org.elasticsearch.index.IndexService.createShard(IndexService.java:410) ~[elasticsearch-7.10.2.jar:7.10.2]",
Kindly help.