Hello, I have multiple ECK clusters (3 nodes each) deployed the same way on different Azure AKS clusters. On all of them there are recurring failed-allocation events for one specific index, .kibana_task_manager_8.14.1_001. They happen at different, seemingly random times, and I don't see any outages or pod restarts in those time frames.
When I run "POST _cluster/reroute?retry_failed" it reassigns the shard correctly and fixes the problem, but then a couple of days later the shard fails again.
This is the full output of the following request (the request is shown first, followed by the response):
GET _cluster/allocation/explain
{
"index": ".kibana_task_manager_8.14.1_001",
"shard": 0,
"primary": false
}
{
"index": ".kibana_task_manager_8.14.1_001",
"shard": 0,
"primary": false,
"current_state": "unassigned",
"unassigned_info": {
"reason": "ALLOCATION_FAILED",
"at": "2024-12-06T17:42:51.150Z",
"failed_allocation_attempts": 5,
"details": """failed shard on node [8tAkY7P0SC-rVZ8FTknQWw]: shard failure, reason [lucene commit failed], failure java.nio.file.FileAlreadyExistsException: /usr/share/elasticsearch/data/indices/tO-G6rHKRzOedCOu68f_OQ/0/index/_19w7_2_Lucene90_0.dvd
at sun.nio.fs.UnixException.translateToIOException(UnixException.java:94)
at sun.nio.fs.UnixException.rethrowAsIOException(UnixException.java:106)
at sun.nio.fs.UnixException.rethrowAsIOException(UnixException.java:111)
at sun.nio.fs.UnixFileSystemProvider.newByteChannel(UnixFileSystemProvider.java:261)
at java.nio.file.spi.FileSystemProvider.newOutputStream(FileSystemProvider.java:482)
at java.nio.file.Files.newOutputStream(Files.java:227)
at org.apache.lucene.store.FSDirectory$FSIndexOutput.<init>(FSDirectory.java:394)
at org.apache.lucene.store.FSDirectory$FSIndexOutput.<init>(FSDirectory.java:387)
at org.apache.lucene.store.FSDirectory.createOutput(FSDirectory.java:220)
at org.apache.lucene.store.FilterDirectory.createOutput(FilterDirectory.java:75)
at org.elasticsearch.index.store.ByteSizeCachingDirectory.createOutput(ByteSizeCachingDirectory.java:105)
at org.apache.lucene.store.FilterDirectory.createOutput(FilterDirectory.java:75)
at org.apache.lucene.store.LockValidatingDirectoryWrapper.createOutput(LockValidatingDirectoryWrapper.java:43)
at org.apache.lucene.store.TrackingDirectoryWrapper.createOutput(TrackingDirectoryWrapper.java:41)
at org.apache.lucene.store.TrackingDirectoryWrapper.createOutput(TrackingDirectoryWrapper.java:41)
at org.apache.lucene.codecs.lucene90.Lucene90DocValuesConsumer.<init>(Lucene90DocValuesConsumer.java:81)
at org.apache.lucene.codecs.lucene90.Lucene90DocValuesFormat.fieldsConsumer(Lucene90DocValuesFormat.java:148)
at org.apache.lucene.codecs.perfield.PerFieldDocValuesFormat$FieldsWriter.getInstance(PerFieldDocValuesFormat.java:231)
at org.apache.lucene.codecs.perfield.PerFieldDocValuesFormat$FieldsWriter.getInstance(PerFieldDocValuesFormat.java:162)
at org.apache.lucene.codecs.perfield.PerFieldDocValuesFormat$FieldsWriter.addNumericField(PerFieldDocValuesFormat.java:104)
at org.apache.lucene.index.ReadersAndUpdates.handleDVUpdates(ReadersAndUpdates.java:396)
at org.apache.lucene.index.ReadersAndUpdates.writeFieldUpdates(ReadersAndUpdates.java:620)
at org.apache.lucene.index.ReaderPool.commit(ReaderPool.java:356)
at org.apache.lucene.index.IndexWriter.writeReaderPool(IndexWriter.java:3978)
at org.apache.lucene.index.IndexWriter.prepareCommitInternal(IndexWriter.java:3681)
at org.apache.lucene.index.IndexWriter.commitInternal(IndexWriter.java:4110)
at org.apache.lucene.index.IndexWriter.commit(IndexWriter.java:4072)
at org.elasticsearch.index.engine.InternalEngine.commitIndexWriter(InternalEngine.java:2912)
at org.elasticsearch.index.engine.InternalEngine.flushHoldingLock(InternalEngine.java:2226)
at org.elasticsearch.index.engine.Engine.flush(Engine.java:1174)
at org.elasticsearch.index.engine.InternalEngine.lambda$recoverFromTranslogInternal$6(InternalEngine.java:625)
at org.elasticsearch.action.ActionListener.run(ActionListener.java:356)
at org.elasticsearch.index.engine.InternalEngine.recoverFromTranslogInternal(InternalEngine.java:597)
at org.elasticsearch.index.engine.InternalEngine.lambda$recoverFromTranslog$3(InternalEngine.java:574)
at org.elasticsearch.action.ActionListener.lambda$runWithResource$0(ActionListener.java:379)
at org.elasticsearch.action.ActionListener.run(ActionListener.java:356)
at org.elasticsearch.action.ActionListener.runWithResource(ActionListener.java:379)
at org.elasticsearch.index.engine.InternalEngine.recoverFromTranslog(InternalEngine.java:570)
at org.elasticsearch.index.shard.IndexShard.lambda$doLocalRecovery$20(IndexShard.java:1885)
at org.elasticsearch.action.ActionListener.lambda$runWithResource$0(ActionListener.java:379)
at org.elasticsearch.action.ActionListener.run(ActionListener.java:356)
at org.elasticsearch.action.ActionListener.runWithResource(ActionListener.java:379)
at org.elasticsearch.index.shard.IndexShard.lambda$doLocalRecovery$21(IndexShard.java:1866)
at org.elasticsearch.action.ActionListener.run(ActionListener.java:356)
at org.elasticsearch.action.support.SubscribableListener.newForked(SubscribableListener.java:128)
at org.elasticsearch.index.shard.IndexShard.doLocalRecovery(IndexShard.java:1866)
at org.elasticsearch.index.shard.IndexShard.lambda$recoverLocallyUpToGlobalCheckpoint$14(IndexShard.java:1817)
at org.elasticsearch.action.ActionListener.run(ActionListener.java:356)
at org.elasticsearch.index.shard.IndexShard.recoverLocallyUpToGlobalCheckpoint(IndexShard.java:1814)
at org.elasticsearch.indices.recovery.PeerRecoveryTargetService.lambda$doRecovery$4(PeerRecoveryTargetService.java:397)
at org.elasticsearch.action.ActionListenerImplementations$ResponseWrappingActionListener.onResponse(ActionListenerImplementations.java:245)
at org.elasticsearch.action.support.SubscribableListener$SuccessResult.complete(SubscribableListener.java:382)
at org.elasticsearch.action.support.SubscribableListener.tryComplete(SubscribableListener.java:302)
at org.elasticsearch.action.support.SubscribableListener.addListener(SubscribableListener.java:205)
at org.elasticsearch.action.support.SubscribableListener.lambda$andThen$0(SubscribableListener.java:469)
at org.elasticsearch.action.ActionListener.run(ActionListener.java:356)
at org.elasticsearch.action.support.SubscribableListener.newForked(SubscribableListener.java:128)
at org.elasticsearch.action.support.SubscribableListener.andThen(SubscribableListener.java:469)
at org.elasticsearch.action.support.SubscribableListener.andThen(SubscribableListener.java:433)
at org.elasticsearch.indices.recovery.PeerRecoveryTargetService.doRecovery(PeerRecoveryTargetService.java:383)
at org.elasticsearch.indices.recovery.PeerRecoveryTargetService$RecoveryRunner.doRun(PeerRecoveryTargetService.java:723)
at org.elasticsearch.common.util.concurrent.ThreadContext$ContextPreservingAbstractRunnable.doRun(ThreadContext.java:984)
at org.elasticsearch.common.util.concurrent.AbstractRunnable.run(AbstractRunnable.java:26)
at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1144)
at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:642)
at java.lang.Thread.run(Thread.java:1570)
""",
"last_allocation_status": "no_attempt"
},
"can_allocate": "no",
"allocate_explanation": "Elasticsearch isn't allowed to allocate this shard to any of the nodes in the cluster. Choose a node to which you expect this shard to be allocated, find this node in the node-by-node explanation, and address the reasons which prevent Elasticsearch from allocating this shard there.",
"node_allocation_decisions": [
{
"node_id": "8tAkY7P0SC-rVZ8FTknQWw",
"node_name": "",
"transport_address": "",
"node_attributes": {
"ml.allocated_processors": "2",
"ml.max_jvm_size": "5368709120",
"ml.config_version": "12.0.0",
"ml.machine_memory": "10737418240",
"ml.allocated_processors_double": "2.0",
"transform.config_version": "10.0.0",
"xpack.installed": "true",
"k8s_node_name": "04"
},
"roles": [
"data",
"data_cold",
"data_content",
"data_frozen",
"data_hot",
"data_warm",
"ingest",
"master",
"ml",
"remote_cluster_client",
"transform"
],
"node_decision": "no",
"deciders": [
{
"decider": "max_retry",
"decision": "NO",
"explanation": """shard has exceeded the maximum number of retries [5] on failed allocation attempts - manually call [POST /_cluster/reroute?retry_failed&metric=none] to retry, [unassigned_info[[reason=ALLOCATION_FAILED], at[2024-12-06T17:42:51.150Z], failed_attempts[5], failed_nodes[[8tAkY7P0SC-rVZ8FTknQWw]], delayed=false, last_node[8tAkY7P0SC-rVZ8FTknQWw], details[failed shard on node [8tAkY7P0SC-rVZ8FTknQWw]: shard failure, reason [lucene commit failed], failure java.nio.file.FileAlreadyExistsException: /usr/share/elasticsearch/data/indices/tO-G6rHKRzOedCOu68f_OQ/0/index/_19w7_2_Lucene90_0.dvd
at sun.nio.fs.UnixException.translateToIOException(UnixException.java:94)
at sun.nio.fs.UnixException.rethrowAsIOException(UnixException.java:106)
at sun.nio.fs.UnixException.rethrowAsIOException(UnixException.java:111)
at sun.nio.fs.UnixFileSystemProvider.newByteChannel(UnixFileSystemProvider.java:261)
at java.nio.file.spi.FileSystemProvider.newOutputStream(FileSystemProvider.java:482)
at java.nio.file.Files.newOutputStream(Files.java:227)
at org.apache.lucene.store.FSDirectory$FSIndexOutput.<init>(FSDirectory.java:394)
at org.apache.lucene.store.FSDirectory$FSIndexOutput.<init>(FSDirectory.java:387)
at org.apache.lucene.store.FSDirectory.createOutput(FSDirectory.java:220)
at org.apache.lucene.store.FilterDirectory.createOutput(FilterDirectory.java:75)
at org.elasticsearch.index.store.ByteSizeCachingDirectory.createOutput(ByteSizeCachingDirectory.java:105)
at org.apache.lucene.store.FilterDirectory.createOutput(FilterDirectory.java:75)
at org.apache.lucene.store.LockValidatingDirectoryWrapper.createOutput(LockValidatingDirectoryWrapper.java:43)
at org.apache.lucene.store.TrackingDirectoryWrapper.createOutput(TrackingDirectoryWrapper.java:41)
at org.apache.lucene.store.TrackingDirectoryWrapper.createOutput(TrackingDirectoryWrapper.java:41)
at org.apache.lucene.codecs.lucene90.Lucene90DocValuesConsumer.<init>(Lucene90DocValuesConsumer.java:81)
at org.apache.lucene.codecs.lucene90.Lucene90DocValuesFormat.fieldsConsumer(Lucene90DocValuesFormat.java:148)
at org.apache.lucene.codecs.perfield.PerFieldDocValuesFormat$FieldsWriter.getInstance(PerFieldDocValuesFormat.java:231)
at org.apache.lucene.codecs.perfield.PerFieldDocValuesFormat$FieldsWriter.getInstance(PerFieldDocValuesFormat.java:162)
at org.apache.lucene.codecs.perfield.PerFieldDocValuesFormat$FieldsWriter.addNumericField(PerFieldDocValuesFormat.java:104)
at org.apache.lucene.index.ReadersAndUpdates.handleDVUpdates(ReadersAndUpdates.java:396)
at org.apache.lucene.index.ReadersAndUpdates.writeFieldUpdates(ReadersAndUpdates.java:620)
at org.apache.lucene.index.ReaderPool.commit(ReaderPool.java:356)
at org.apache.lucene.index.IndexWriter.writeReaderPool(IndexWriter.java:3978)
at org.apache.lucene.index.IndexWriter.prepareCommitInternal(IndexWriter.java:3681)
at org.apache.lucene.index.IndexWriter.commitInternal(IndexWriter.java:4110)
at org.apache.lucene.index.IndexWriter.commit(IndexWriter.java:4072)
at org.elasticsearch.index.engine.InternalEngine.commitIndexWriter(InternalEngine.java:2912)
at org.elasticsearch.index.engine.InternalEngine.flushHoldingLock(InternalEngine.java:2226)
at org.elasticsearch.index.engine.Engine.flush(Engine.java:1174)
at org.elasticsearch.index.engine.InternalEngine.lambda$recoverFromTranslogInternal$6(InternalEngine.java:625)
at org.elasticsearch.action.ActionListener.run(ActionListener.java:356)
at org.elasticsearch.index.engine.InternalEngine.recoverFromTranslogInternal(InternalEngine.java:597)
at org.elasticsearch.index.engine.InternalEngine.lambda$recoverFromTranslog$3(InternalEngine.java:574)
at org.elasticsearch.action.ActionListener.lambda$runWithResource$0(ActionListener.java:379)
at org.elasticsearch.action.ActionListener.run(ActionListener.java:356)
at org.elasticsearch.action.ActionListener.runWithResource(ActionListener.java:379)
at org.elasticsearch.index.engine.InternalEngine.recoverFromTranslog(InternalEngine.java:570)
at org.elasticsearch.index.shard.IndexShard.lambda$doLocalRecovery$20(IndexShard.java:1885)
at org.elasticsearch.action.ActionListener.lambda$runWithResource$0(ActionListener.java:379)
at org.elasticsearch.action.ActionListener.run(ActionListener.java:356)
at org.elasticsearch.action.ActionListener.runWithResource(ActionListener.java:379)
at org.elasticsearch.index.shard.IndexShard.lambda$doLocalRecovery$21(IndexShard.java:1866)
at org.elasticsearch.action.ActionListener.run(ActionListener.java:356)
at org.elasticsearch.action.support.SubscribableListener.newForked(SubscribableListener.java:128)
at org.elasticsearch.index.shard.IndexShard.doLocalRecovery(IndexShard.java:1866)
at org.elasticsearch.index.shard.IndexShard.lambda$recoverLocallyUpToGlobalCheckpoint$14(IndexShard.java:1817)
at org.elasticsearch.action.ActionListener.run(ActionListener.java:356)
at org.elasticsearch.index.shard.IndexShard.recoverLocallyUpToGlobalCheckpoint(IndexShard.java:1814)
at org.elasticsearch.indices.recovery.PeerRecoveryTargetService.lambda$doRecovery$4(PeerRecoveryTargetService.java:397)
at org.elasticsearch.action.ActionListenerImplementations$ResponseWrappingActionListener.onResponse(ActionListenerImplementations.java:245)
at org.elasticsearch.action.support.SubscribableListener$SuccessResult.complete(SubscribableListener.java:382)
at org.elasticsearch.action.support.SubscribableListener.tryComplete(SubscribableListener.java:302)
at org.elasticsearch.action.support.SubscribableListener.addListener(SubscribableListener.java:205)
at org.elasticsearch.action.support.SubscribableListener.lambda$andThen$0(SubscribableListener.java:469)
at org.elasticsearch.action.ActionListener.run(ActionListener.java:356)
at org.elasticsearch.action.support.SubscribableListener.newForked(SubscribableListener.java:128)
at org.elasticsearch.action.support.SubscribableListener.andThen(SubscribableListener.java:469)
at org.elasticsearch.action.support.SubscribableListener.andThen(SubscribableListener.java:433)
at org.elasticsearch.indices.recovery.PeerRecoveryTargetService.doRecovery(PeerRecoveryTargetService.java:383)
at org.elasticsearch.indices.recovery.PeerRecoveryTargetService$RecoveryRunner.doRun(PeerRecoveryTargetService.java:723)
at org.elasticsearch.common.util.concurrent.ThreadContext$ContextPreservingAbstractRunnable.doRun(ThreadContext.java:984)
at org.elasticsearch.common.util.concurrent.AbstractRunnable.run(AbstractRunnable.java:26)
at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1144)
at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:642)
at java.lang.Thread.run(Thread.java:1570)
], allocation_status[no_attempt]]]"""
}
]
},
{
"node_id": "dDI5EsjORDe9skHDOIAtBw",
"node_name": "",
"transport_address": "",
"node_attributes": {
"ml.allocated_processors_double": "2.0",
"ml.machine_memory": "10737418240",
"ml.config_version": "12.0.0",
"ml.max_jvm_size": "5368709120",
"ml.allocated_processors": "2",
"xpack.installed": "true",
"transform.config_version": "10.0.0",
"k8s_node_name": "03"
},
"roles": [
"data",
"data_cold",
"data_content",
"data_frozen",
"data_hot",
"data_warm",
"ingest",
"master",
"ml",
"remote_cluster_client",
"transform"
],
"node_decision": "no",
"deciders": [
{
"decider": "max_retry",
"decision": "NO",
"explanation": """shard has exceeded the maximum number of retries [5] on failed allocation attempts - manually call [POST /_cluster/reroute?retry_failed&metric=none] to retry, [unassigned_info[[reason=ALLOCATION_FAILED], at[2024-12-06T17:42:51.150Z], failed_attempts[5], failed_nodes[[8tAkY7P0SC-rVZ8FTknQWw]], delayed=false, last_node[8tAkY7P0SC-rVZ8FTknQWw], details[failed shard on node [8tAkY7P0SC-rVZ8FTknQWw]: shard failure, reason [lucene commit failed], failure java.nio.file.FileAlreadyExistsException: /usr/share/elasticsearch/data/indices/tO-G6rHKRzOedCOu68f_OQ/0/index/_19w7_2_Lucene90_0.dvd
at sun.nio.fs.UnixException.translateToIOException(UnixException.java:94)
at sun.nio.fs.UnixException.rethrowAsIOException(UnixException.java:106)
at sun.nio.fs.UnixException.rethrowAsIOException(UnixException.java:111)
at sun.nio.fs.UnixFileSystemProvider.newByteChannel(UnixFileSystemProvider.java:261)
at java.nio.file.spi.FileSystemProvider.newOutputStream(FileSystemProvider.java:482)
at java.nio.file.Files.newOutputStream(Files.java:227)
at org.apache.lucene.store.FSDirectory$FSIndexOutput.<init>(FSDirectory.java:394)
at org.apache.lucene.store.FSDirectory$FSIndexOutput.<init>(FSDirectory.java:387)
at org.apache.lucene.store.FSDirectory.createOutput(FSDirectory.java:220)
at org.apache.lucene.store.FilterDirectory.createOutput(FilterDirectory.java:75)
at org.elasticsearch.index.store.ByteSizeCachingDirectory.createOutput(ByteSizeCachingDirectory.java:105)
at org.apache.lucene.store.FilterDirectory.createOutput(FilterDirectory.java:75)
at org.apache.lucene.store.LockValidatingDirectoryWrapper.createOutput(LockValidatingDirectoryWrapper.java:43)
at org.apache.lucene.store.TrackingDirectoryWrapper.createOutput(TrackingDirectoryWrapper.java:41)
at org.apache.lucene.store.TrackingDirectoryWrapper.createOutput(TrackingDirectoryWrapper.java:41)
at org.apache.lucene.codecs.lucene90.Lucene90DocValuesConsumer.<init>(Lucene90DocValuesConsumer.java:81)
at org.apache.lucene.codecs.lucene90.Lucene90DocValuesFormat.fieldsConsumer(Lucene90DocValuesFormat.java:148)
at org.apache.lucene.codecs.perfield.PerFieldDocValuesFormat$FieldsWriter.getInstance(PerFieldDocValuesFormat.java:231)
at org.apache.lucene.codecs.perfield.PerFieldDocValuesFormat$FieldsWriter.getInstance(PerFieldDocValuesFormat.java:162)
at org.apache.lucene.codecs.perfield.PerFieldDocValuesFormat$FieldsWriter.addNumericField(PerFieldDocValuesFormat.java:104)
at org.apache.lucene.index.ReadersAndUpdates.handleDVUpdates(ReadersAndUpdates.java:396)
at org.apache.lucene.index.ReadersAndUpdates.writeFieldUpdates(ReadersAndUpdates.java:620)
at org.apache.lucene.index.ReaderPool.commit(ReaderPool.java:356)
at org.apache.lucene.index.IndexWriter.writeReaderPool(IndexWriter.java:3978)
at org.apache.lucene.index.IndexWriter.prepareCommitInternal(IndexWriter.java:3681)
at org.apache.lucene.index.IndexWriter.commitInternal(IndexWriter.java:4110)
at org.apache.lucene.index.IndexWriter.commit(IndexWriter.java:4072)
at org.elasticsearch.index.engine.InternalEngine.commitIndexWriter(InternalEngine.java:2912)
at org.elasticsearch.index.engine.InternalEngine.flushHoldingLock(InternalEngine.java:2226)
at org.elasticsearch.index.engine.Engine.flush(Engine.java:1174)
at org.elasticsearch.index.engine.InternalEngine.lambda$recoverFromTranslogInternal$6(InternalEngine.java:625)
at org.elasticsearch.action.ActionListener.run(ActionListener.java:356)
at org.elasticsearch.index.engine.InternalEngine.recoverFromTranslogInternal(InternalEngine.java:597)
at org.elasticsearch.index.engine.InternalEngine.lambda$recoverFromTranslog$3(InternalEngine.java:574)
at org.elasticsearch.action.ActionListener.lambda$runWithResource$0(ActionListener.java:379)
at org.elasticsearch.action.ActionListener.run(ActionListener.java:356)
at org.elasticsearch.action.ActionListener.runWithResource(ActionListener.java:379)
at org.elasticsearch.index.engine.InternalEngine.recoverFromTranslog(InternalEngine.java:570)
at org.elasticsearch.index.shard.IndexShard.lambda$doLocalRecovery$20(IndexShard.java:1885)
at org.elasticsearch.action.ActionListener.lambda$runWithResource$0(ActionListener.java:379)
at org.elasticsearch.action.ActionListener.run(ActionListener.java:356)
at org.elasticsearch.action.ActionListener.runWithResource(ActionListener.java:379)
at org.elasticsearch.index.shard.IndexShard.lambda$doLocalRecovery$21(IndexShard.java:1866)
at org.elasticsearch.action.ActionListener.run(ActionListener.java:356)
at org.elasticsearch.action.support.SubscribableListener.newForked(SubscribableListener.java:128)
at org.elasticsearch.index.shard.IndexShard.doLocalRecovery(IndexShard.java:1866)
at org.elasticsearch.index.shard.IndexShard.lambda$recoverLocallyUpToGlobalCheckpoint$14(IndexShard.java:1817)
at org.elasticsearch.action.ActionListener.run(ActionListener.java:356)
at org.elasticsearch.index.shard.IndexShard.recoverLocallyUpToGlobalCheckpoint(IndexShard.java:1814)
at org.elasticsearch.indices.recovery.PeerRecoveryTargetService.lambda$doRecovery$4(PeerRecoveryTargetService.java:397)
at org.elasticsearch.action.ActionListenerImplementations$ResponseWrappingActionListener.onResponse(ActionListenerImplementations.java:245)
at org.elasticsearch.action.support.SubscribableListener$SuccessResult.complete(SubscribableListener.java:382)
at org.elasticsearch.action.support.SubscribableListener.tryComplete(SubscribableListener.java:302)
at org.elasticsearch.action.support.SubscribableListener.addListener(SubscribableListener.java:205)
at org.elasticsearch.action.support.SubscribableListener.lambda$andThen$0(SubscribableListener.java:469)
at org.elasticsearch.action.ActionListener.run(ActionListener.java:356)
at org.elasticsearch.action.support.SubscribableListener.newForked(SubscribableListener.java:128)
at org.elasticsearch.action.support.SubscribableListener.andThen(SubscribableListener.java:469)
at org.elasticsearch.action.support.SubscribableListener.andThen(SubscribableListener.java:433)
at org.elasticsearch.indices.recovery.PeerRecoveryTargetService.doRecovery(PeerRecoveryTargetService.java:383)
at org.elasticsearch.indices.recovery.PeerRecoveryTargetService$RecoveryRunner.doRun(PeerRecoveryTargetService.java:723)
at org.elasticsearch.common.util.concurrent.ThreadContext$ContextPreservingAbstractRunnable.doRun(ThreadContext.java:984)
at org.elasticsearch.common.util.concurrent.AbstractRunnable.run(AbstractRunnable.java:26)
at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1144)
at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:642)
at java.lang.Thread.run(Thread.java:1570)
], allocation_status[no_attempt]]]"""
},
{
"decider": "same_shard",
"decision": "NO",
"explanation": "a copy of this shard is already allocated to this node [[.kibana_task_manager_8.14.1_001][0], node[dDI5EsjORDe9skHDOIAtBw], [P], s[STARTED], a[id=4d8YZdg4SpOQZgI9RDDtVw], failed_attempts[0]]"
},
{
"decider": "awareness",
"decision": "NO",
"explanation": "there are [2] copies of this shard and [3] values for attribute [k8s_node_name] ([02, 03, 04] from nodes in the cluster and no forced awareness) so there may be at most [1] copies of this shard allocated to nodes with each value, but (including this copy) there would be [2] copies allocated to nodes with [node.attr.k8s_node_name: 03]"
}
]
},
{
"node_id": "wND5cQPoQHWghsfozwT-ow",
"node_name": "",
"transport_address": "",
"node_attributes": {
"ml.allocated_processors": "2",
"ml.max_jvm_size": "5368709120",
"ml.config_version": "12.0.0",
"ml.machine_memory": "10737418240",
"ml.allocated_processors_double": "2.0",
"transform.config_version": "10.0.0",
"xpack.installed": "true",
"k8s_node_name": "02"
},
"roles": [
"data",
"data_cold",
"data_content",
"data_frozen",
"data_hot",
"data_warm",
"ingest",
"master",
"ml",
"remote_cluster_client",
"transform"
],
"node_decision": "no",
"deciders": [
{
"decider": "max_retry",
"decision": "NO",
"explanation": """shard has exceeded the maximum number of retries [5] on failed allocation attempts - manually call [POST /_cluster/reroute?retry_failed&metric=none] to retry, [unassigned_info[[reason=ALLOCATION_FAILED], at[2024-12-06T17:42:51.150Z], failed_attempts[5], failed_nodes[[8tAkY7P0SC-rVZ8FTknQWw]], delayed=false, last_node[8tAkY7P0SC-rVZ8FTknQWw], details[failed shard on node [8tAkY7P0SC-rVZ8FTknQWw]: shard failure, reason [lucene commit failed], failure java.nio.file.FileAlreadyExistsException: /usr/share/elasticsearch/data/indices/tO-G6rHKRzOedCOu68f_OQ/0/index/_19w7_2_Lucene90_0.dvd
at sun.nio.fs.UnixException.translateToIOException(UnixException.java:94)
at sun.nio.fs.UnixException.rethrowAsIOException(UnixException.java:106)
at sun.nio.fs.UnixException.rethrowAsIOException(UnixException.java:111)
at sun.nio.fs.UnixFileSystemProvider.newByteChannel(UnixFileSystemProvider.java:261)
at java.nio.file.spi.FileSystemProvider.newOutputStream(FileSystemProvider.java:482)
at java.nio.file.Files.newOutputStream(Files.java:227)
at org.apache.lucene.store.FSDirectory$FSIndexOutput.<init>(FSDirectory.java:394)
at org.apache.lucene.store.FSDirectory$FSIndexOutput.<init>(FSDirectory.java:387)
at org.apache.lucene.store.FSDirectory.createOutput(FSDirectory.java:220)
at org.apache.lucene.store.FilterDirectory.createOutput(FilterDirectory.java:75)
at org.elasticsearch.index.store.ByteSizeCachingDirectory.createOutput(ByteSizeCachingDirectory.java:105)
at org.apache.lucene.store.FilterDirectory.createOutput(FilterDirectory.java:75)
at org.apache.lucene.store.LockValidatingDirectoryWrapper.createOutput(LockValidatingDirectoryWrapper.java:43)
at org.apache.lucene.store.TrackingDirectoryWrapper.createOutput(TrackingDirectoryWrapper.java:41)
at org.apache.lucene.store.TrackingDirectoryWrapper.createOutput(TrackingDirectoryWrapper.java:41)
at org.apache.lucene.codecs.lucene90.Lucene90DocValuesConsumer.<init>(Lucene90DocValuesConsumer.java:81)
at org.apache.lucene.codecs.lucene90.Lucene90DocValuesFormat.fieldsConsumer(Lucene90DocValuesFormat.java:148)
at org.apache.lucene.codecs.perfield.PerFieldDocValuesFormat$FieldsWriter.getInstance(PerFieldDocValuesFormat.java:231)
at org.apache.lucene.codecs.perfield.PerFieldDocValuesFormat$FieldsWriter.getInstance(PerFieldDocValuesFormat.java:162)
at org.apache.lucene.codecs.perfield.PerFieldDocValuesFormat$FieldsWriter.addNumericField(PerFieldDocValuesFormat.java:104)
at org.apache.lucene.index.ReadersAndUpdates.handleDVUpdates(ReadersAndUpdates.java:396)
at org.apache.lucene.index.ReadersAndUpdates.writeFieldUpdates(ReadersAndUpdates.java:620)
at org.apache.lucene.index.ReaderPool.commit(ReaderPool.java:356)
at org.apache.lucene.index.IndexWriter.writeReaderPool(IndexWriter.java:3978)
at org.apache.lucene.index.IndexWriter.prepareCommitInternal(IndexWriter.java:3681)
at org.apache.lucene.index.IndexWriter.commitInternal(IndexWriter.java:4110)
at org.apache.lucene.index.IndexWriter.commit(IndexWriter.java:4072)
at org.elasticsearch.index.engine.InternalEngine.commitIndexWriter(InternalEngine.java:2912)
at org.elasticsearch.index.engine.InternalEngine.flushHoldingLock(InternalEngine.java:2226)
at org.elasticsearch.index.engine.Engine.flush(Engine.java:1174)
at org.elasticsearch.index.engine.InternalEngine.lambda$recoverFromTranslogInternal$6(InternalEngine.java:625)
at org.elasticsearch.action.ActionListener.run(ActionListener.java:356)
at org.elasticsearch.index.engine.InternalEngine.recoverFromTranslogInternal(InternalEngine.java:597)
at org.elasticsearch.index.engine.InternalEngine.lambda$recoverFromTranslog$3(InternalEngine.java:574)
at org.elasticsearch.action.ActionListener.lambda$runWithResource$0(ActionListener.java:379)
at org.elasticsearch.action.ActionListener.run(ActionListener.java:356)
at org.elasticsearch.action.ActionListener.runWithResource(ActionListener.java:379)
at org.elasticsearch.index.engine.InternalEngine.recoverFromTranslog(InternalEngine.java:570)
at org.elasticsearch.index.shard.IndexShard.lambda$doLocalRecovery$20(IndexShard.java:1885)
at org.elasticsearch.action.ActionListener.lambda$runWithResource$0(ActionListener.java:379)
at org.elasticsearch.action.ActionListener.run(ActionListener.java:356)
at org.elasticsearch.action.ActionListener.runWithResource(ActionListener.java:379)
at org.elasticsearch.index.shard.IndexShard.lambda$doLocalRecovery$21(IndexShard.java:1866)
at org.elasticsearch.action.ActionListener.run(ActionListener.java:356)
at org.elasticsearch.action.support.SubscribableListener.newForked(SubscribableListener.java:128)
at org.elasticsearch.index.shard.IndexShard.doLocalRecovery(IndexShard.java:1866)
at org.elasticsearch.index.shard.IndexShard.lambda$recoverLocallyUpToGlobalCheckpoint$14(IndexShard.java:1817)
at org.elasticsearch.action.ActionListener.run(ActionListener.java:356)
at org.elasticsearch.index.shard.IndexShard.recoverLocallyUpToGlobalCheckpoint(IndexShard.java:1814)
at org.elasticsearch.indices.recovery.PeerRecoveryTargetService.lambda$doRecovery$4(PeerRecoveryTargetService.java:397)
at org.elasticsearch.action.ActionListenerImplementations$ResponseWrappingActionListener.onResponse(ActionListenerImplementations.java:245)
at org.elasticsearch.action.support.SubscribableListener$SuccessResult.complete(SubscribableListener.java:382)
at org.elasticsearch.action.support.SubscribableListener.tryComplete(SubscribableListener.java:302)
at org.elasticsearch.action.support.SubscribableListener.addListener(SubscribableListener.java:205)
at org.elasticsearch.action.support.SubscribableListener.lambda$andThen$0(SubscribableListener.java:469)
at org.elasticsearch.action.ActionListener.run(ActionListener.java:356)
at org.elasticsearch.action.support.SubscribableListener.newForked(SubscribableListener.java:128)
at org.elasticsearch.action.support.SubscribableListener.andThen(SubscribableListener.java:469)
at org.elasticsearch.action.support.SubscribableListener.andThen(SubscribableListener.java:433)
at org.elasticsearch.indices.recovery.PeerRecoveryTargetService.doRecovery(PeerRecoveryTargetService.java:383)
at org.elasticsearch.indices.recovery.PeerRecoveryTargetService$RecoveryRunner.doRun(PeerRecoveryTargetService.java:723)
at org.elasticsearch.common.util.concurrent.ThreadContext$ContextPreservingAbstractRunnable.doRun(ThreadContext.java:984)
at org.elasticsearch.common.util.concurrent.AbstractRunnable.run(AbstractRunnable.java:26)
at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1144)
at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:642)
at java.lang.Thread.run(Thread.java:1570)
], allocation_status[no_attempt]]]"""
}
]
}
]
}