Unassigned shards inquiry

Hi,
One of the teams in our company has complained that every week or so they get an alert for unassigned shards on their cluster, and they asked me to look into the cause of the issue.
My question is:
Is there a way to see the reason for unassigned shards in retrospect?
I know that when we currently have unassigned shards I can call _cluster/allocation/explain.
But is there an API call or a log somewhere that shows the reason for previously unassigned shards?
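For context, this is roughly how I call it today while a shard is still unassigned (the index name and shard number below are just placeholders):

curl -s -X GET "localhost:9200/_cluster/allocation/explain?pretty" \
  -H 'Content-Type: application/json' -d'
{
  "index": "my-index",
  "shard": 0,
  "primary": false
}'
# With no request body, the API explains the first unassigned shard it finds.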

There is no API that provides this information retroactively. The exception might be if you have monitoring installed. It would, however, help to know a bit about the cluster. Can you provide the full output of the cluster stats API?
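Something along these lines should do it (adjust the host and port for your setup):

# ?pretty formats the JSON, ?human adds readable size/time fields
curl -s -X GET "localhost:9200/_cluster/stats?human&pretty"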


Thanks for the quick response.
Currently we do not have monitoring enabled, but I will look into that.
Stats output:

{
    "_nodes": {
        "total": 11,
        "successful": 11,
        "failed": 0
    },
    "cluster_name": "es-org2-stg",
    "timestamp": 1629101273066,
    "status": "green",
    "indices": {
        "count": 1,
        "shards": {
            "total": 8,
            "primaries": 1,
            "replication": 7.0,
            "index": {
                "shards": {
                    "min": 8,
                    "max": 8,
                    "avg": 8.0
                },
                "primaries": {
                    "min": 1,
                    "max": 1,
                    "avg": 1.0
                },
                "replication": {
                    "min": 7.0,
                    "max": 7.0,
                    "avg": 7.0
                }
            }
        },
        "docs": {
            "count": 5943569,
            "deleted": 4672
        },
        "store": {
            "size_in_bytes": 58869886799
        },
        "fielddata": {
            "memory_size_in_bytes": 0,
            "evictions": 0
        },
        "query_cache": {
            "memory_size_in_bytes": 742041952,
            "total_count": 31490380763,
            "hit_count": 1924835961,
            "miss_count": 29565544802,
            "cache_size": 2432,
            "cache_count": 6470149,
            "evictions": 6467717
        },
        "completion": {
            "size_in_bytes": 0
        },
        "segments": {
            "count": 24,
            "memory_in_bytes": 36110423,
            "terms_memory_in_bytes": 33261025,
            "stored_fields_memory_in_bytes": 715912,
            "term_vectors_memory_in_bytes": 0,
            "norms_memory_in_bytes": 0,
            "points_memory_in_bytes": 2011838,
            "doc_values_memory_in_bytes": 121648,
            "index_writer_memory_in_bytes": 166924384,
            "version_map_memory_in_bytes": 18217760,
            "fixed_bit_set_memory_in_bytes": 0,
            "max_unsafe_auto_id_timestamp": -1,
            "file_sizes": {}
        }
    },
    "nodes": {
        "count": {
            "total": 11,
            "data": 8,
            "coordinating_only": 0,
            "master": 3,
            "ingest": 11
        },
        "versions": [
            "6.3.2"
        ],
        "os": {
            "available_processors": 136,
            "allocated_processors": 136,
            "names": [
                {
                    "name": "Linux",
                    "count": 11
                }
            ],
            "mem": {
                "total_in_bytes": 1118245445632,
                "free_in_bytes": 555763007488,
                "used_in_bytes": 562482438144,
                "free_percent": 50,
                "used_percent": 50
            }
        },
        "process": {
            "cpu": {
                "percent": 8
            },
            "open_file_descriptors": {
                "min": 592,
                "max": 778,
                "avg": 747
            }
        },
        "jvm": {
            "max_uptime_in_millis": 6690415049,
            "versions": [
                {
                    "version": "1.8.0_144",
                    "vm_name": "Java HotSpot(TM) 64-Bit Server VM",
                    "vm_version": "25.144-b01",
                    "vm_vendor": "Oracle Corporation",
                    "count": 11
                }
            ],
            "mem": {
                "heap_used_in_bytes": 59901619632,
                "heap_max_in_bytes": 121005670400
            },
            "threads": 1610
        },
        "fs": {
            "total_in_bytes": 1107973894144,
            "free_in_bytes": 1035376537600,
            "available_in_bytes": 994152411136
        },
        "plugins": [
            {
                "name": "elasticsearch-cm-container-plugin",
                "version": "6.3.119",
                "elasticsearch_version": "6.3.2",
                "java_version": "1.8",
                "description": "Node-level single ChronicalMap container plugin",
                "classname": "com.outbrain.elasticsearch.plugin.OutbrainCMContainerPlugin",
                "extended_plugins": [],
                "has_native_controller": false
            },
            {
                "name": "repository-hdfs",
                "version": "6.3.2",
                "elasticsearch_version": "6.3.2",
                "java_version": "1.8",
                "description": "The HDFS repository plugin adds support for Hadoop Distributed File-System (HDFS) repositories.",
                "classname": "org.elasticsearch.repositories.hdfs.HdfsPlugin",
                "extended_plugins": [],
                "has_native_controller": false
            }
        ],
        "network_types": {
            "transport_types": {
                "security4": 3,
                "netty4": 8
            },
            "http_types": {
                "security4": 3,
                "netty4": 8
            }
        }
    }
}

That version (6.3.2) is EOL, so please upgrade ASAP 🙂

As there is only one index in the cluster and every data node holds a full copy of the data, I would suspect the yellow status appears due to nodes leaving the cluster or being unresponsive, which leads to shards being marked as unassigned. If the index is recreated during operation it would also start out as yellow before all replica shards are assigned, but that should be brief. I would recommend looking through the logs for evidence of nodes leaving the cluster or indices being deleted and seeing how this aligns with the reported issues.
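For example, something like this on the current master (or any master-eligible node) should surface node departures and index deletions; the log path and the exact message wording depend on your installation and version, so treat these patterns only as a starting point:

# default log location on package installs is /var/log/elasticsearch/<cluster_name>.log
grep -iE 'removed [{]|added [{]|deleting index' /var/log/elasticsearch/es-org2-stg.log

Correlating the timestamps of the matching lines with the times the alerts fired should tell you which of the two scenarios you are hitting.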

Yeah, I know.
I am trying to get all our clusters upgraded, but sadly, in the end it is not only my decision.

I will search the logs and see if this is indeed the reason.
Thank you!
