Unassigned shards, with status "Elasticsearch can allocate the shard" for all of them

Can you help me explain why I have had 25 unassigned replica shards for several days, all with can_allocate = yes and allocate_explanation = "Elasticsearch can allocate the shard."?

I assumed the rebalancing job would allocate them, but that is not happening.
I am running version 8.6 with 38 nodes, and 25+ nodes have the capacity to allocate these shards.
I do not have any setting that disables shard allocation.

I tried forcing allocation with POST _cluster/reroute.

thank you

@Petr.Simik

What do you get when you run the cluster allocation explain API? That should help you explain what was going on.

GET _cluster/allocation/explain

Reference: Cluster allocation explain API | Elasticsearch Guide [8.7] | Elastic

What is the full output of the cluster stats API?

Thank you,

Cluster stats:

{
  "_nodes": {
    "total": 49,
    "successful": 49,
    "failed": 0
  },
  "cluster_name": "cem",
  "cluster_uuid": "OAIGGQ4Qzzzz_tgdFx3g",
  "timestamp": 1684469366192,
  "status": "yellow",
  "indices": {
    "count": 2921,
    "shards": {
      "total": 6718,
      "primaries": 4392,
      "replication": 0.5295992714025501,
      "index": {
        "shards": {
          "min": 1,
          "max": 15,
          "avg": 2.299897295446765
        },
        "primaries": {
          "min": 1,
          "max": 15,
          "avg": 1.5035946593632317
        },
        "replication": {
          "min": 0,
          "max": 1,
          "avg": 0.7285176309483054
        }
      }
    },
    "docs": {
      "count": 107576439901,
      "deleted": 509313563
    },
    "store": {
      "size_in_bytes": 57984226836542,
      "total_data_set_size_in_bytes": 57984226836542,
      "reserved_in_bytes": 0
    },
    "fielddata": {
      "memory_size_in_bytes": 5240080464,
      "evictions": 0
    },
    "query_cache": {
      "memory_size_in_bytes": 12558846732,
      "total_count": 6675441748,
      "hit_count": 185371466,
      "miss_count": 6490070282,
      "cache_size": 375592,
      "cache_count": 683300,
      "evictions": 307708
    },
    "completion": {
      "size_in_bytes": 0
    },
    "segments": {
      "count": 116687,
      "memory_in_bytes": 0,
      "terms_memory_in_bytes": 0,
      "stored_fields_memory_in_bytes": 0,
      "term_vectors_memory_in_bytes": 0,
      "norms_memory_in_bytes": 0,
      "points_memory_in_bytes": 0,
      "doc_values_memory_in_bytes": 0,
      "index_writer_memory_in_bytes": 753049328,
      "version_map_memory_in_bytes": 41560039,
      "fixed_bit_set_memory_in_bytes": 3937766800,
      "max_unsafe_auto_id_timestamp": 1684463620608,
      "file_sizes": {}
    },
    "mappings": {
      "total_field_count": 675461,
      "total_deduplicated_field_count": 441136,
      "total_deduplicated_mapping_size_in_bytes": 1715741,
      "field_types": [
        {
          "name": "alias",
          "count": 3461,
          "index_count": 16,
          "script_count": 0
        },
        {
          "name": "binary",
          "count": 8,
          "index_count": 8,
          "script_count": 0
        },
        {
          "name": "boolean",
          "count": 2081,
          "index_count": 413,
          "script_count": 0
        },
        {
          "name": "byte",
          "count": 12,
          "index_count": 12,
          "script_count": 0
        },
        {
          "name": "constant_keyword",
          "count": 40,
          "index_count": 14,
          "script_count": 0
        },
        {
          "name": "date",
          "count": 7825,
          "index_count": 2703,
          "script_count": 0
        },
        {
          "name": "date_nanos",
          "count": 3,
          "index_count": 3,
          "script_count": 0
        },
        {
          "name": "date_range",
          "count": 2,
          "index_count": 2,
          "script_count": 0
        },
        {
          "name": "double",
          "count": 3326,
          "index_count": 18,
          "script_count": 0
        },
        {
          "name": "double_range",
          "count": 2,
          "index_count": 2,
          "script_count": 0
        },
        {
          "name": "flattened",
          "count": 120,
          "index_count": 10,
          "script_count": 0
        },
        {
          "name": "float",
          "count": 3015,
          "index_count": 289,
          "script_count": 0
        },
        {
          "name": "float_range",
          "count": 2,
          "index_count": 2,
          "script_count": 0
        },
        {
          "name": "geo_point",
          "count": 1067,
          "index_count": 770,
          "script_count": 0
        },
        {
          "name": "geo_shape",
          "count": 5,
          "index_count": 5,
          "script_count": 0
        },
        {
          "name": "half_float",
          "count": 97,
          "index_count": 21,
          "script_count": 0
        },
        {
          "name": "integer",
          "count": 6730,
          "index_count": 1142,
          "script_count": 0
        },
        {
          "name": "integer_range",
          "count": 2,
          "index_count": 2,
          "script_count": 0
        },
        {
          "name": "ip",
          "count": 237,
          "index_count": 17,
          "script_count": 0
        },
        {
          "name": "ip_range",
          "count": 2,
          "index_count": 2,
          "script_count": 0
        },
        {
          "name": "keyword",
          "count": 397022,
          "index_count": 2792,
          "script_count": 0
        },
        {
          "name": "long",
          "count": 69506,
          "index_count": 2070,
          "script_count": 0
        },
        {
          "name": "long_range",
          "count": 2,
          "index_count": 2,
          "script_count": 0
        },
        {
          "name": "match_only_text",
          "count": 634,
          "index_count": 10,
          "script_count": 0
        },
        {
          "name": "nested",
          "count": 1360,
          "index_count": 224,
          "script_count": 0
        },
        {
          "name": "object",
          "count": 119663,
          "index_count": 1238,
          "script_count": 0
        },
        {
          "name": "scaled_float",
          "count": 1506,
          "index_count": 10,
          "script_count": 0
        },
        {
          "name": "shape",
          "count": 2,
          "index_count": 2,
          "script_count": 0
        },
        {
          "name": "short",
          "count": 2,
          "index_count": 2,
          "script_count": 0
        },
        {
          "name": "text",
          "count": 56768,
          "index_count": 1128,
          "script_count": 0
        },
        {
          "name": "unsigned_long",
          "count": 784,
          "index_count": 392,
          "script_count": 0
        },
        {
          "name": "version",
          "count": 5,
          "index_count": 5,
          "script_count": 0
        },
        {
          "name": "wildcard",
          "count": 170,
          "index_count": 10,
          "script_count": 0
        }
      ],
      "runtime_field_types": [
        {
          "name": "keyword",
          "count": 388,
          "index_count": 194,
          "scriptless_count": 388,
          "shadowed_count": 388,
          "lang": [],
          "lines_max": 0,
          "lines_total": 0,
          "chars_max": 0,
          "chars_total": 0,
          "source_max": 0,
          "source_total": 0,
          "doc_max": 0,
          "doc_total": 0
        }
      ]
    },
    "analysis": {
      "char_filter_types": [],
      "tokenizer_types": [],
      "filter_types": [],
      "analyzer_types": [],
      "built_in_char_filters": [],
      "built_in_tokenizers": [],
      "built_in_filters": [],
      "built_in_analyzers": []
    },
    "versions": [
      {
        "version": "7.4.2",
        "index_count": 26,
        "primary_shard_count": 26,
        "total_primary_bytes": 1907725401
      },
      {
        "version": "7.9.0",
        "index_count": 163,
        "primary_shard_count": 163,
        "total_primary_bytes": 75795682828
      },
      {
        "version": "7.13.3",
        "index_count": 338,
        "primary_shard_count": 338,
        "total_primary_bytes": 127436260279
      },
      {
        "version": "7.17.0",
        "index_count": 773,
        "primary_shard_count": 838,
        "total_primary_bytes": 724930396072
      },
      {
        "version": "8.6.0",
        "index_count": 1621,
        "primary_shard_count": 3027,
        "total_primary_bytes": 44084093582476
      }
    ],
    "search": {
      "total": 3431862,
      "queries": {
        "geo_bounding_box": 12,
        "regexp": 6758,
        "bool": 3365693,
        "prefix": 1622,
        "match": 75933,
        "range": 3143461,
        "nested": 19,
        "multi_match": 1927,
        "wildcard": 350,
        "match_phrase": 180909,
        "terms": 168880,
        "constant_score": 1152,
        "ids": 277,
        "match_phrase_prefix": 1033,
        "exists": 193410,
        "match_all": 2025016,
        "term": 484405,
        "simple_query_string": 19743,
        "query_string": 348890
      },
      "sections": {
        "query": 3426939,
        "terminate_after": 1553,
        "search_after": 145048,
        "highlight": 2566,
        "runtime_mappings": 14426,
        "stored_fields": 2051741,
        "script_fields": 2051852,
        "pit": 9446,
        "_source": 44234,
        "docvalue_fields": 2045088,
        "fields": 28039,
        "collapse": 15182,
        "aggs": 2750918
      }
    }
  },
  "nodes": {
    "count": {
      "total": 49,
      "coordinating_only": 4,
      "data": 0,
      "data_cold": 0,
      "data_content": 38,
      "data_frozen": 0,
      "data_hot": 38,
      "data_warm": 4,
      "index": 0,
      "ingest": 42,
      "master": 3,
      "ml": 42,
      "remote_cluster_client": 0,
      "search": 0,
      "transform": 42,
      "voting_only": 0
    },
    "versions": [
      "8.6.0"
    ],
    "os": {
      "available_processors": 368,
      "allocated_processors": 368,
      "names": [
        {
          "name": "Linux",
          "count": 49
        }
      ],
      "pretty_names": [
        {
          "pretty_name": "CentOS Linux 7 (Core)",
          "count": 48
        },
        {
          "pretty_name": "Ubuntu 20.04.5 LTS",
          "count": 1
        }
      ],
      "architectures": [
        {
          "arch": "amd64",
          "count": 49
        }
      ],
      "mem": {
        "total_in_bytes": 1619387183104,
        "adjusted_total_in_bytes": 1619387183104,
        "free_in_bytes": 136538910720,
        "used_in_bytes": 1482848272384,
        "free_percent": 8,
        "used_percent": 92
      }
    },
    "process": {
      "cpu": {
        "percent": 334
      },
      "open_file_descriptors": {
        "min": 1472,
        "max": 4228,
        "avg": 3080
      }
    },
    "jvm": {
      "max_uptime_in_millis": 1328688402,
      "versions": [
        {
          "version": "19.0.1",
          "vm_name": "OpenJDK 64-Bit Server VM",
          "vm_version": "19.0.1+10-21",
          "vm_vendor": "Oracle Corporation",
          "bundled_jdk": true,
          "using_bundled_jdk": true,
          "count": 49
        }
      ],
      "mem": {
        "heap_used_in_bytes": 313967915016,
        "heap_max_in_bytes": 721554505728
      },
      "threads": 7164
    },
    "fs": {
      "total_in_bytes": 90232745926656,
      "free_in_bytes": 31560387608576,
      "available_in_bytes": 30715321360384
    },
    "plugins": [],
    "network_types": {
      "transport_types": {
        "security4": 49
      },
      "http_types": {
        "security4": 49
      }
    },
    "discovery_types": {
      "multi-node": 49
    },
    "packaging_types": [
      {
        "flavor": "default",
        "type": "rpm",
        "count": 48
      },
      {
        "flavor": "default",
        "type": "docker",
        "count": 1
      }
    ],
    "ingest": {
      "number_of_pipelines": 63,
      "processor_stats": {
        "conditional": {
          "count": 0,
          "failed": 0,
          "current": 0,
          "time_in_millis": 0
        },
        "convert": {
          "count": 0,
          "failed": 0,
          "current": 0,
          "time_in_millis": 0
        },
        "date": {
          "count": 0,
          "failed": 0,
          "current": 0,
          "time_in_millis": 0
        },
        "geoip": {
          "count": 0,
          "failed": 0,
          "current": 0,
          "time_in_millis": 0
        },
        "grok": {
          "count": 0,
          "failed": 0,
          "current": 0,
          "time_in_millis": 0
        },
        "gsub": {
          "count": 0,
          "failed": 0,
          "current": 0,
          "time_in_millis": 0
        },
        "pipeline": {
          "count": 0,
          "failed": 0,
          "current": 0,
          "time_in_millis": 0
        },
        "remove": {
          "count": 0,
          "failed": 0,
          "current": 0,
          "time_in_millis": 0
        },
        "rename": {
          "count": 0,
          "failed": 0,
          "current": 0,
          "time_in_millis": 0
        },
        "script": {
          "count": 0,
          "failed": 0,
          "current": 0,
          "time_in_millis": 0
        },
        "set": {
          "count": 0,
          "failed": 0,
          "current": 0,
          "time_in_millis": 0
        },
        "set_security_user": {
          "count": 0,
          "failed": 0,
          "current": 0,
          "time_in_millis": 0
        }
      }
    },
    "indexing_pressure": {
      "memory": {
        "current": {
          "combined_coordinating_and_primary_in_bytes": 0,
          "coordinating_in_bytes": 0,
          "primary_in_bytes": 0,
          "replica_in_bytes": 0,
          "all_in_bytes": 0
        },
        "total": {
          "combined_coordinating_and_primary_in_bytes": 0,
          "coordinating_in_bytes": 0,
          "primary_in_bytes": 0,
          "replica_in_bytes": 0,
          "all_in_bytes": 0,
          "coordinating_rejections": 0,
          "primary_rejections": 0,
          "replica_rejections": 0
        },
        "limit_in_bytes": 0
      }
    }
  }
}

I ran that for each unassigned index, and for each one it returns "Elasticsearch can allocate the shard."

I listed all shards and retrieved their details; it returns about 15 unassigned shards now:

GET _cat/shards?v=true&h=index,shard,prirep,state,node,unassigned.reason,unassigned.details&s=state

index                                                         shard prirep state      node         unassigned.reason 
net_syslog_sig_raw-000018                                     0     r      UNASSIGNED              INDEX_CREATED     
cnfm_regional_test_snmptrapd_raw-000063                       0     r      UNASSIGNED              INDEX_CREATED     
cnfm_central_test_main-000023                                 0     r      UNASSIGNED              INDEX_CREATED    
...

Then I go through them one by one (by script) and check the reason for each:

GET _cluster/allocation/explain
{
  "index": "cnfm_central_test_main-000023",
  "shard": 0,
  "primary": false
}

This returns a long list of nodes where the allocation decision is "yes":

{
  "index": "cnfm_central_test_main-000023",
  "shard": 0,
  "primary": false,
  "current_state": "unassigned",
  "unassigned_info": {
    "reason": "INDEX_CREATED",
    "at": "2023-05-18T19:53:40.168Z",
    "last_allocation_status": "no_attempt"
  },
  "can_allocate": "yes",
  "allocate_explanation": "Elasticsearch can allocate the shard.",
  "target_node": {
    "id": "5q4-xxx",
    "name": "tela12node",
    "transport_address": "10.10.10.22:9300",
    "attributes": {
      "xpack.installed": "true",
      "ml.allocated_processors_double": "8.0",
      "ml.max_jvm_size": "16106127360",
      "ml.allocated_processors": "8",
      "ml.machine_memory": "33566629888"
    }
  },
  "node_allocation_decisions": [
    {
      "node_id": "5q4-xxx",
      "node_name": "tela12node",
      "transport_address": "10.10.10.22:9300",
      "node_attributes": {
        "xpack.installed": "true",
        "ml.allocated_processors_double": "8.0",
        "ml.max_jvm_size": "16106127360",
        "ml.allocated_processors": "8",
        "ml.machine_memory": "33566629888"
      },
      "node_decision": "yes",
      "weight_ranking": 5
    },
....

Does this reproduce in 8.7.1? There have been several changes in the allocation area since 8.6.

2 Likes

Thank you, I will upgrade the cluster and will see.
Do you think it might share a common root cause with the issue I reported here yesterday:

It seems the cluster has a problem with the rebalancing process.

This topic was automatically closed 28 days after the last reply. New replies are no longer allowed.