Why am I getting warning about "Number of open shards exceeds cluster soft limit"?

I have a single-node ES 6.8 cluster with ~5500 shards. I have set the cluster.max_shards_per_node to '10000' in /etc/elasticsearch/elasticsearch.yml as per this article (link omitted).

But even after restarting ES, I get the "Number of open shards exceeds cluster soft limit" warning in Kibana's 7.0 Upgrade Assistant... Why would I still be getting this warning?

That looks like the correct setting to me. Are you sure it hasn't also been dynamically set to something else, overriding the value you have in elasticsearch.yml? Can you check the value using

GET _nodes/_all

What is the output from the _cluster/stats?pretty&human API?

{
  "_nodes" : {
    "total" : 1,
    "successful" : 1,
    "failed" : 0
  },
  "cluster_name" : "research-elk",
  "nodes" : {
    "BqGXFOe4SqmBjPlu4w2nSg" : {
      "name" : "logstash01",
      "transport_address" : "[redacted]:9300",
      "host" : "[redacted]",
      "ip" : "[redacted]",
      "version" : "6.8.23",
      "build_flavor" : "default",
      "build_type" : "deb",
      "build_hash" : "4f67856",
      "total_indexing_buffer" : 1706649190,
      "roles" : [
        "master",
        "data",
        "ingest"
      ],
      "attributes" : {
        "ml.machine_memory" : "33721262080",
        "xpack.installed" : "true",
        "ml.max_open_jobs" : "20",
        "ml.enabled" : "true"
      },
      "settings" : {
        "pidfile" : "/var/run/elasticsearch/elasticsearch.pid",
        "cluster" : {
          "name" : "research-elk",
          "max_shards_per_node" : "20000"
        },
        "node" : {
          "attr" : {
            "xpack" : {
              "installed" : "true"
            },
            "ml" : {
              "machine_memory" : "33721262080",
              "max_open_jobs" : "20",
              "enabled" : "true"
            }
          },
          "name" : "logstash01"
        },
        "path" : {
          "data" : [
            "/data/elasticsearch"
          ],
          "logs" : "/var/log/elasticsearch",
          "home" : "/usr/share/elasticsearch",
          "repo" : [
            "/data/es_backup_68"
          ]
        },
--{snipped}--
{
  "_nodes" : {
    "total" : 1,
    "successful" : 1,
    "failed" : 0
  },
  "cluster_name" : "research-elk",
  "cluster_uuid" : "_BLINyOtT6eVYuVmUBVM0Q",
  "timestamp" : 1643235282129,
  "status" : "yellow",
  "indices" : {
    "count" : 1081,
    "shards" : {
      "total" : 5349,
      "primaries" : 5349,
      "replication" : 0.0,
      "index" : {
        "shards" : {
          "min" : 1,
          "max" : 5,
          "avg" : 4.948196114708603
        },
        "primaries" : {
          "min" : 1,
          "max" : 5,
          "avg" : 4.948196114708603
        },
        "replication" : {
          "min" : 0.0,
          "max" : 0.0,
          "avg" : 0.0
        }
      }
    },
    "docs" : {
      "count" : 2236394220,
      "deleted" : 396212
    },
    "store" : {
      "size" : "939.3gb",
      "size_in_bytes" : 1008625224263
    },
    "fielddata" : {
      "memory_size" : "74.7kb",
      "memory_size_in_bytes" : 76568,
      "evictions" : 0
    },
    "query_cache" : {
      "memory_size" : "9.8mb",
      "memory_size_in_bytes" : 10305840,
      "total_count" : 403944,
      "hit_count" : 92086,
      "miss_count" : 311858,
      "cache_size" : 314,
      "cache_count" : 363,
      "evictions" : 49
    },
    "completion" : {
      "size" : "0b",
      "size_in_bytes" : 0
    },
    "segments" : {
      "count" : 62039,
      "memory" : "2.9gb",
      "memory_in_bytes" : 3171170775,
      "terms_memory" : "2.6gb",
      "terms_memory_in_bytes" : 2847963628,
      "stored_fields_memory" : "196.3mb",
      "stored_fields_memory_in_bytes" : 205863416,
      "term_vectors_memory" : "0b",
      "term_vectors_memory_in_bytes" : 0,
      "norms_memory" : "1.2kb",
      "norms_memory_in_bytes" : 1280,
      "points_memory" : "32.3mb",
      "points_memory_in_bytes" : 33880735,
      "doc_values_memory" : "79.5mb",
      "doc_values_memory_in_bytes" : 83461716,
      "index_writer_memory" : "4.9mb",
      "index_writer_memory_in_bytes" : 5211760,
      "version_map_memory" : "0b",
      "version_map_memory_in_bytes" : 0,
      "fixed_bit_set" : "7.8mb",
      "fixed_bit_set_memory_in_bytes" : 8219272,
      "max_unsafe_auto_id_timestamp" : 1643223611362,
      "file_sizes" : { }
    }
  },
  "nodes" : {
    "count" : {
      "total" : 1,
      "data" : 1,
      "coordinating_only" : 0,
      "master" : 1,
      "ingest" : 1
    },
    "versions" : [
      "6.8.23"
    ],
    "os" : {
      "available_processors" : 16,
      "allocated_processors" : 16,
      "names" : [
        {
          "name" : "Linux",
          "count" : 1
        }
      ],
      "pretty_names" : [
        {
          "pretty_name" : "Ubuntu 16.04.7 LTS",
          "count" : 1
        }
      ],
      "mem" : {
        "total" : "31.4gb",
        "total_in_bytes" : 33721262080,
        "free" : "2.9gb",
        "free_in_bytes" : 3118370816,
        "used" : "28.5gb",
        "used_in_bytes" : 30602891264,
        "free_percent" : 9,
        "used_percent" : 91
      }
    },
    "process" : {
      "cpu" : {
        "percent" : 3
      },
      "open_file_descriptors" : {
        "min" : 39101,
        "max" : 39101,
        "avg" : 39101
      }
    },
    "jvm" : {
      "max_uptime" : "1.8d",
      "max_uptime_in_millis" : 162122644,
      "versions" : [
        {
          "version" : "1.8.0_292",
          "vm_name" : "OpenJDK 64-Bit Server VM",
          "vm_version" : "25.292-b10",
          "vm_vendor" : "Private Build",
          "count" : 1
        }
      ],
      "mem" : {
        "heap_used" : "10gb",
        "heap_used_in_bytes" : 10833665528,
        "heap_max" : "15.8gb",
        "heap_max_in_bytes" : 17066491904
      },
      "threads" : 283
    },
    "fs" : {
      "total" : "7tb",
      "total_in_bytes" : 7714700525568,
      "free" : "5.1tb",
      "free_in_bytes" : 5694657527808,
      "available" : "5.1tb",
      "available_in_bytes" : 5694657527808
    },
    "plugins" : [ ],
    "network_types" : {
      "transport_types" : {
        "security4" : 1
      },
      "http_types" : {
        "security4" : 1
      }
    }
  }
}

I might be reading the code wrong (because this is fairly old code at this point), but I think it's actually not reading that setting from elasticsearch.yml. It's only reading it from cluster settings that have been set through the API. Since it's not finding it there, it's using the default. Try setting it through the API like:

PUT /_cluster/settings
{
    "transient" : {
        "cluster.max_shards_per_node" : 10000
    }
}

Have done so; output:

{
  "acknowledged" : true,
  "persistent" : { },
  "transient" : {
    "cluster" : {
      "max_shards_per_node" : "10000"
    }
  }
}

That did it; the "Cluster" tab now shows:

All clear!

You have no cluster issues.

Check the Overview tab for next steps.

Thanks!

You have a single node with nearly 5400 shards — that's way too many, and you need to work to reduce it.

Also please upgrade, 8.0 is not far off and you're running 6.X.

I hear you; I need to get what I have in order, and then I can work on data lifecycle matters, etc.

Also, I do want to upgrade to 7.x, but I have some index issues to resolve as well before I can do the upgrade... Like this one...

This topic was automatically closed 28 days after the last reply. New replies are no longer allowed.