Elasticsearch complaining about Shard limit

I have had this problem every other month. I just go in and close the oldest two months of indices, and that had been solving the problem up until this week. Even though the Elasticsearch stats page tells me that I only have 691 shards, /var/log/messages is still getting complaints that the shard limit has been reached.

Here are the Stats.

{"_shards":{"total":691,"successful":375,"failed":0},"_all":{"primaries":{"docs":{"count":407595633,"deleted":4809986},"store":{"size_in_bytes":347694910745},"indexing":{"index_total":22743762,"index_time_in_millis":6942641,"index_current":0,"index_failed":2,"delete_total":0,"delete_time_in_millis":0,"delete_current":0,"noop_update_total":0,"is_throttled":false,"throttle_time_in_millis":0},"get":{"total":8711,"time_in_millis":3039,"exists_total":8706,"exists_time_in_millis":3038,"missing_total":5,"missing_time_in_millis":1,"current":0},"search" 

Here is the error message:

 Jan  5 16:04:08 dmlog-01 auditbeat[24352]: 2021-01-05T16:04:08.901-0800#011INFO#011[monitoring]#011log/log.go:145#011Non-zero metrics in the last 30s#011{"monitoring": {"metrics": {"auditd":{"received_msgs":4},"beat":{"cpu":{"system":{"ticks":1668120,"time":{"ms":674}},"total":{"ticks":15543610,"time":{"ms":5598},"value":15543610},"user":{"ticks":13875490,"time":{"ms":4924}}},"handles":{"limit":{"hard":4096,"soft":1024},"open":211},"info":{"ephemeral_id":"a954e875-9df0-4ec0-a024-96f5cdc86e74","uptime":{"ms":83374980}},"memstats":{"gc_next":11632848,"memory_alloc":10651656,"memory_total":81742771136},"runtime":{"goroutines":53}},"libbeat":{"config":{"module":{"running":0}},"output":{"events":{"acked":13,"batches":5,"total":13},"read":{"bytes":30},"write":{"bytes":6377}},"pipeline":{"clients":3,"events":{"active":0,"published":13,"total":13},"queue":{"acked":13}}},"metricbeat":{"auditd":{"auditd":{"events":4,"success":4}},"system":{"login":{"events":1,"success":1},"process":{"events":2,"success":2},"socket":{"events":6,"success":6}}},"system":{"load":{"1":0.77,"15":1,"5":1,"norm":{"1":0.0963,"15":0.125,"5":0.125}}}}}}

Best Regards,
Steve

I don't think that is the error entry you are talking about; it is an INFO-level Auditbeat monitoring metrics line, not a shard-limit error.

What is the output from the _cluster/stats?pretty&human API?

Thanks for the quick response.

{
  "_nodes" : {
    "total" : 1,
    "successful" : 1,
    "failed" : 0
  },
  "cluster_name" : "DataMigrationCluster",
  "cluster_uuid" : "I0YCE7KzQ6CUrRXeQeu50A",
  "timestamp" : 1609899693886,
  "status" : "yellow",
  "indices" : {
    "count" : 973,
    "shards" : {
      "total" : 973,
      "primaries" : 973,
      "replication" : 0.0,
      "index" : {
        "shards" : {
          "min" : 1,
          "max" : 1,
          "avg" : 1.0
        },
        "primaries" : {
          "min" : 1,
          "max" : 1,
          "avg" : 1.0
        },
        "replication" : {
          "min" : 0.0,
          "max" : 0.0,
          "avg" : 0.0
        }
      }
    },
    "docs" : {
      "count" : 1126819718,
      "deleted" : 4757503
    },
    "store" : {
      "size" : "948.6gb",
      "size_in_bytes" : 1018563670795
    },
    "fielddata" : {
      "memory_size" : "57.3kb",
      "memory_size_in_bytes" : 58760,
      "evictions" : 0
    },
    "query_cache" : {
      "memory_size" : "268kb",
      "memory_size_in_bytes" : 274448,
      "total_count" : 1861,
      "hit_count" : 245,
      "miss_count" : 1616,
      "cache_size" : 36,
      "cache_count" : 72,
      "evictions" : 36
    },
    "completion" : {
      "size" : "0b",
      "size_in_bytes" : 0
    },
    "segments" : {
      "count" : 4578,
      "memory" : "498.7mb",
      "memory_in_bytes" : 523012094,
      "terms_memory" : "310.3mb",
      "terms_memory_in_bytes" : 325468286,
      "stored_fields_memory" : "141.6mb",
      "stored_fields_memory_in_bytes" : 148488336,
      "term_vectors_memory" : "0b",
      "term_vectors_memory_in_bytes" : 0,
      "norms_memory" : "28.9mb",
      "norms_memory_in_bytes" : 30399808,
      "points_memory" : "0b",
      "points_memory_in_bytes" : 0,
      "doc_values_memory" : "17.7mb",
      "doc_values_memory_in_bytes" : 18655664,
      "index_writer_memory" : "583.2mb",
      "index_writer_memory_in_bytes" : 611572936,
      "version_map_memory" : "4.9mb",
      "version_map_memory_in_bytes" : 5150947,
      "fixed_bit_set" : "2.1mb",
      "fixed_bit_set_memory_in_bytes" : 2223936,
      "max_unsafe_auto_id_timestamp" : 1609890624622,
      "file_sizes" : { }
    }
  },
  "nodes" : {
    "count" : {
      "total" : 1,
      "coordinating_only" : 0,
      "data" : 1,
      "ingest" : 1,
      "master" : 1,
      "ml" : 1,
      "voting_only" : 0
    },
    "versions" : [
      "7.6.0"
    ],
    "os" : {
      "available_processors" : 8,
      "allocated_processors" : 8,
      "names" : [
        {
          "name" : "Linux",
          "count" : 1
        }
      ],
      "pretty_names" : [
        {
          "pretty_name" : "CentOS Linux 8 (Core)",
          "count" : 1
        }
      ],
      "mem" : {
        "total" : "70.4gb",
        "total_in_bytes" : 75689345024,
        "free" : "2.8gb",
        "free_in_bytes" : 3072073728,
        "used" : "67.6gb",
        "used_in_bytes" : 72617271296,
        "free_percent" : 4,
        "used_percent" : 96
      }
    },
    "process" : {
      "cpu" : {
        "percent" : 4
      },
      "open_file_descriptors" : {
        "min" : 8426,
        "max" : 8426,
        "avg" : 8426
      }
    },
    "jvm" : {
      "max_uptime" : "1d",
      "max_uptime_in_millis" : 92521134,
      "versions" : [
        {
          "version" : "13.0.2",
          "vm_name" : "OpenJDK 64-Bit Server VM",
          "vm_version" : "13.0.2+8",
          "vm_vendor" : "AdoptOpenJDK",
          "bundled_jdk" : true,
          "using_bundled_jdk" : true,
          "count" : 1
        }
      ],
      "mem" : {
        "heap_used" : "15.6gb",
        "heap_used_in_bytes" : 16845028760,
        "heap_max" : "29.9gb",
        "heap_max_in_bytes" : 32142524416
      },
      "threads" : 123
    },
    "fs" : {
      "total" : "1.6tb",
      "total_in_bytes" : 1799384989696,
      "free" : "699gb",
      "free_in_bytes" : 750594617344,
      "available" : "699gb",
      "available_in_bytes" : 750594617344
    },
    "plugins" : [ ],
    "network_types" : {
      "transport_types" : {
        "security4" : 1
      },
      "http_types" : {
        "security4" : 1
      }
    },
    "discovery_types" : {
      "zen" : 1
    },
    "packaging_types" : [
      {
        "flavor" : "default",
        "type" : "rpm",
        "count" : 1
      }
    ],
    "ingest" : {
      "number_of_pipelines" : 1,
      "processor_stats" : {
        "gsub" : {
          "count" : 0,
          "failed" : 0,
          "current" : 0,
          "time" : "0s",
          "time_in_millis" : 0
        },
        "script" : {
          "count" : 0,
          "failed" : 0,
          "current" : 0,
          "time" : "0s",
          "time_in_millis" : 0
        }
      }
    }
  }
}

You can, and probably should, reduce your shard count a fair bit; you're looking at ~1 GB per shard, which is very low.

If you are using Beats, then use ILM (index lifecycle management) as well. Otherwise, look at using _shrink to reduce any indices that have more than one primary shard, or consider reindexing daily indices into weekly or monthly ones.

Thank you for the suggestions. I will do some research and try one of those.

1 Like