System load too high

I'm working on a VM with a single-node instance of Elasticsearch, and I have been receiving a lot of system load alerts for many days. Indeed, the overall system load is always higher than 1.
Below is a Grafana chart that shows this.

The VM runs CentOS Linux 7 with an 11 TB hard disk and 16 GB of RAM.
I've set up the jvm heap size as below:

-Xms8g
-Xmx8g

Elasticsearch has about 350 indices, for a total of 8.6 GB of occupied space.
Below are two screenshots: an example of the JVM heap distribution and the Elasticsearch system load.

JVM_heap
system_load

I suspect this could be a case of oversharding, but I had to model the indices this way in order to grant different roles read permissions on different indices. This is therefore an external requirement that I cannot modify.

What can I do to decrease the system load? What configurations can I set?

Thanks a lot

How many shards do you have in total? Is the disk of your node backed by SSD or a Spinning disk?

One of Elastic's recommendations is to have a maximum of 20 shards per GB of heap. Since you have 8 GB of heap, that would be 160 shards; considering that each of your indices has only one primary shard, you are already above that with 350.

Also, with 350 indices and just 8.6 GB of space used, you have too many small shards.

It could really be an oversharding problem, but the way to solve an oversharding problem is to reduce the number of shards and indices.

What is the output from the _cluster/stats?pretty&human API?

Hi,
I have 350 shards in total; since I have a single-node instance, I have only primary shards. The disk is a RAID 5 array composed of 4 HDDs x 4 TB (11 TB total formatted).

I certainly have too many small indices, but due to the segregation of the data, I cannot change the structure of the indices in the short term.

That does not sound like a recommended disk setup and could be very slow. Are you monitoring disk I/O and iowait? If so, what does this look like?

It would also help if you described the workload. Is it indexing or query heavy? If indexing heavy, how many of the indices are you actively indexing into?

Hi,
Below are the results of the _cluster/stats?pretty&human API:

{
  "_nodes" : {
    "total" : 1,
    "successful" : 1,
    "failed" : 0
  },
  "cluster_name" : "xxxx",
  "cluster_uuid" : "xxxx",
  "timestamp" : 1641371637499,
  "status" : "green",
  "indices" : {
    "count" : 324,
    "shards" : {
      "total" : 324,
      "primaries" : 324,
      "replication" : 0.0,
      "index" : {
        "shards" : {
          "min" : 1,
          "max" : 1,
          "avg" : 1.0
        },
        "primaries" : {
          "min" : 1,
          "max" : 1,
          "avg" : 1.0
        },
        "replication" : {
          "min" : 0.0,
          "max" : 0.0,
          "avg" : 0.0
        }
      }
    },
    "docs" : {
      "count" : 23184865,
      "deleted" : 2605143
    },
    "store" : {
      "size" : "8.3gb",
      "size_in_bytes" : 8982096554,
      "total_data_set_size" : "8.3gb",
      "total_data_set_size_in_bytes" : 8982096554,
      "reserved" : "0b",
      "reserved_in_bytes" : 0
    },
    "fielddata" : {
      "memory_size" : "105.2kb",
      "memory_size_in_bytes" : 107736,
      "evictions" : 0
    },
    "query_cache" : {
      "memory_size" : "803kb",
      "memory_size_in_bytes" : 822296,
      "total_count" : 4391601,
      "hit_count" : 375796,
      "miss_count" : 4015805,
      "cache_size" : 541,
      "cache_count" : 5087,
      "evictions" : 4546
    },
    "completion" : {
      "size" : "0b",
      "size_in_bytes" : 0
    },
    "segments" : {
      "count" : 1513,
      "memory" : "13.5mb",
      "memory_in_bytes" : 14169036,
      "terms_memory" : "8.3mb",
      "terms_memory_in_bytes" : 8753216,
      "stored_fields_memory" : "739.6kb",
      "stored_fields_memory_in_bytes" : 757416,
      "term_vectors_memory" : "0b",
      "term_vectors_memory_in_bytes" : 0,
      "norms_memory" : "12.9kb",
      "norms_memory_in_bytes" : 13248,
      "points_memory" : "0b",
      "points_memory_in_bytes" : 0,
      "doc_values_memory" : "4.4mb",
      "doc_values_memory_in_bytes" : 4645156,
      "index_writer_memory" : "1.9mb",
      "index_writer_memory_in_bytes" : 2051056,
      "version_map_memory" : "168b",
      "version_map_memory_in_bytes" : 168,
      "fixed_bit_set" : "3.1mb",
      "fixed_bit_set_memory_in_bytes" : 3256424,
      "max_unsafe_auto_id_timestamp" : 1641286516089,
      "file_sizes" : { }
    },
    "mappings" : {
      "field_types" : [
        {
          "name" : "alias",
          "count" : 3614,
          "index_count" : 278,
          "script_count" : 0
        },
        {
          "name" : "boolean",
          "count" : 40083,
          "index_count" : 297,
          "script_count" : 0
        },
        {
          "name" : "byte",
          "count" : 278,
          "index_count" : 278,
          "script_count" : 0
        },
        {
          "name" : "constant_keyword",
          "count" : 837,
          "index_count" : 279,
          "script_count" : 0
        },
        {
          "name" : "date",
          "count" : 55163,
          "index_count" : 309,
          "script_count" : 0
        },
        {
          "name" : "double",
          "count" : 10328,
          "index_count" : 285,
          "script_count" : 0
        },
        {
          "name" : "flattened",
          "count" : 11676,
          "index_count" : 278,
          "script_count" : 0
        },
        {
          "name" : "float",
          "count" : 9808,
          "index_count" : 289,
          "script_count" : 0
        },
        {
          "name" : "geo_point",
          "count" : 3058,
          "index_count" : 278,
          "script_count" : 0
        },
        {
          "name" : "half_float",
          "count" : 56,
          "index_count" : 14,
          "script_count" : 0
        },
        {
          "name" : "integer",
          "count" : 154,
          "index_count" : 7,
          "script_count" : 0
        },
        {
          "name" : "ip",
          "count" : 37531,
          "index_count" : 279,
          "script_count" : 0
        },
        {
          "name" : "ip_range",
          "count" : 278,
          "index_count" : 278,
          "script_count" : 0
        },
        {
          "name" : "keyword",
          "count" : 1260997,
          "index_count" : 309,
          "script_count" : 0
        },
        {
          "name" : "long",
          "count" : 303510,
          "index_count" : 306,
          "script_count" : 0
        },
        {
          "name" : "nested",
          "count" : 4473,
          "index_count" : 289,
          "script_count" : 0
        },
        {
          "name" : "object",
          "count" : 251935,
          "index_count" : 309,
          "script_count" : 0
        },
        {
          "name" : "scaled_float",
          "count" : 278,
          "index_count" : 278,
          "script_count" : 0
        },
        {
          "name" : "short",
          "count" : 28634,
          "index_count" : 278,
          "script_count" : 0
        },
        {
          "name" : "text",
          "count" : 32577,
          "index_count" : 295,
          "script_count" : 0
        },
        {
          "name" : "version",
          "count" : 4,
          "index_count" : 4,
          "script_count" : 0
        },
        {
          "name" : "wildcard",
          "count" : 278,
          "index_count" : 278,
          "script_count" : 0
        }
      ],
      "runtime_field_types" : [ ]
    },
    "analysis" : {
      "char_filter_types" : [ ],
      "tokenizer_types" : [ ],
      "filter_types" : [ ],
      "analyzer_types" : [ ],
      "built_in_char_filters" : [ ],
      "built_in_tokenizers" : [ ],
      "built_in_filters" : [ ],
      "built_in_analyzers" : [ ]
    },
    "versions" : [
      {
        "version" : "7.15.1",
        "index_count" : 231,
        "primary_shard_count" : 231,
        "total_primary_size" : "7.6gb",
        "total_primary_bytes" : 8266937765
      },
      {
        "version" : "7.16.2",
        "index_count" : 93,
        "primary_shard_count" : 93,
        "total_primary_size" : "682mb",
        "total_primary_bytes" : 715158789
      }
    ]
  },
  "nodes" : {
    "count" : {
      "total" : 1,
      "coordinating_only" : 0,
      "data" : 1,
      "data_cold" : 1,
      "data_content" : 1,
      "data_frozen" : 1,
      "data_hot" : 1,
      "data_warm" : 1,
      "ingest" : 1,
      "master" : 1,
      "ml" : 1,
      "remote_cluster_client" : 1,
      "transform" : 1,
      "voting_only" : 0
    },
    "versions" : [
      "7.16.2"
    ],
    "os" : {
      "available_processors" : 8,
      "allocated_processors" : 8,
      "names" : [
        {
          "name" : "Linux",
          "count" : 1
        }
      ],
      "pretty_names" : [
        {
          "pretty_name" : "CentOS Linux 7 (Core)",
          "count" : 1
        }
      ],
      "architectures" : [
        {
          "arch" : "amd64",
          "count" : 1
        }
      ],
      "mem" : {
        "total" : "15.4gb",
        "total_in_bytes" : 16547852288,
        "free" : "214.2mb",
        "free_in_bytes" : 224673792,
        "used" : "15.2gb",
        "used_in_bytes" : 16323178496,
        "free_percent" : 1,
        "used_percent" : 99
      }
    },
    "process" : {
      "cpu" : {
        "percent" : 2
      },
      "open_file_descriptors" : {
        "min" : 2143,
        "max" : 2143,
        "avg" : 2143
      }
    },
    "jvm" : {
      "max_uptime" : "23.6h",
      "max_uptime_in_millis" : 85315845,
      "versions" : [
        {
          "version" : "17.0.1",
          "vm_name" : "OpenJDK 64-Bit Server VM",
          "vm_version" : "17.0.1+12",
          "vm_vendor" : "Eclipse Adoptium",
          "bundled_jdk" : true,
          "using_bundled_jdk" : true,
          "count" : 1
        }
      ],
      "mem" : {
        "heap_used" : "4.3gb",
        "heap_used_in_bytes" : 4664314360,
        "heap_max" : "8gb",
        "heap_max_in_bytes" : 8589934592
      },
      "threads" : 115
    },
    "fs" : {
      "total" : "10.8tb",
      "total_in_bytes" : 11907179462656,
      "free" : "10.8tb",
      "free_in_bytes" : 11885940756480,
      "available" : "10.2tb",
      "available_in_bytes" : 11285826506752
    },
    "plugins" : [ ],
    "network_types" : {
      "transport_types" : {
        "security4" : 1
      },
      "http_types" : {
        "security4" : 1
      }
    },
    "discovery_types" : {
      "single-node" : 1
    },
    "packaging_types" : [
      {
        "flavor" : "default",
        "type" : "rpm",
        "count" : 1
      }
    ],
    "ingest" : {
      "number_of_pipelines" : 7,
      "processor_stats" : {
        "conditional" : {
          "count" : 0,
          "failed" : 0,
          "current" : 0,
          "time" : "0s",
          "time_in_millis" : 0
        },
        "convert" : {
          "count" : 0,
          "failed" : 0,
          "current" : 0,
          "time" : "0s",
          "time_in_millis" : 0
        },
        "geoip" : {
          "count" : 0,
          "failed" : 0,
          "current" : 0,
          "time" : "0s",
          "time_in_millis" : 0
        },
        "grok" : {
          "count" : 0,
          "failed" : 0,
          "current" : 0,
          "time" : "0s",
          "time_in_millis" : 0
        },
        "gsub" : {
          "count" : 0,
          "failed" : 0,
          "current" : 0,
          "time" : "0s",
          "time_in_millis" : 0
        },
        "remove" : {
          "count" : 0,
          "failed" : 0,
          "current" : 0,
          "time" : "0s",
          "time_in_millis" : 0
        },
        "rename" : {
          "count" : 0,
          "failed" : 0,
          "current" : 0,
          "time" : "0s",
          "time_in_millis" : 0
        },
        "script" : {
          "count" : 0,
          "failed" : 0,
          "current" : 0,
          "time" : "0s",
          "time_in_millis" : 0
        },
        "set" : {
          "count" : 0,
          "failed" : 0,
          "current" : 0,
          "time" : "0s",
          "time_in_millis" : 0
        },
        "set_security_user" : {
          "count" : 0,
          "failed" : 0,
          "current" : 0,
          "time" : "0s",
          "time_in_millis" : 0
        }
      }
    }
  }
}

[details="Summary"]
This text will be hidden
[/details]

This topic was automatically closed 28 days after the last reply. New replies are no longer allowed.