Elasticsearch 8.7.0 High Heap Usage

Hello, we have a 3-node cluster (one load-balancer node, one slave node and one master node) running the latest Ubuntu 22.04.2 and Elasticsearch with Kibana, version 8.7.0. The master and slave systems each have 4 CPUs and 64 GB of RAM. The load balancer has only 2 CPU cores and 8 GB of RAM.

We have only 7 indices.
Five indices have one primary shard on the master node and a replica on the slave node. The other two main indices have 15 shards on the first index (its mapping is about 5700 lines) and only 10 shards on the second main index (its mapping is about 2700 lines). Is it possible that the mappings contain too many fields? We are filling the Elasticsearch cluster with data via initial mini-bulks, and the heap usage rises significantly. The Elasticsearch 8 cluster currently holds 197 GB of data in total.

The problem is that the heap usage on both nodes (slave and master) is always about 90%. We increased the Xms and Xmx parameters in jvm.options to 32 GB, but the heap usage does not go down by itself. Even after a restart, it reaches 90% heap usage without any data being loaded.

Following things we tried:

  • Reducing the shards by shrinking the main indices

  • Rebooting the server

  • Verified the mapping fields

  • Adjusted the GC params

It is also worth mentioning that when we closed and deleted both main indices, the heap usage dropped to 1%, but we need this type of data because we distribute it to our customers.

Are there any solutions we can try to fix this issue?

Welcome to our community! :smiley:

There are no such things as load balancer or slave nodes in Elasticsearch. Are you able to clarify what you mean by those, please?

What is the output from the _cluster/stats?pretty&human API?

Hello, by slave and load-balancer nodes I mean a data node and a coordinating node, respectively.

Here is the output:

{
  "_nodes": {
    "total": 3,
    "successful": 3,
    "failed": 0
  },
  "cluster_name": "es_lab_report",
  "cluster_uuid": "dtFb7uq1TfSgmRMY17505g",
  "timestamp": 1682489279230,
  "status": "green",
  "indices": {
    "count": 30,
    "shards": {
      "total": 106,
      "primaries": 53,
      "replication": 1,
      "index": {
        "shards": {
          "min": 2,
          "max": 30,
          "avg": 3.533333333333333
        },
        "primaries": {
          "min": 1,
          "max": 15,
          "avg": 1.7666666666666666
        },
        "replication": {
          "min": 1,
          "max": 1,
          "avg": 1
        }
      }
    },
    "docs": {
      "count": 6818607,
      "deleted": 1197316
    },
    "store": {
      "size": "193.2gb",
      "size_in_bytes": 207528606307,
      "total_data_set_size": "193.2gb",
      "total_data_set_size_in_bytes": 207528606307,
      "reserved": "0b",
      "reserved_in_bytes": 0
    },
    "fielddata": {
      "memory_size": "0b",
      "memory_size_in_bytes": 0,
      "evictions": 0
    },
    "query_cache": {
      "memory_size": "2.4mb",
      "memory_size_in_bytes": 2607165,
      "total_count": 2327370,
      "hit_count": 102133,
      "miss_count": 2225237,
      "cache_size": 655,
      "cache_count": 1729,
      "evictions": 1074
    },
    "completion": {
      "size": "41.4gb",
      "size_in_bytes": 44502476161
    },
    "segments": {
      "count": 1679,
      "memory": "0b",
      "memory_in_bytes": 0,
      "terms_memory": "0b",
      "terms_memory_in_bytes": 0,
      "stored_fields_memory": "0b",
      "stored_fields_memory_in_bytes": 0,
      "term_vectors_memory": "0b",
      "term_vectors_memory_in_bytes": 0,
      "norms_memory": "0b",
      "norms_memory_in_bytes": 0,
      "points_memory": "0b",
      "points_memory_in_bytes": 0,
      "doc_values_memory": "0b",
      "doc_values_memory_in_bytes": 0,
      "index_writer_memory": "6.8mb",
      "index_writer_memory_in_bytes": 7162736,
      "version_map_memory": "73.5kb",
      "version_map_memory_in_bytes": 75354,
      "fixed_bit_set": "3.3kb",
      "fixed_bit_set_memory_in_bytes": 3480,
      "max_unsafe_auto_id_timestamp": 1682467204746,
      "file_sizes": {}
    },
    "mappings": {
      "total_field_count": 12673,
      "total_deduplicated_field_count": 11909,
      "total_deduplicated_mapping_size": "75.4kb",
      "total_deduplicated_mapping_size_in_bytes": 77251,
      "field_types": [
        {
          "name": "alias",
          "count": 13,
          "index_count": 1,
          "script_count": 0
        },
        {
          "name": "binary",
          "count": 1,
          "index_count": 1,
          "script_count": 0
        },
        {
          "name": "boolean",
          "count": 333,
          "index_count": 18,
          "script_count": 0
        },
        {
          "name": "completion",
          "count": 98,
          "index_count": 5,
          "script_count": 0
        },
        {
          "name": "constant_keyword",
          "count": 9,
          "index_count": 3,
          "script_count": 0
        },
        {
          "name": "date",
          "count": 275,
          "index_count": 21,
          "script_count": 0
        },
        {
          "name": "double",
          "count": 130,
          "index_count": 3,
          "script_count": 0
        },
        {
          "name": "flattened",
          "count": 48,
          "index_count": 1,
          "script_count": 0
        },
        {
          "name": "float",
          "count": 68,
          "index_count": 6,
          "script_count": 0
        },
        {
          "name": "geo_point",
          "count": 9,
          "index_count": 1,
          "script_count": 0
        },
        {
          "name": "half_float",
          "count": 16,
          "index_count": 4,
          "script_count": 0
        },
        {
          "name": "integer",
          "count": 44,
          "index_count": 2,
          "script_count": 0
        },
        {
          "name": "ip",
          "count": 171,
          "index_count": 3,
          "script_count": 0
        },
        {
          "name": "keyword",
          "count": 6495,
          "index_count": 22,
          "script_count": 0
        },
        {
          "name": "long",
          "count": 2278,
          "index_count": 19,
          "script_count": 0
        },
        {
          "name": "match_only_text",
          "count": 63,
          "index_count": 1,
          "script_count": 0
        },
        {
          "name": "nested",
          "count": 26,
          "index_count": 6,
          "script_count": 0
        },
        {
          "name": "object",
          "count": 1262,
          "index_count": 21,
          "script_count": 0
        },
        {
          "name": "scaled_float",
          "count": 3,
          "index_count": 1,
          "script_count": 0
        },
        {
          "name": "short",
          "count": 203,
          "index_count": 1,
          "script_count": 0
        },
        {
          "name": "text",
          "count": 1108,
          "index_count": 17,
          "script_count": 0
        },
        {
          "name": "version",
          "count": 3,
          "index_count": 3,
          "script_count": 0
        },
        {
          "name": "wildcard",
          "count": 17,
          "index_count": 1,
          "script_count": 0
        }
      ],
      "runtime_field_types": []
    },
    "analysis": {
      "char_filter_types": [],
      "tokenizer_types": [],
      "filter_types": [],
      "analyzer_types": [
        {
          "name": "custom",
          "count": 7,
          "index_count": 7
        }
      ],
      "built_in_char_filters": [],
      "built_in_tokenizers": [
        {
          "name": "keyword",
          "count": 7,
          "index_count": 7
        }
      ],
      "built_in_filters": [
        {
          "name": "lowercase",
          "count": 7,
          "index_count": 7
        }
      ],
      "built_in_analyzers": [
        {
          "name": "keyword_lowercase",
          "count": 1123,
          "index_count": 5
        }
      ]
    },
    "versions": [
      {
        "version": "8.6.0",
        "index_count": 10,
        "primary_shard_count": 10,
        "total_primary_size": "465.8mb",
        "total_primary_bytes": 488528205
      },
      {
        "version": "8.6.2",
        "index_count": 8,
        "primary_shard_count": 31,
        "total_primary_size": "96gb",
        "total_primary_bytes": 103125182478
      },
      {
        "version": "8.7.0",
        "index_count": 12,
        "primary_shard_count": 12,
        "total_primary_size": "186.9mb",
        "total_primary_bytes": 196020778
      }
    ],
    "search": {
      "total": 94514,
      "queries": {
        "bool": 94224,
        "terms": 59657,
        "prefix": 127,
        "match": 21016,
        "match_phrase_prefix": 9,
        "match_all": 1,
        "exists": 36563,
        "range": 46189,
        "term": 81788,
        "nested": 1,
        "simple_query_string": 2449,
        "wildcard": 1
      },
      "sections": {
        "runtime_mappings": 102,
        "query": 94226,
        "terminate_after": 50,
        "_source": 299,
        "pit": 93,
        "fields": 101,
        "collapse": 10460,
        "aggs": 36699
      }
    }
  },
  "nodes": {
    "count": {
      "total": 3,
      "coordinating_only": 0,
      "data": 2,
      "data_cold": 0,
      "data_content": 0,
      "data_frozen": 0,
      "data_hot": 0,
      "data_warm": 0,
      "index": 0,
      "ingest": 0,
      "master": 1,
      "ml": 0,
      "remote_cluster_client": 3,
      "search": 0,
      "transform": 0,
      "voting_only": 0
    },
    "versions": [
      "8.7.0"
    ],
    "os": {
      "available_processors": 12,
      "allocated_processors": 12,
      "names": [
        {
          "name": "Linux",
          "count": 3
        }
      ],
      "pretty_names": [
        {
          "pretty_name": "Ubuntu 22.04.2 LTS",
          "count": 3
        }
      ],
      "architectures": [
        {
          "arch": "amd64",
          "count": 3
        }
      ],
      "mem": {
        "total": "133.3gb",
        "total_in_bytes": 143210778624,
        "adjusted_total": "133.3gb",
        "adjusted_total_in_bytes": 143210778624,
        "free": "10.4gb",
        "free_in_bytes": 11251134464,
        "used": "122.8gb",
        "used_in_bytes": 131959644160,
        "free_percent": 8,
        "used_percent": 92
      }
    },
    "process": {
      "cpu": {
        "percent": 0
      },
      "open_file_descriptors": {
        "min": 478,
        "max": 1001,
        "avg": 819
      }
    },
    "jvm": {
      "max_uptime": "20.3h",
      "max_uptime_in_millis": 73317314,
      "versions": [
        {
          "version": "19.0.2",
          "vm_name": "OpenJDK 64-Bit Server VM",
          "vm_version": "19.0.2+7-44",
          "vm_vendor": "Oracle Corporation",
          "bundled_jdk": true,
          "using_bundled_jdk": true,
          "count": 3
        }
      ],
      "mem": {
        "heap_used": "58.3gb",
        "heap_used_in_bytes": 62689051048,
        "heap_max": "68gb",
        "heap_max_in_bytes": 73014444032
      },
      "threads": 183
    },
    "fs": {
      "total": "623.6gb",
      "total_in_bytes": 669589733376,
      "free": "396.2gb",
      "free_in_bytes": 425482178560,
      "available": "368.8gb",
      "available_in_bytes": 395996483584
    },
    "plugins": [],
    "network_types": {
      "transport_types": {
        "netty4": 3
      },
      "http_types": {
        "netty4": 3
      }
    },
    "discovery_types": {
      "multi-node": 3
    },
    "packaging_types": [
      {
        "flavor": "default",
        "type": "deb",
        "count": 3
      }
    ],
    "ingest": {
      "number_of_pipelines": 498,
      "processor_stats": {
        "append": {
          "count": 0,
          "failed": 0,
          "current": 0,
          "time": "0s",
          "time_in_millis": 0
        },
        "community_id": {
          "count": 0,
          "failed": 0,
          "current": 0,
          "time": "0s",
          "time_in_millis": 0
        },
        "convert": {
          "count": 0,
          "failed": 0,
          "current": 0,
          "time": "0s",
          "time_in_millis": 0
        },
        "csv": {
          "count": 0,
          "failed": 0,
          "current": 0,
          "time": "0s",
          "time_in_millis": 0
        },
        "date": {
          "count": 0,
          "failed": 0,
          "current": 0,
          "time": "0s",
          "time_in_millis": 0
        },
        "dissect": {
          "count": 0,
          "failed": 0,
          "current": 0,
          "time": "0s",
          "time_in_millis": 0
        },
        "dot_expander": {
          "count": 0,
          "failed": 0,
          "current": 0,
          "time": "0s",
          "time_in_millis": 0
        },
        "drop": {
          "count": 0,
          "failed": 0,
          "current": 0,
          "time": "0s",
          "time_in_millis": 0
        },
        "fingerprint": {
          "count": 0,
          "failed": 0,
          "current": 0,
          "time": "0s",
          "time_in_millis": 0
        },
        "foreach": {
          "count": 0,
          "failed": 0,
          "current": 0,
          "time": "0s",
          "time_in_millis": 0
        },
        "geoip": {
          "count": 0,
          "failed": 0,
          "current": 0,
          "time": "0s",
          "time_in_millis": 0
        },
        "grok": {
          "count": 0,
          "failed": 0,
          "current": 0,
          "time": "0s",
          "time_in_millis": 0
        },
        "gsub": {
          "count": 0,
          "failed": 0,
          "current": 0,
          "time": "0s",
          "time_in_millis": 0
        },
        "join": {
          "count": 0,
          "failed": 0,
          "current": 0,
          "time": "0s",
          "time_in_millis": 0
        },
        "json": {
          "count": 0,
          "failed": 0,
          "current": 0,
          "time": "0s",
          "time_in_millis": 0
        },
        "kv": {
          "count": 0,
          "failed": 0,
          "current": 0,
          "time": "0s",
          "time_in_millis": 0
        },
        "lowercase": {
          "count": 0,
          "failed": 0,
          "current": 0,
          "time": "0s",
          "time_in_millis": 0
        },
        "network_direction": {
          "count": 0,
          "failed": 0,
          "current": 0,
          "time": "0s",
          "time_in_millis": 0
        },
        "pipeline": {
          "count": 0,
          "failed": 0,
          "current": 0,
          "time": "0s",
          "time_in_millis": 0
        },
        "remove": {
          "count": 0,
          "failed": 0,
          "current": 0,
          "time": "0s",
          "time_in_millis": 0
        },
        "rename": {
          "count": 0,
          "failed": 0,
          "current": 0,
          "time": "0s",
          "time_in_millis": 0
        },
        "script": {
          "count": 0,
          "failed": 0,
          "current": 0,
          "time": "0s",
          "time_in_millis": 0
        },
        "set": {
          "count": 0,
          "failed": 0,
          "current": 0,
          "time": "0s",
          "time_in_millis": 0
        },
        "split": {
          "count": 0,
          "failed": 0,
          "current": 0,
          "time": "0s",
          "time_in_millis": 0
        },
        "trim": {
          "count": 0,
          "failed": 0,
          "current": 0,
          "time": "0s",
          "time_in_millis": 0
        },
        "uppercase": {
          "count": 0,
          "failed": 0,
          "current": 0,
          "time": "0s",
          "time_in_millis": 0
        },
        "uri_parts": {
          "count": 0,
          "failed": 0,
          "current": 0,
          "time": "0s",
          "time_in_millis": 0
        },
        "urldecode": {
          "count": 0,
          "failed": 0,
          "current": 0,
          "time": "0s",
          "time_in_millis": 0
        },
        "user_agent": {
          "count": 0,
          "failed": 0,
          "current": 0,
          "time": "0s",
          "time_in_millis": 0
        }
      }
    },
    "indexing_pressure": {
      "memory": {
        "current": {
          "combined_coordinating_and_primary": "0b",
          "combined_coordinating_and_primary_in_bytes": 0,
          "coordinating": "0b",
          "coordinating_in_bytes": 0,
          "primary": "0b",
          "primary_in_bytes": 0,
          "replica": "0b",
          "replica_in_bytes": 0,
          "all": "0b",
          "all_in_bytes": 0
        },
        "total": {
          "combined_coordinating_and_primary": "0b",
          "combined_coordinating_and_primary_in_bytes": 0,
          "coordinating": "0b",
          "coordinating_in_bytes": 0,
          "primary": "0b",
          "primary_in_bytes": 0,
          "replica": "0b",
          "replica_in_bytes": 0,
          "all": "0b",
          "all_in_bytes": 0,
          "coordinating_rejections": 0,
          "primary_rejections": 0,
          "replica_rejections": 0
        },
        "limit": "0b",
        "limit_in_bytes": 0
      }
    }
  }
}

Please make all your nodes master eligible. At the moment you run the risk of losing all your data if the master fails. You always want 3 master eligible nodes in any cluster.

Make sure the heap size is set below 32GB rather than exactly 32GB, so that the JVM uses compressed object pointers. You should be able to see this on startup.

I have now assigned the master role to all 3 nodes.

I reduced the JVM heap size and the usage is still at 90%.

Heap min and max should both be below approximately 31GB, and not greater than that.

I assigned the heap to 31GB.

This topic was automatically closed 28 days after the last reply. New replies are no longer allowed.