How to optimize memory and heap usage for a single-node Elasticsearch cluster

Hi everyone, I have set up a single-node Elasticsearch cluster (latest version) with X-Pack and the Kibana UI on the same VM. The VM has 23 GB RAM, and I have allocated 12 GB of Java heap space (see the jvm.options sketch below). Even so, Elasticsearch uses all of the VM's RAM and constantly shuts down shortly after starting with a heap space or OutOfMemory error. We have logs coming from a K8s cluster via Rancher (fluentd pods), and it creates a new shard every day.

Can we set some config so that it clears the memory/heap when it is almost full, to prevent crashing?

  1. Why is Elasticsearch using all of the VM's RAM even though only 12 GB of heap space is allocated?
  2. Why does it take 15-20 minutes to stop the Elasticsearch service?
  3. How can I reduce its memory consumption?
  4. How can I reduce the load on Elasticsearch from the incoming data?

I need help creating a setup that stays up all the time.
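
For reference, this is roughly how I set the heap, using the standard jvm.options mechanism (the file path below is for a package install; it differs for archive installs):

# /etc/elasticsearch/jvm.options
# Initial and maximum heap pinned to the same value, 12 GB in my case
-Xms12g
-Xmx12g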

Without a description of your deployment it is pretty much impossible to help: version of the cluster, number of nodes, indices, shards, type of workload, type of queries, type of documents, mapping configurations, Java version.

Also, the nodes info and nodes stats APIs might be a good first indicator of where your memory goes.
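
For example (adjust host and port to your setup; the human parameter turns raw byte counts into readable units):

curl 'http://localhost:9200/_nodes?human&pretty'
curl 'http://localhost:9200/_nodes/stats?human&pretty'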

Thanks for the reply.
I have a single-node cluster, version 7.9.2.
Java version: OpenJDK "1.8.0_262".
It is getting logs from the Kubernetes cluster.
I have a Rancher setup on Kubernetes sending the logs. It creates a new index daily.
I don't think the data is more than 3 GB (see the _cat/indices check below).
I am running queries in the Kibana UI to search logs, using filters on the available fields.
It is mostly an OutOfMemory error.
Is there any configuration where it will clear the memory and heap space after reaching a limit?
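
For context, this is roughly how I checked the data volume (run in Kibana Dev Tools, or with curl against port 9200):

GET _cat/indices?v&h=index,docs.count,store.size&s=store.size:desc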

Node stats are here:

{
  "_nodes" : {
    "total" : 1,
    "successful" : 1,
    "failed" : 0
  },
  "cluster_name" : "elasticsearch",
  "nodes" : {
    "ZrjMu9DxSRWTePsJ_S63kg" : {
      "timestamp" : 1603116851336,
      "name" : "xxxxxxxxxx",
      "transport_address" : "192.168.x.x:9300",
      "host" : "192.168.x.x",
      "ip" : "192.168x.x:9300",
      "roles" : [
        "data",
        "ingest",
        "master",
        "remote_cluster_client",
        "transform"
      ],
      "attributes" : {
        "xpack.installed" : "true",
        "transform.node" : "true"
      },
      "indices" : {
        "docs" : {
          "count" : 2100234,
          "deleted" : 57
        },
        "store" : {
          "size_in_bytes" : 831176128,
          "reserved_in_bytes" : 0
        },
        "indexing" : {
          "index_total" : 186872,
          "index_time_in_millis" : 17329,
          "index_current" : 0,
          "index_failed" : 0,
          "delete_total" : 1,
          "delete_time_in_millis" : 4,
          "delete_current" : 0,
          "noop_update_total" : 0,
          "is_throttled" : false,
          "throttle_time_in_millis" : 0
        },
        "get" : {
          "total" : 144,
          "time_in_millis" : 229,
          "exists_total" : 136,
          "exists_time_in_millis" : 228,
          "missing_total" : 8,
          "missing_time_in_millis" : 1,
          "current" : 0
        },
        "search" : {
          "open_contexts" : 0,
          "query_total" : 220,
          "query_time_in_millis" : 778,
          "query_current" : 0,
          "fetch_total" : 216,
          "fetch_time_in_millis" : 921,
          "fetch_current" : 0,
          "scroll_total" : 160,
          "scroll_time_in_millis" : 2443,
          "scroll_current" : 0,
          "suggest_total" : 0,
          "suggest_time_in_millis" : 0,
          "suggest_current" : 0
        },
        "merges" : {
          "current" : 0,
          "current_docs" : 0,
          "current_size_in_bytes" : 0,
          "total" : 5,
          "total_time_in_millis" : 11097,
          "total_docs" : 282309,
          "total_size_in_bytes" : 99722168,
          "total_stopped_time_in_millis" : 0,
          "total_throttled_time_in_millis" : 3164,
          "total_auto_throttle_in_bytes" : 207808698
        },
        "refresh" : {
          "total" : 78,
          "total_time_in_millis" : 4639,
          "external_total" : 76,
          "external_total_time_in_millis" : 4527,
          "listeners" : 0
        },
        "flush" : {
          "total" : 6,
          "periodic" : 0,
          "total_time_in_millis" : 0
        },
        "warmer" : {
          "current" : 0,
          "total" : 64,
          "total_time_in_millis" : 22
        },
        "query_cache" : {
          "memory_size_in_bytes" : 0,
          "total_count" : 33,
          "hit_count" : 0,
          "miss_count" : 33,
          "cache_size" : 0,
          "cache_count" : 0,
          "evictions" : 0
        },
        "fielddata" : {
          "memory_size_in_bytes" : 0,
          "evictions" : 0
        },
        "completion" : {
          "size_in_bytes" : 0
        },
        "segments" : {
          "count" : 58,
          "memory_in_bytes" : 438592,
          "terms_memory_in_bytes" : 342400,
          "stored_fields_memory_in_bytes" : 40720,
          "term_vectors_memory_in_bytes" : 0,
          "norms_memory_in_bytes" : 43392,
          "points_memory_in_bytes" : 0,
          "doc_values_memory_in_bytes" : 12080,
          "index_writer_memory_in_bytes" : 11918836,
          "version_map_memory_in_bytes" : 0,
          "fixed_bit_set_memory_in_bytes" : 640,
          "max_unsafe_auto_id_timestamp" : 1603115423735,
          "file_sizes" : { }
        },
        "translog" : {
          "operations" : 186873,
          "size_in_bytes" : 209986002,
          "uncommitted_operations" : 186873,
          "uncommitted_size_in_bytes" : 209986002,
          "earliest_last_modified_age" : 0
        },
        "request_cache" : {
          "memory_size_in_bytes" : 19986,
          "evictions" : 0,
          "hit_count" : 6,
          "miss_count" : 2
        },
        "recovery" : {
          "current_as_source" : 0,
          "current_as_target" : 0,
          "throttle_time_in_millis" : 0
        }
      },
      "os" : {
        "timestamp" : 1603116851348,
        "cpu" : {
          "percent" : 1,
          "load_average" : {
            "1m" : 0.17,
            "5m" : 0.39,
            "15m" : 0.89
          }
        },
        "mem" : {
          "total_in_bytes" : 25092276224,
          "free_in_bytes" : 773218304,
          "used_in_bytes" : 24319057920,
          "free_percent" : 3,
          "used_percent" : 97
        },
        "swap" : {
          "total_in_bytes" : 0,
          "free_in_bytes" : 0,
          "used_in_bytes" : 0
        },
        "cgroup" : {
          "cpuacct" : {
            "control_group" : "/",
            "usage_nanos" : 4487256782666
          },
          "cpu" : {
            "control_group" : "/",
            "cfs_period_micros" : 100000,
            "cfs_quota_micros" : -1,
            "stat" : {
              "number_of_elapsed_periods" : 0,
              "number_of_times_throttled" : 0,
              "time_throttled_nanos" : 0
            }
          },
          "memory" : {
            "control_group" : "/",
            "limit_in_bytes" : "9223372036854771712",
            "usage_in_bytes" : "23622434816"
          }
        }
      },
      "process" : {
        "timestamp" : 1603116851348,
        "open_file_descriptors" : 322,
        "max_file_descriptors" : 65535,
        "cpu" : {
          "percent" : 1,
          "total_in_millis" : 142660
        },
        "mem" : {
          "total_virtual_in_bytes" : 20090822656
        }
      },
      "jvm" : {
        "timestamp" : 1603116851349,
        "uptime_in_millis" : 506828,
        "mem" : {
          "heap_used_in_bytes" : 7357687296,
          "heap_used_percent" : 57,
          "heap_committed_in_bytes" : 12884901888,
          "heap_max_in_bytes" : 12884901888,
          "non_heap_used_in_bytes" : 155401240,
          "non_heap_committed_in_bytes" : 163594240,
          "pools" : {
            "young" : {
              "used_in_bytes" : 5511315456,
              "max_in_bytes" : 0,
              "peak_used_in_bytes" : 5511315456,
              "peak_max_in_bytes" : 0
            },
            "old" : {
              "used_in_bytes" : 1594713600,
              "max_in_bytes" : 12884901888,
              "peak_used_in_bytes" : 1594713600,
              "peak_max_in_bytes" : 12884901888
            },
            "survivor" : {
              "used_in_bytes" : 251658240,
              "max_in_bytes" : 0,
              "peak_used_in_bytes" : 251658240,
              "peak_max_in_bytes" : 0
            }
          }
        },
        "threads" : {
          "count" : 79,
          "peak_count" : 87
        },
        "gc" : {
          "collectors" : {
            "young" : {
              "collection_count" : 14,
              "collection_time_in_millis" : 295
            },
            "old" : {
              "collection_count" : 0,
              "collection_time_in_millis" : 0
            }
          }
        },
        "buffer_pools" : {
          "mapped" : {
            "count" : 98,
            "used_in_bytes" : 543093083,
            "total_capacity_in_bytes" : 543093083
          },
          "direct" : {
            "count" : 53,
            "used_in_bytes" : 8676189,
            "total_capacity_in_bytes" : 8676188
          },
          "mapped - 'non-volatile memory'" : {
            "count" : 0,
            "used_in_bytes" : 0,
            "total_capacity_in_bytes" : 0
          }
        },
        "classes" : {
          "current_loaded_count" : 20888,
          "total_loaded_count" : 20888,
          "total_unloaded_count" : 0
        }
      },
      "thread_pool" : {
        "analyze" : {
          "threads" : 0,
          "queue" : 0,
          "active" : 0,
          "rejected" : 0,
          "largest" : 0,
          "completed" : 0
        },
        "ccr" : {
          "threads" : 0,
          "queue" : 0,
          "active" : 0,
          "rejected" : 0,
          "largest" : 0,
          "completed" : 0
        },
        "fetch_shard_started" : {
          "threads" : 1,
          "queue" : 0,
          "active" : 0,
          "rejected" : 0,
          "largest" : 10,
          "completed" : 10
        },
        "fetch_shard_store" : {
          "threads" : 0,
          "queue" : 0,
          "active" : 0,
          "rejected" : 0,
          "largest" : 0,
          "completed" : 0
        },
        "flush" : {
          "threads" : 3,
          "queue" : 0,
          "active" : 0,
          "rejected" : 0,
          "largest" : 3,
          "completed" : 8
        },
        "force_merge" : {
          "threads" : 0,
          "queue" : 0,
          "active" : 0,
          "rejected" : 0,
          "largest" : 0,
          "completed" : 0
        },
        "generic" : {
          "threads" : 5,
          "queue" : 0,
          "active" : 0,
          "rejected" : 0,
          "largest" : 5,
          "completed" : 542
        },
        "get" : {
          "threads" : 8,
          "queue" : 0,
          "active" : 0,
          "rejected" : 0,
          "largest" : 8,
          "completed" : 114
        },
        "listener" : {
          "threads" : 0,
          "queue" : 0,
          "active" : 0,
          "rejected" : 0,
          "largest" : 0,
          "completed" : 0
        },
        "management" : {
          "threads" : 5,
          "queue" : 0,
          "active" : 1,
          "rejected" : 0,
          "largest" : 5,
          "completed" : 706
        },
        "refresh" : {
          "threads" : 4,
          "queue" : 0,
          "active" : 0,
          "rejected" : 0,
          "largest" : 4,
          "completed" : 4859
        },
        "rollup_indexing" : {
          "threads" : 0,
          "queue" : 0,
          "active" : 0,
          "rejected" : 0,
          "largest" : 0,
          "completed" : 0
        },
        ... (output truncated)

This does not look like a system under stress at first glance... Also, please use the human parameter when sharing this kind of output, as it is much easier to read.

The amount of data is indeed rather low. Can you share the out-of-memory exception from the logs?
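
Something like this should surface it, assuming a package install with the default log location (the log file is named after the cluster, which is "elasticsearch" in your stats):

grep -B 2 -A 20 'OutOfMemoryError' /var/log/elasticsearch/elasticsearch.log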

Also, the nodes stats/nodes info APIs with the human parameter would be nice.

OK.

This was the error:

Caused by: java.lang.OutOfMemoryError: Java heap space
at io.netty.util.internal.PlatformDependent.allocateUninitializedArray(PlatformDependent.java:281) ~[?:?]
at io.netty.buffer.PoolArena$HeapArena.newByteArray(PoolArena.java:662) ~[?:?]
at io.netty.buffer.PoolArena$HeapArena.newChunk(PoolArena.java:672) ~[?:?]
at io.netty.buffer.PoolArena.allocateNormal(PoolArena.java:247) ~[?:?]
at io.netty.buffer.PoolArena.allocate(PoolArena.java:227) ~[?:?]
at io.netty.buffer.PoolArena.allocate(PoolArena.java:147) ~[?:?]
at io.netty.buffer.PooledByteBufAllocator.newHeapBuffer(PooledByteBufAllocator.java:339) ~[?:?]
at io.netty.buffer.AbstractByteBufAllocator.heapBuffer(AbstractByteBufAllocator.java:168) ~[?:?]
at io.netty.buffer.AbstractByteBufAllocator.heapBuffer(AbstractByteBufAllocator.java:159) ~[?:?]
at org.elasticsearch.transport.NettyAllocator$NoDirectBuffers.heapBuffer(NettyAllocator.java:137) ~[?:?]
at org.elasticsearch.transport.NettyAllocator$NoDirectBuffers.ioBuffer(NettyAllocator.java:122) ~[?:?]
at io.netty.channel.DefaultMaxMessagesRecvByteBufAllocator$MaxMessageHandle.allocate(DefaultMaxMessagesRecvByteBufAllocator.java:114) ~[?:?]
at io.netty.channel.nio.AbstractNioByteChannel$NioByteUnsafe.read(AbstractNioByteChannel.java:147) ~[?:?]
... 7 more
[2020-10-09T21:09:40,259][WARN ][o.e.h.AbstractHttpServerTransport] [localhost.localdomain] caught exception while handling client http traffic, closing connection Netty4HttpChannel{localAddress=/192.168.x.x:9200, remoteAddress=/10.x.x.x:55882}
java.lang.Exception: java.lang.OutOfMemoryError: Java heap space
at org.elasticsearch.http.netty4.Netty4HttpRequestHandler.exceptionCaught(Netty4HttpRequestHandler.java:69) [transport-netty4-client-7.8.0.jar:7.8.0]
at io.netty.channel.AbstractChannelHandlerContext.invokeExceptionCaught(AbstractChannelHandlerContext.java:302) [netty-transport-4.1.49.Final.jar:4.1.49.Final]
at io.netty.channel.AbstractChannelHandlerContext.invokeExceptionCaught(AbstractChannelHandlerContext.java:281) [netty-transport-4.1.49.Final.jar:4.1.49.Final]
at io.netty.channel.AbstractChannelHandlerContext.fireExceptionCaught(AbstractChannelHandlerContext.java:273) [netty-transport-4.1.49.Final.jar:4.1.49.Final]
at io.netty.channel.DefaultChannelPipeline$HeadContext.exceptionCaught(DefaultChannelPipeline.java:1377) [netty-transport-4.1.49.Final.jar:4.1.49.Final]
at io.netty.channel.AbstractChannelHandlerContext.invokeExceptionCaught(AbstractChannelHandlerContext.java:302) [netty-transport-4.1.49.Final.jar:4.1.49.Final]
at io.netty.channel.AbstractChannelHandlerContext.invokeExceptionCaught(AbstractChannelHandlerContext.java:281) [netty-transport-4.1.49.Final.jar:4.1.49.Final]
at io.netty.channel.DefaultChannelPipeline.fireExceptionCaught(DefaultChannelPipeline.java:907) [netty-transport-4.1.49.Final.jar:4.1.49.Final]
at io.netty.channel.nio.AbstractNioByteChannel$NioByteUnsafe.handleReadException(AbstractNioByteChannel.java:125) [netty-transport-4.1.49.Final.jar:4.1.49.Final]
at io.netty.channel.nio.AbstractNioByteChannel$NioByteUnsafe.read(AbstractNioByteChannel.java:174) [netty-transport-4.1.49.Final.jar:4.1.49.Final]
at io.netty.channel.nio.NioEventLoop.processSelectedKey(NioEventLoop.java:714) [netty-transport-4.1.49.Final.jar:4.1.49.Final]
at io.netty.channel.nio.NioEventLoop.processSelectedKeysPlain(NioEventLoop.java:615) [netty-transport-4.1.49.Final.jar:4.1.49.Final]
at io.netty.channel.nio.NioEventLoop.processSelectedKeys(NioEventLoop.java:578) [netty-transport-4.1.49.Final.jar:4.1.49.Final]
at io.netty.channel.nio.NioEventLoop.run(NioEventLoop.java:493) [netty-transport-4.1.49.Final.jar:4.1.49.Final]
at io.netty.util.concurrent.SingleThreadEventExecutor$4.run(SingleThreadEventExecutor.java:989) [netty-common-4.1.49.Final.jar:4.1.49.Final]
at io.netty.util.internal.ThreadExecutorMap$2.run(ThreadExecutorMap.java:74) [netty-common-4.1.49.Final.jar:4.1.49.Final]
at java.lang.Thread.run(Thread.java:832) [?:?]
Caused by: java.lang.OutOfMemoryError: Java heap space
at io.netty.util.internal.PlatformDependent.allocateUninitializedArray(PlatformDependent.java:281) ~[?:?]
at io.netty.buffer.PoolArena$HeapArena.newByteArray(PoolArena.java:662) ~[?:?]
at io.netty.buffer.PoolArena$HeapArena.newChunk(PoolArena.java:672) ~[?:?]
at io.netty.buffer.PoolArena.allocateNormal(PoolArena.java:247) ~[?:?]
at io.netty.buffer.PoolArena.allocate(PoolArena.java:227) ~[?:?]
at io.netty.buffer.PoolArena.allocate(PoolArena.java:147) ~[?:?]
at io.netty.buffer.PooledByteBufAllocator.newHeapBuffer(PooledByteBufAllocator.java:339) ~[?:?]
at io.netty.buffer.AbstractByteBufAllocator.heapBuffer(AbstractByteBufAllocator.java:168) ~[?:?]
at io.netty.buffer.AbstractByteBufAllocator.heapBuffer(AbstractByteBufAllocator.java:159) ~[?:?]
at org.elasticsearch.transport.NettyAllocator$NoDirectBuffers.heapBuffer(NettyAllocator.java:137) ~[?:?]
at org.elasticsearch.transport.NettyAllocator$NoDirectBuffers.ioBuffer(NettyAllocator.java:122) ~[?:?]
at io.netty.channel.DefaultMaxMessagesRecvByteBufAllocator$MaxMessageHandle.allocate(DefaultMaxMessagesRecvByteBufAllocator.java:114) ~[?:?]
at io.netty.channel.nio.AbstractNioByteChannel$NioByteUnsafe.read(AbstractNioByteChannel.java:147) ~[?:?]
... 7 more
[2020-10-09T21:09:40,288][ERROR][o.e.b.ElasticsearchUncaughtExceptionHandler] [localhost.localdomain] fatal error in thread [Thread-17], exiting
java.lang.OutOfMemoryError: Java heap space
at io.netty.util.internal.PlatformDependent.allocateUninitializedArray(PlatformDependent.java:281) ~[?:?]
at io.netty.buffer.PoolArena$HeapArena.newByteArray(PoolArena.java:662) ~[?:?]
at io.netty.buffer.PoolArena$HeapArena.newChunk(PoolArena.java:672) ~[?:?]
at io.netty.buffer.PoolArena.allocateNormal(PoolArena.java:247) ~[?:?]
at io.netty.buffer.PoolArena.allocate(PoolArena.java:227) ~[?:?]
at io.netty.buffer.PoolArena.allocate(PoolArena.java:147) ~[?:?]
at io.netty.buffer.PooledByteBufAllocator.newHeapBuffer(PooledByteBufAllocator.java:339) ~[?:?]
at io.netty.buffer.AbstractByteBufAllocator.heapBuffer(AbstractByteBufAllocator.java:168) ~[?:?]
at io.netty.buffer.AbstractByteBufAllocator.heapBuffer(AbstractByteBufAllocator.java:159) ~[?:?]
at org.elasticsearch.transport.NettyAllocator$NoDirectBuffers.heapBuffer(NettyAllocator.java:137) ~[?:?]
at org.elasticsearch.transport.NettyAllocator$NoDirectBuffers.ioBuffer(NettyAllocator.java:122) ~[?:?]
at io.netty.channel.DefaultMaxMessagesRecvByteBufAllocator$MaxMessageHandle.allocate(DefaultMaxMessagesRecvByteBufAllocator.java:114) ~[?:?]
at io.netty.channel.nio.AbstractNioByteChannel$NioByteUnsafe.read(AbstractNioByteChannel.java:147) ~[?:?]
at io.netty.channel.nio.NioEventLoop.processSelectedKey(NioEventLoop.java:714) ~[?:?]
at io.netty.channel.nio.NioEventLoop.processSelectedKeysPlain(NioEventLoop.java:615) ~[?:?]
at io.netty.channel.nio.NioEventLoop.processSelectedKeys(NioEventLoop.java:578) ~[?:?]
at io.netty.channel.nio.NioEventLoop.run(NioEventLoop.java:493) ~[?:?]
at io.netty.util.concurrent.SingleThreadEventExecutor$4.run(SingleThreadEventExecutor.java:989) ~[?:?]
at io.netty.util.internal.ThreadExecutorMap$2.run(ThreadExecutorMap.java:74) ~[?:?]
at java.lang.Thread.run(Thread.java:832) [?:?]
[2020-10-09T21:09:40,288][ERROR][o.e.b.ElasticsearchUncaughtExceptionHandler] [localhost.localdomain] fatal error in thread [Thread-18], exiting
java.lang.OutOfMemoryError: Java heap space
at io.netty.util.internal.PlatformDependent.allocateUninitializedArray(PlatformDependent.java:281) ~[?:?]
at io.netty.buffer.PoolArena$HeapArena.newByteArray(PoolArena.java:662) ~[?:?]
at io.netty.buffer.PoolArena$HeapArena.newChunk(PoolArena.java:672) ~[?:?]
at io.netty.buffer.PoolArena.allocateNormal(PoolArena.java:247) ~[?:?]
at io.netty.buffer.PoolArena.allocate(PoolArena.java:227) ~[?:?]
at io.netty.buffer.PoolArena.allocate(PoolArena.java:147) ~[?:?]
at io.netty.buffer.PooledByteBufAllocator.newHeapBuffer(PooledByteBufAllocator.java:339) ~[?:?]
at io.netty.buffer.AbstractByteBufAllocator.heapBuffer(AbstractByteBufAllocator.java:168) ~[?:?]
at io.netty.buffer.AbstractByteBufAllocator.heapBuffer(AbstractByteBufAllocator.java:159) ~[?:?]
at org.elasticsearch.transport.NettyAllocator$NoDirectBuffers.heapBuffer(NettyAllocator.java:137) ~[?:?]
at org.elasticsearch.transport.NettyAllocator$NoDirectBuffers.ioBuffer(NettyAllocator.java:122) ~[?:?]
at io.netty.channel.DefaultMaxMessagesRecvByteBufAllocator$MaxMessageHandle.allocate(DefaultMaxMessagesRecvByteBufAllocator.java:114) ~[?:?]
at io.netty.channel.nio.AbstractNioByteChannel$NioByteUnsafe.read(AbstractNioByteChannel.java:147) ~[?:?]
at io.netty.channel.nio.NioEventLoop.processSelectedKey(NioEventLoop.java:714) ~[?:?]
at io.netty.channel.nio.NioEventLoop.processSelectedKeysPlain(NioEventLoop.java:615) ~[?:?]
at io.netty.channel.nio.NioEventLoop.processSelectedKeys(NioEventLoop.java:578) ~[?:?]
at io.netty.channel.nio.NioEventLoop.run(NioEventLoop.java:493) ~[?:?]
at io.netty.util.concurrent.SingleThreadEventExecutor$4.run(SingleThreadEventExecutor.java:989) ~[?:?]
at io.netty.util.internal.ThreadExecutorMap$2.run(ThreadExecutorMap.java:74) ~[?:?]
at java.lang.Thread.run(Thread.java:832) [?:?]

Thanks.
Anyway, I decided to go with the Docker Compose approach.


This link helped me create a stable and working cluster.
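
For anyone landing here later, a minimal single-node sketch along those lines (names and sizes are illustrative; the key point is pinning the heap well below the container's memory limit so off-heap usage and page cache have headroom):

# docker-compose.yml (sketch for a single-node setup)
version: '2.2'
services:
  elasticsearch:
    image: docker.elastic.co/elasticsearch/elasticsearch:7.9.2
    environment:
      - discovery.type=single-node
      - bootstrap.memory_lock=true
      # Heap fixed at 12g; the container gets more than that overall
      - 'ES_JAVA_OPTS=-Xms12g -Xmx12g'
    ulimits:
      memlock:
        soft: -1
        hard: -1
    mem_limit: 16g
    ports:
      - '9200:9200'
    volumes:
      - esdata:/usr/share/elasticsearch/data
volumes:
  esdata: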
