Elasticsearch goes down (RAM issue)

Hello, I assigned 3 GB of RAM to Elasticsearch, and I used one cluster with 3 nodes for it. Three JDBC pipelines (log data via Logstash) run on it, and I want to extend it in the future too. I also used Docker to run the ELK stack.
But now the Elasticsearch service sometimes goes down, and I get this error:
java.lang.OutOfMemoryError: Java heap space

What is the problem, and how can I troubleshoot and solve it?
How much RAM do I need to run it under a normal workload?
How can I tell how much RAM covers my requirements?
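A quick way to see current heap usage against the configured maximum is the _cat/nodes API (the host and port below assume the default Docker port mapping; adjust to your setup):

```
# Show each node's current heap usage against its configured maximum
curl -s 'localhost:9200/_cat/nodes?v&h=name,heap.current,heap.percent,heap.max'
```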

How much RAM do the hosts running Elasticsearch have? How much data have you loaded into Elasticsearch? What is the full output of the cluster stats API?
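For reference, the cluster stats can be fetched like this (host and port assume a default local setup):

```
# Fetch cluster-wide statistics in human-readable, pretty-printed form
curl -s 'localhost:9200/_cluster/stats?human&pretty'
```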

Hi,
Thanks for your reply.

RAM:
VM with 8 GB:

  • Logstash: 512 MB
  • Elasticsearch: 3 GB

DATA:

  • 10 GB

Cluster stats API:

```
{
  "_nodes" : {
    "total" : 1,
    "successful" : 1,
    "failed" : 0
  },
  "cluster_name" : "docker-cluster",
  "cluster_uuid" : "K1HAFtwWQ6e_ZIksa2Zf7Q",
  "timestamp" : 1625374748063,
  "status" : "yellow",
  "indices" : {
    "count" : 51,
    "shards" : {
      "total" : 51,
      "primaries" : 51,
      "replication" : 0.0,
      "index" : {
        "shards" : {
          "min" : 1,
          "max" : 1,
          "avg" : 1.0
        },
        "primaries" : {
          "min" : 1,
          "max" : 1,
          "avg" : 1.0
        },
        "replication" : {
          "min" : 0.0,
          "max" : 0.0,
          "avg" : 0.0
        }
      }
    },
    "docs" : {
      "count" : 48798945,
      "deleted" : 2273624
    },
    "store" : {
      "size_in_bytes" : 9314268104,
      "reserved_in_bytes" : 0
    },
    "fielddata" : {
      "memory_size_in_bytes" : 31568,
      "evictions" : 0
    },
    "query_cache" : {
      "memory_size_in_bytes" : 1211081,
      "total_count" : 44768,
      "hit_count" : 11799,
      "miss_count" : 32969,
      "cache_size" : 110,
      "cache_count" : 1104,
      "evictions" : 994
    },
    "completion" : {
      "size_in_bytes" : 0
    },
    "segments" : {
      "count" : 334,
      "memory_in_bytes" : 2762298,
      "terms_memory_in_bytes" : 1063872,
      "stored_fields_memory_in_bytes" : 170336,
      "term_vectors_memory_in_bytes" : 0,
      "norms_memory_in_bytes" : 40640,
      "points_memory_in_bytes" : 0,
      "doc_values_memory_in_bytes" : 1487450,
      "index_writer_memory_in_bytes" : 152045548,
      "version_map_memory_in_bytes" : 1370995,
      "fixed_bit_set_memory_in_bytes" : 553240,
      "max_unsafe_auto_id_timestamp" : 1625290956878,
      "file_sizes" : { }
    },
    "mappings" : {
      "field_types" : [
        {
          "name" : "alias",
          "count" : 3,
          "index_count" : 1
        },
        {
          "name" : "boolean",
          "count" : 139,
          "index_count" : 24
        },
        {
          "name" : "byte",
          "count" : 1,
          "index_count" : 1
        },
        {
          "name" : "constant_keyword",
          "count" : 2,
          "index_count" : 1
        },
        {
          "name" : "date",
          "count" : 207,
          "index_count" : 43
        },
        {
          "name" : "double",
          "count" : 174,
          "index_count" : 7
        },
        {
          "name" : "float",
          "count" : 276,
          "index_count" : 10
        },
        {
          "name" : "geo_point",
          "count" : 27,
          "index_count" : 9
        },
        {
          "name" : "half_float",
          "count" : 74,
          "index_count" : 20
        },
        {
          "name" : "integer",
          "count" : 130,
          "index_count" : 13
        },
        {
          "name" : "ip",
          "count" : 53,
          "index_count" : 9
        },
        {
          "name" : "keyword",
          "count" : 2646,
          "index_count" : 43
        },
        {
          "name" : "long",
          "count" : 3548,
          "index_count" : 37
        },
        {
          "name" : "nested",
          "count" : 39,
          "index_count" : 14
        },
        {
          "name" : "object",
          "count" : 3432,
          "index_count" : 34
        },
        {
          "name" : "scaled_float",
          "count" : 143,
          "index_count" : 1
        },
        {
          "name" : "short",
          "count" : 15,
          "index_count" : 5
        },
        {
          "name" : "text",
          "count" : 246,
          "index_count" : 29
        }
      ]
    },
    "analysis" : {
      "char_filter_types" : [ ],
      "tokenizer_types" : [ ],
      "filter_types" : [ ],
      "analyzer_types" : [ ],
      "built_in_char_filters" : [ ],
      "built_in_tokenizers" : [ ],
      "built_in_filters" : [ ],
      "built_in_analyzers" : [
        {
          "name" : "english",
          "count" : 1,
          "index_count" : 1
        }
      ]
    },
    "versions" : [
      {
        "version" : "7.11.1",
        "index_count" : 51,
        "primary_shard_count" : 51,
        "total_primary_bytes" : 9314268104
      }
    ]
  },
  "nodes" : {
    "count" : {
      "total" : 1,
      "coordinating_only" : 0,
      "data" : 1,
      "data_cold" : 1,
      "data_content" : 1,
      "data_hot" : 1,
      "data_warm" : 1,
      "ingest" : 1,
      "master" : 1,
      "ml" : 0,
      "remote_cluster_client" : 1,
      "transform" : 1,
      "voting_only" : 0
    },
    "versions" : [
      "7.11.1"
    ],
    "os" : {
      "available_processors" : 6,
      "allocated_processors" : 6,
      "names" : [
        {
          "name" : "Linux",
          "count" : 1
        }
      ],
      "pretty_names" : [
        {
          "pretty_name" : "CentOS Linux 8",
          "count" : 1
        }
      ],
      "mem" : {
        "total_in_bytes" : 16791232512,
        "free_in_bytes" : 315572224,
        "used_in_bytes" : 16475660288,
        "free_percent" : 2,
        "used_percent" : 98
      }
    },
    "process" : {
      "cpu" : {
        "percent" : 2
      },
      "open_file_descriptors" : {
        "min" : 707,
        "max" : 707,
        "avg" : 707
      }
    },
    "jvm" : {
      "max_uptime_in_millis" : 83810397,
      "versions" : [
        {
          "version" : "15.0.1",
          "vm_name" : "OpenJDK 64-Bit Server VM",
          "vm_version" : "15.0.1+9",
          "vm_vendor" : "AdoptOpenJDK",
          "bundled_jdk" : true,
          "using_bundled_jdk" : true,
          "count" : 1
        }
      ],
      "mem" : {
        "heap_used_in_bytes" : 1729986552,
        "heap_max_in_bytes" : 3221225472
      },
      "threads" : 74
    },
    "fs" : {
      "total_in_bytes" : 315990278144,
      "free_in_bytes" : 285985746944,
      "available_in_bytes" : 269863002112
    },
    "plugins" : [ ],
    "network_types" : {
      "transport_types" : {
        "security4" : 1
      },
      "http_types" : {
        "security4" : 1
      }
    },
    "discovery_types" : {
      "single-node" : 1
    },
    "packaging_types" : [
      {
        "flavor" : "default",
        "type" : "docker",
        "count" : 1
      }
    ],
    "ingest" : {
      "number_of_pipelines" : 19,
      "processor_stats" : {
        "append" : {
          "count" : 0,
          "failed" : 0,
          "current" : 0,
          "time_in_millis" : 0
        },
        "conditional" : {
          "count" : 0,
          "failed" : 0,
          "current" : 0,
          "time_in_millis" : 0
        },
        "date" : {
          "count" : 0,
          "failed" : 0,
          "current" : 0,
          "time_in_millis" : 0
        },
        "geoip" : {
          "count" : 0,
          "failed" : 0,
          "current" : 0,
          "time_in_millis" : 0
        },
        "grok" : {
          "count" : 0,
          "failed" : 0,
          "current" : 0,
          "time_in_millis" : 0
        },
        "gsub" : {
          "count" : 0,
          "failed" : 0,
          "current" : 0,
          "time_in_millis" : 0
        },
        "remove" : {
          "count" : 0,
          "failed" : 0,
          "current" : 0,
          "time_in_millis" : 0
        },
        "rename" : {
          "count" : 0,
          "failed" : 0,
          "current" : 0,
          "time_in_millis" : 0
        },
        "script" : {
          "count" : 0,
          "failed" : 0,
          "current" : 0,
          "time_in_millis" : 0
        },
        "set" : {
          "count" : 0,
          "failed" : 0,
          "current" : 0,
          "time_in_millis" : 0
        }
      }
    }
  }
}
```

I do not see any issues there. What do the Elasticsearch logs say before the OOM?
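Since the node runs in Docker, the logs go to the container's stdout/stderr; something like this pulls the relevant part (the container name elasticsearch is an assumption, substitute yours):

```
# Show the tail of the container log and the context around the OOM
docker logs --tail 500 elasticsearch 2>&1 | grep -B 3 -A 10 'OutOfMemoryError'
```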

Here is the log file:

Any solution or reply?!

The last lines of the logs are:

{"type": "server", "timestamp": "2021-07-04T09:54:27,518Z", "level": "WARN", "component": "o.e.t.ThreadPool", "cluster.name": "docker-cluster", "node.name": "175ed24e3855", "message": "failed to run scheduled task [org.elasticsearch.indices.IndexingMemoryController$ShardsIndicesStatusChecker@40f0573a] on thread pool [same]", "cluster.uuid": "K1HAFtwWQ6e_ZIksa2Zf7Q", "node.id": "VLbG_sLjS9OVC81NsytOsQ" , 
"stacktrace": ["org.apache.lucene.store.AlreadyClosedException: this IndexWriter is closed",
"at org.apache.lucene.index.IndexWriter.ensureOpen(IndexWriter.java:877) ~[lucene-core-8.7.0.jar:8.7.0 2dc63e901c60cda27ef3b744bc554f1481b3b067 - atrisharma - 2020-10-29 19:35:28]",
"at org.apache.lucene.index.IndexWriter.ensureOpen(IndexWriter.java:891) ~[lucene-core-8.7.0.jar:8.7.0 2dc63e901c60cda27ef3b744bc554f1481b3b067 - atrisharma - 2020-10-29 19:35:28]",
"at org.apache.lucene.index.IndexWriter.getFlushingBytes(IndexWriter.java:781) ~[lucene-core-8.7.0.jar:8.7.0 2dc63e901c60cda27ef3b744bc554f1481b3b067 - atrisharma - 2020-10-29 19:35:28]",
"at org.elasticsearch.index.engine.InternalEngine.getWritingBytes(InternalEngine.java:614) ~[elasticsearch-7.11.1.jar:7.11.1]",
"at org.elasticsearch.index.shard.IndexShard.getWritingBytes(IndexShard.java:1026) ~[elasticsearch-7.11.1.jar:7.11.1]",
"at org.elasticsearch.indices.IndexingMemoryController.getShardWritingBytes(IndexingMemoryController.java:171) ~[elasticsearch-7.11.1.jar:7.11.1]",
"at org.elasticsearch.indices.IndexingMemoryController$ShardsIndicesStatusChecker.runUnlocked(IndexingMemoryController.java:299) ~[elasticsearch-7.11.1.jar:7.11.1]",
"at org.elasticsearch.indices.IndexingMemoryController$ShardsIndicesStatusChecker.run(IndexingMemoryController.java:279) ~[elasticsearch-7.11.1.jar:7.11.1]",
"at org.elasticsearch.threadpool.Scheduler$ReschedulingRunnable.doRun(Scheduler.java:202) [elasticsearch-7.11.1.jar:7.11.1]",
"at org.elasticsearch.common.util.concurrent.ThreadContext$ContextPreservingAbstractRunnable.doRun(ThreadContext.java:732) [elasticsearch-7.11.1.jar:7.11.1]",
"at org.elasticsearch.common.util.concurrent.AbstractRunnable.run(AbstractRunnable.java:26) [elasticsearch-7.11.1.jar:7.11.1]",
"at java.util.concurrent.Executors$RunnableAdapter.call(Executors.java:515) [?:?]",
"at java.util.concurrent.FutureTask.run(FutureTask.java:264) [?:?]",
"at java.util.concurrent.ScheduledThreadPoolExecutor$ScheduledFutureTask.run(ScheduledThreadPoolExecutor.java:304) [?:?]",
"at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1130) [?:?]",
"at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:630) [?:?]",
"at java.lang.Thread.run(Thread.java:832) [?:?]",
"Caused by: java.lang.OutOfMemoryError: Java heap space",
"at org.apache.lucene.util.ByteBlockPool$DirectTrackingAllocator.getByteBlock(ByteBlockPool.java:105) ~[lucene-core-8.7.0.jar:8.7.0 2dc63e901c60cda27ef3b744bc554f1481b3b067 - atrisharma - 2020-10-29 19:35:28]",
"at org.apache.lucene.util.ByteBlockPool.nextBuffer(ByteBlockPool.java:205) ~[lucene-core-8.7.0.jar:8.7.0 2dc63e901c60cda27ef3b744bc554f1481b3b067 - atrisharma - 2020-10-29 19:35:28]",
"at org.apache.lucene.util.BytesRefHash.add(BytesRefHash.java:273) ~[lucene-core-8.7.0.jar:8.7.0 2dc63e901c60cda27ef3b744bc554f1481b3b067 - atrisharma - 2020-10-29 19:35:28]",
"at org.apache.lucene.index.SortedSetDocValuesWriter.addOneValue(SortedSetDocValuesWriter.java:116) ~[lucene-core-8.7.0.jar:8.7.0 2dc63e901c60cda27ef3b744bc554f1481b3b067 - atrisharma - 2020-10-29 19:35:28]",
"at org.apache.lucene.index.SortedSetDocValuesWriter.addValue(SortedSetDocValuesWriter.java:87) ~[lucene-core-8.7.0.jar:8.7.0 2dc63e901c60cda27ef3b744bc554f1481b3b067 - atrisharma - 2020-10-29 19:35:28]",
"at org.apache.lucene.index.DefaultIndexingChain.indexDocValue(DefaultIndexingChain.java:721) ~[lucene-core-8.7.0.jar:8.7.0 2dc63e901c60cda27ef3b744bc554f1481b3b067 - atrisharma - 2020-10-29 19:35:28]",
"at org.apache.lucene.index.DefaultIndexingChain.processField(DefaultIndexingChain.java:561) ~[lucene-core-8.7.0.jar:8.7.0 2dc63e901c60cda27ef3b744bc554f1481b3b067 - atrisharma - 2020-10-29 19:35:28]",
"at org.apache.lucene.index.DefaultIndexingChain.processDocument(DefaultIndexingChain.java:488) ~[lucene-core-8.7.0.jar:8.7.0 2dc63e901c60cda27ef3b744bc554f1481b3b067 - atrisharma - 2020-10-29 19:35:28]",
"at org.apache.lucene.index.DocumentsWriterPerThread.updateDocuments(DocumentsWriterPerThread.java:208) ~[lucene-core-8.7.0.jar:8.7.0 2dc63e901c60cda27ef3b744bc554f1481b3b067 - atrisharma - 2020-10-29 19:35:28]",
"at org.apache.lucene.index.DocumentsWriter.updateDocuments(DocumentsWriter.java:419) ~[lucene-core-8.7.0.jar:8.7.0 2dc63e901c60cda27ef3b744bc554f1481b3b067 - atrisharma - 2020-10-29 19:35:28]",
"at org.apache.lucene.index.IndexWriter.updateDocuments(IndexWriter.java:1471) ~[lucene-core-8.7.0.jar:8.7.0 2dc63e901c60cda27ef3b744bc554f1481b3b067 - atrisharma - 2020-10-29 19:35:28]",
"at org.apache.lucene.index.IndexWriter.softUpdateDocument(IndexWriter.java:1799) ~[lucene-core-8.7.0.jar:8.7.0 2dc63e901c60cda27ef3b744bc554f1481b3b067 - atrisharma - 2020-10-29 19:35:28]",
"at org.elasticsearch.index.engine.InternalEngine.updateDocs(InternalEngine.java:1292) ~[elasticsearch-7.11.1.jar:7.11.1]",
"at org.elasticsearch.index.engine.InternalEngine.indexIntoLucene(InternalEngine.java:1121) ~[elasticsearch-7.11.1.jar:7.11.1]",
"at org.elasticsearch.index.engine.InternalEngine.index(InternalEngine.java:952) ~[elasticsearch-7.11.1.jar:7.11.1]",
"at org.elasticsearch.index.shard.IndexShard.index(IndexShard.java:871) ~[elasticsearch-7.11.1.jar:7.11.1]",
"at org.elasticsearch.index.shard.IndexShard.applyIndexOperation(IndexShard.java:843) ~[elasticsearch-7.11.1.jar:7.11.1]",
"at org.elasticsearch.index.shard.IndexShard.applyIndexOperationOnPrimary(IndexShard.java:800) ~[elasticsearch-7.11.1.jar:7.11.1]",
"at org.elasticsearch.action.bulk.TransportShardBulkAction.executeBulkItemRequest(TransportShardBulkAction.java:274) ~[elasticsearch-7.11.1.jar:7.11.1]",
"at org.elasticsearch.action.bulk.TransportShardBulkAction$2.doRun(TransportShardBulkAction.java:164) ~[elasticsearch-7.11.1.jar:7.11.1]",
"at org.elasticsearch.common.util.concurrent.AbstractRunnable.run(AbstractRunnable.java:26) ~[elasticsearch-7.11.1.jar:7.11.1]",
"at org.elasticsearch.action.bulk.TransportShardBulkAction.performOnPrimary(TransportShardBulkAction.java:209) ~[elasticsearch-7.11.1.jar:7.11.1]",
"at org.elasticsearch.action.bulk.TransportShardBulkAction.dispatchedShardOperationOnPrimary(TransportShardBulkAction.java:115) ~[elasticsearch-7.11.1.jar:7.11.1]",
"at org.elasticsearch.action.bulk.TransportShardBulkAction.dispatchedShardOperationOnPrimary(TransportShardBulkAction.java:74) ~[elasticsearch-7.11.1.jar:7.11.1]",
"at org.elasticsearch.action.support.replication.TransportWriteAction$1.doRun(TransportWriteAction.java:168) ~[elasticsearch-7.11.1.jar:7.11.1]",
"at org.elasticsearch.common.util.concurrent.ThreadContext$ContextPreservingAbstractRunnable.doRun(ThreadContext.java:732) ~[elasticsearch-7.11.1.jar:7.11.1]",
"at org.elasticsearch.common.util.concurrent.AbstractRunnable.run(AbstractRunnable.java:26) ~[elasticsearch-7.11.1.jar:7.11.1]",
"... 3 more"] }
{"type": "server", "timestamp": "2021-07-04T09:54:27,521Z", "level": "WARN", "component": "o.e.m.j.JvmGcMonitorService", "cluster.name": "docker-cluster", "node.name": "175ed24e3855", "message": "[gc][101225] overhead, spent [30.5s] collecting in the last [30.6s]", "cluster.uuid": "K1HAFtwWQ6e_ZIksa2Zf7Q", "node.id": "VLbG_sLjS9OVC81NsytOsQ"  }

In case something has been fixed since 7.11, could you upgrade to 7.13.2?
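With Docker that is just a matter of pulling the newer image and recreating the container from it (keep the data volume so the indices survive the swap):

```
# Pull the newer release; then recreate the container from this image
docker pull docker.elastic.co/elasticsearch/elasticsearch:7.13.2
```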


This topic was automatically closed 28 days after the last reply. New replies are no longer allowed.