Elasticsearch huge CPU utilisation

Hello,
We have ELK Stack in our Company.
Recently I had stability issues with Elasticsearch - after a restart it would run for about 3 hours and then hang.
Right now I see that the Elasticsearch Java process is consuming a lot of CPU time.

Can you please advise how I can start investigating this issue and find out what is consuming so much CPU/memory?

Hi Dominik,
I'd start by looking at the output of the hot threads API

1 Like

Hello, Mark
Thanks for reply,
when analyzing the hot threads, I found the following:


102.3% (511.3ms out of 500ms) cpu usage by thread 'elasticsearch[elasticsearch-main-0][management][T#5]'
9.2% (45.9ms out of 500ms) cpu usage by thread 'elasticsearch[elasticsearch-main-0][transport_worker][T#6]'

I assume the transport worker is responsible for communication with Logstash,
but do you know what 'management' with a utilisation of more than 100% means?

What is the output from the _cluster/stats?pretty&human API?

Various tasks are performed using management threads. Can you post the stacktrace under that line so we can see which task might be dominating?

Hello,
Thanks for Your reply,
please find the exported file attached.
Many thanks also for this API - I now have doubts whether one Elasticsearch node is enough for such a number of shards and indices. I also have 20GB of RAM for this node; please let me know if this is enough.

{
  "_nodes" : {
    "total" : 1,
    "successful" : 1,
    "failed" : 0
  },
  "cluster_name" : "main-cluster",
  "cluster_uuid" : "-I4W_co8Tn2QO__QW5bbCg",
  "timestamp" : 1634295503145,
  "status" : "yellow",
  "indices" : {
    "count" : 4484,
    "shards" : {
      "total" : 7824,
      "primaries" : 7824,
      "replication" : 0.0,
      "index" : {
        "shards" : {
          "min" : 1,
          "max" : 5,
          "avg" : 1.744870651204282
        },
        "primaries" : {
          "min" : 1,
          "max" : 5,
          "avg" : 1.744870651204282
        },
        "replication" : {
          "min" : 0.0,
          "max" : 0.0,
          "avg" : 0.0
        }
      }
    },
    "docs" : {
      "count" : 624816902,
      "deleted" : 6792
    },
    "store" : {
      "size" : "238.5gb",
      "size_in_bytes" : 256194468121,
      "reserved" : "0b",
      "reserved_in_bytes" : 0
    },
    "fielddata" : {
      "memory_size" : "1.2kb",
      "memory_size_in_bytes" : 1264,
      "evictions" : 0
    },
    "query_cache" : {
      "memory_size" : "3mb",
      "memory_size_in_bytes" : 3225577,
      "total_count" : 12825,
      "hit_count" : 1288,
      "miss_count" : 11537,
      "cache_size" : 370,
      "cache_count" : 375,
      "evictions" : 5
    },
    "completion" : {
      "size" : "0b",
      "size_in_bytes" : 0
    },
    "segments" : {
      "count" : 40283,
      "memory" : "414mb",
      "memory_in_bytes" : 434151758,
      "terms_memory" : "339.6mb",
      "terms_memory_in_bytes" : 356152704,
      "stored_fields_memory" : "19.1mb",
      "stored_fields_memory_in_bytes" : 20066952,
      "term_vectors_memory" : "0b",
      "term_vectors_memory_in_bytes" : 0,
      "norms_memory" : "37.7mb",
      "norms_memory_in_bytes" : 39539200,
      "points_memory" : "0b",
      "points_memory_in_bytes" : 0,
      "doc_values_memory" : "17.5mb",
      "doc_values_memory_in_bytes" : 18392902,
      "index_writer_memory" : "29.7mb",
      "index_writer_memory_in_bytes" : 31147064,
      "version_map_memory" : "361.9kb",
      "version_map_memory_in_bytes" : 370638,
      "fixed_bit_set" : "35.5mb",
      "fixed_bit_set_memory_in_bytes" : 37239368,
      "max_unsafe_auto_id_timestamp" : 1634204694625,
      "file_sizes" : { }
    },
    "mappings" : {
      "field_types" : [
        {
          "name" : "alias",
          "count" : 12852,
          "index_count" : 378
        },
        {
          "name" : "binary",
          "count" : 1,
          "index_count" : 1
        },
        {
          "name" : "boolean",
          "count" : 41360,
          "index_count" : 454
        },
        {
          "name" : "byte",
          "count" : 1,
          "index_count" : 1
        },
        {
          "name" : "constant_keyword",
          "count" : 2,
          "index_count" : 1
        },
        {
          "name" : "date",
          "count" : 60691,
          "index_count" : 4468
        },
        {
          "name" : "date_nanos",
          "count" : 1,
          "index_count" : 1
        },
        {
          "name" : "date_range",
          "count" : 1,
          "index_count" : 1
        },
        {
          "name" : "double",
          "count" : 12475,
          "index_count" : 379
        },
        {
          "name" : "double_range",
          "count" : 1,
          "index_count" : 1
        },
        {
          "name" : "flattened",
          "count" : 3024,
          "index_count" : 378
        },
        {
          "name" : "float",
          "count" : 12969,
          "index_count" : 1085
        },
        {
          "name" : "float_range",
          "count" : 1,
          "index_count" : 1
        },
        {
          "name" : "geo_point",
          "count" : 3205,
          "index_count" : 559
        },
        {
          "name" : "geo_shape",
          "count" : 1,
          "index_count" : 1
        },
        {
          "name" : "half_float",
          "count" : 1,
          "index_count" : 1
        },
        {
          "name" : "integer",
          "count" : 3068,
          "index_count" : 188
        },
        {
          "name" : "integer_range",
          "count" : 1,
          "index_count" : 1
        },
        {
          "name" : "ip",
          "count" : 47432,
          "index_count" : 560
        },
        {
          "name" : "ip_range",
          "count" : 1,
          "index_count" : 1
        },
        {
          "name" : "keyword",
          "count" : 1473613,
          "index_count" : 4468
        },
        {
          "name" : "long",
          "count" : 379411,
          "index_count" : 4463
        },
        {
          "name" : "long_range",
          "count" : 1,
          "index_count" : 1
        },
        {
          "name" : "nested",
          "count" : 1357,
          "index_count" : 573
        },
        {
          "name" : "object",
          "count" : 302328,
          "index_count" : 4467
        },
        {
          "name" : "shape",
          "count" : 1,
          "index_count" : 1
        },
        {
          "name" : "short",
          "count" : 38179,
          "index_count" : 379
        },
        {
          "name" : "text",
          "count" : 133946,
          "index_count" : 4467
        }
      ]
    },
    "analysis" : {
      "char_filter_types" : [ ],
      "tokenizer_types" : [ ],
      "filter_types" : [ ],
      "analyzer_types" : [ ],
      "built_in_char_filters" : [ ],
      "built_in_tokenizers" : [ ],
      "built_in_filters" : [ ],
      "built_in_analyzers" : [ ]
    },
    "versions" : [
      {
        "version" : "7.8.0",
        "index_count" : 6,
        "primary_shard_count" : 6,
        "total_primary_size" : "547.4kb",
        "total_primary_bytes" : 560614
      },
      {
        "version" : "7.10.1",
        "index_count" : 38,
        "primary_shard_count" : 38,
        "total_primary_size" : "2.6gb",
        "total_primary_bytes" : 2820692333
      },
      {
        "version" : "7.11.1",
        "index_count" : 4440,
        "primary_shard_count" : 7780,
        "total_primary_size" : "235.9gb",
        "total_primary_bytes" : 253373215174
      }
    ]
  },
  "nodes" : {
    "count" : {
      "total" : 1,
      "coordinating_only" : 0,
      "data" : 1,
      "data_cold" : 1,
      "data_content" : 1,
      "data_hot" : 1,
      "data_warm" : 1,
      "ingest" : 1,
      "master" : 1,
      "ml" : 1,
      "remote_cluster_client" : 1,
      "transform" : 1,
      "voting_only" : 0
    },
    "versions" : [
      "7.11.1"
    ],
    "os" : {
      "available_processors" : 6,
      "allocated_processors" : 6,
      "names" : [
        {
          "name" : "Linux",
          "count" : 1
        }
      ],
      "pretty_names" : [
        {
          "pretty_name" : "CentOS Linux 8",
          "count" : 1
        }
      ],
      "mem" : {
        "total" : "20gb",
        "total_in_bytes" : 21474836480,
        "free" : "5.6mb",
        "free_in_bytes" : 5939200,
        "used" : "19.9gb",
        "used_in_bytes" : 21468897280,
        "free_percent" : 0,
        "used_percent" : 100
      }
    },
    "process" : {
      "cpu" : {
        "percent" : 36
      },
      "open_file_descriptors" : {
        "min" : 49688,
        "max" : 49688,
        "avg" : 49688
      }
    },
    "jvm" : {
      "max_uptime" : "1d",
      "max_uptime_in_millis" : 91498609,
      "versions" : [
        {
          "version" : "15.0.1",
          "vm_name" : "OpenJDK 64-Bit Server VM",
          "vm_version" : "15.0.1+9",
          "vm_vendor" : "AdoptOpenJDK",
          "bundled_jdk" : true,
          "using_bundled_jdk" : true,
          "count" : 1
        }
      ],
      "mem" : {
        "heap_used" : "8.7gb",
        "heap_used_in_bytes" : 9414204896,
        "heap_max" : "12gb",
        "heap_max_in_bytes" : 12884901888
      },
      "threads" : 228
    },
    "fs" : {
      "total" : "349.9gb",
      "total_in_bytes" : 375704780800,
      "free" : "108.9gb",
      "free_in_bytes" : 116941488128,
      "available" : "108.9gb",
      "available_in_bytes" : 116941488128
    },
    "plugins" : [
      {
        "name" : "repository-s3",
        "version" : "7.11.1",
        "elasticsearch_version" : "7.11.1",
        "java_version" : "1.8",
        "description" : "The S3 repository plugin adds S3 repositories",
        "classname" : "org.elasticsearch.repositories.s3.S3RepositoryPlugin",
        "extended_plugins" : [ ],
        "has_native_controller" : false,
        "licensed" : false,
        "type" : "isolated"
      }
    ],
    "network_types" : {
      "transport_types" : {
        "security4" : 1
      },
      "http_types" : {
        "security4" : 1
      }
    },
    "discovery_types" : {
      "zen" : 1
    },
    "packaging_types" : [
      {
        "flavor" : "default",
        "type" : "docker",
        "count" : 1
      }
    ],
    "ingest" : {
      "number_of_pipelines" : 17,
      "processor_stats" : {
        "conditional" : {
          "count" : 0,
          "failed" : 0,
          "current" : 0,
          "time" : "0s",
          "time_in_millis" : 0
        },
        "geoip" : {
          "count" : 0,
          "failed" : 0,
          "current" : 0,
          "time" : "0s",
          "time_in_millis" : 0
        },
        "grok" : {
          "count" : 0,
          "failed" : 0,
          "current" : 0,
          "time" : "0s",
          "time_in_millis" : 0
        },
        "gsub" : {
          "count" : 0,
          "failed" : 0,
          "current" : 0,
          "time" : "0s",
          "time_in_millis" : 0
        },
        "remove" : {
          "count" : 0,
          "failed" : 0,
          "current" : 0,
          "time" : "0s",
          "time_in_millis" : 0
        },
        "rename" : {
          "count" : 0,
          "failed" : 0,
          "current" : 0,
          "time" : "0s",
          "time_in_millis" : 0
        },
        "script" : {
          "count" : 0,
          "failed" : 0,
          "current" : 0,
          "time" : "0s",
          "time_in_millis" : 0
        },
        "set" : {
          "count" : 0,
          "failed" : 0,
          "current" : 0,
          "time" : "0s",
          "time_in_millis" : 0
        }
      }
    }
  }
}

That doesn't look like hot threads API output.
I was looking for the lines after this one:

102.3% (511.3ms out of 500ms) cpu usage by thread 'elasticsearch[elasticsearch-main-0][management][T#5]'