High CPU usage (80-90%) on Elasticsearch cluster

Hi

Disclaimer: I'm new to Elasticsearch and have inherited this cluster, so please excuse my ignorance.

Our cluster is used for logs. We create an index for each system each day, and some indices are huge (around 500GB). We started noticing CPU usage hitting 80-90% and staying there for prolonged periods (3+ hours). I'm not sure what other information is required, so please ask.

Cluster:
3 Masters
5 data nodes

Output of hot_thread

Cluster stats output:

{
  "_nodes": {
    "total": 9,
    "successful": 9,
    "failed": 0
  },
  "cluster_name": "Logging",
  "timestamp": 1603394871868,
  "status": "green",
  "indices": {
    "count": 213,
    "shards": {
      "total": 2122,
      "primaries": 1061,
      "replication": 1,
      "index": {
        "shards": {
          "min": 2,
          "max": 10,
          "avg": 9.96244131455399
        },
        "primaries": {
          "min": 1,
          "max": 5,
          "avg": 4.981220657276995
        },
        "replication": {
          "min": 1,
          "max": 1,
          "avg": 1
        }
      }
    },
    "docs": {
      "count": 4279743140,
      "deleted": 2
    },
    "store": {
      "size_in_bytes": 9427457795487,
      "throttle_time_in_millis": 0
    },
    "fielddata": {
      "memory_size_in_bytes": 854475776,
      "evictions": 0
    },
    "query_cache": {
      "memory_size_in_bytes": 2360098368,
      "total_count": 20329557,
      "hit_count": 15220854,
      "miss_count": 5108703,
      "cache_size": 109846,
      "cache_count": 170986,
      "evictions": 61140
    },
    "completion": {
      "size_in_bytes": 0
    },
    "segments": {
      "count": 25623,
      "memory_in_bytes": 13598194468,
      "terms_memory_in_bytes": 9384259820,
      "stored_fields_memory_in_bytes": 4054761568,
      "term_vectors_memory_in_bytes": 0,
      "norms_memory_in_bytes": 31261440,
      "points_memory_in_bytes": 70613712,
      "doc_values_memory_in_bytes": 57297928,
      "index_writer_memory_in_bytes": 215728400,
      "version_map_memory_in_bytes": 2950556,
      "fixed_bit_set_memory_in_bytes": 0,
      "max_unsafe_auto_id_timestamp": 1603325198269,
      "file_sizes": {}
    }
  },
  "nodes": {
    "count": {
      "total": 9,
      "data": 6,
      "coordinating_only": 0,
      "master": 3,
      "ingest": 9
    },
    "versions": [
      "5.4.0"
    ],
    "os": {
      "available_processors": 54,
      "allocated_processors": 54,
      "names": [
        {
          "name": "Linux",
          "count": 9
        }
      ],
      "mem": {
        "total_in_bytes": 410450141184,
        "free_in_bytes": 18486890496,
        "used_in_bytes": 391963250688,
        "free_percent": 5,
        "used_percent": 95
      }
    },
    "process": {
      "cpu": {
        "percent": 463
      },
      "open_file_descriptors": {
        "min": 336,
        "max": 1242,
        "avg": 930
      }
    },
    "jvm": {
      "max_uptime_in_millis": 9306472124,
      "versions": [
        {
          "version": "1.8.0_144",
          "vm_name": "Java HotSpot(TM) 64-Bit Server VM",
          "vm_version": "25.144-b01",
          "vm_vendor": "Oracle Corporation",
          "count": 9
        }
      ],
      "mem": {
        "heap_used_in_bytes": 109308370864,
        "heap_max_in_bytes": 215351427072
      },
      "threads": 944
    },
    "fs": {
      "total_in_bytes": 11555457060864,
      "free_in_bytes": 2049472086016,
      "available_in_bytes": 2049472086016
    },
    "plugins": [],
    "network_types": {
      "transport_types": {
        "netty4": 9
      },
      "http_types": {
        "netty4": 9
      }
    }
  }
}

This topic was automatically closed 28 days after the last reply. New replies are no longer allowed.