Primary shard storage bottleneck

Hi everyone, I want to ask why the primary shards of each daily index are stored on only one node (Hot3). This causes high CPU usage on the Hot3 node, which then rejects bulk requests from the coordinating node, so the log forwarding from Logstash to Elasticsearch becomes congested.

You need to provide more context about your cluster.

How many nodes do you have?

What is the result of running the following request in Dev Tools:

GET _cat/nodes?v&h=name,diskTotal,diskUsedPercent,diskAvail

What is the full output of the cluster stats API?

And this is the result

Please do not post images of text. Instead copy and paste and make sure to use the formatting buttons.

This is the output of the cluster stats API:

{
  "_nodes": {
    "total": 16,
    "successful": 16,
    "failed": 0
  },
  "cluster_name": "xxx-xxx",
  "cluster_uuid": "A8e4NOzFReSMSoE9yxixaQ",
  "timestamp": 1685977268903,
  "status": "red",
  "indices": {
    "count": 679,
    "shards": {
      "total": 1140,
      "primaries": 679,
      "replication": 0.678939617083947,
      "index": {
        "shards": {
          "min": 1,
          "max": 2,
          "avg": 1.678939617083947
        },
        "primaries": {
          "min": 1,
          "max": 1,
          "avg": 1
        },
        "replication": {
          "min": 0,
          "max": 1,
          "avg": 0.678939617083947
        }
      }
    },
    "docs": {
      "count": 9100465554,
      "deleted": 12377962
    },
    "store": {
      "size_in_bytes": 2845594205070,
      "total_data_set_size_in_bytes": 2845594205070,
      "reserved_in_bytes": 0
    },
    "fielddata": {
      "memory_size_in_bytes": 301724920,
      "evictions": 0
    },
    "query_cache": {
      "memory_size_in_bytes": 2570615780,
      "total_count": 1277138509,
      "hit_count": 149216308,
      "miss_count": 1127922201,
      "cache_size": 123384,
      "cache_count": 573556,
      "evictions": 450172
    },
    "completion": {
      "size_in_bytes": 0
    },
    "segments": {
      "count": 12827,
      "memory_in_bytes": 0,
      "terms_memory_in_bytes": 0,
      "stored_fields_memory_in_bytes": 0,
      "term_vectors_memory_in_bytes": 0,
      "norms_memory_in_bytes": 0,
      "points_memory_in_bytes": 0,
      "doc_values_memory_in_bytes": 0,
      "index_writer_memory_in_bytes": 531281598,
      "version_map_memory_in_bytes": 5352169,
      "fixed_bit_set_memory_in_bytes": 151481544,
      "max_unsafe_auto_id_timestamp": 1685973448412,
      "file_sizes": {}
    },
    "mappings": {
      "total_field_count": 239170,
      "total_deduplicated_field_count": 168104,
      "total_deduplicated_mapping_size_in_bytes": 1455653,
      "field_types": [
        {
          "name": "alias",
          "count": 1264,
          "index_count": 27,
          "script_count": 0
        },
        {
          "name": "binary",
          "count": 2,
          "index_count": 2,
          "script_count": 0
        },
        {
          "name": "boolean",
          "count": 1856,
          "index_count": 537,
          "script_count": 0
        },
        {
          "name": "byte",
          "count": 2,
          "index_count": 2,
          "script_count": 0
        },
        {
          "name": "constant_keyword",
          "count": 2332,
          "index_count": 491,
          "script_count": 0
        },
        {
          "name": "date",
          "count": 5519,
          "index_count": 645,
          "script_count": 0
        },
        {
          "name": "date_nanos",
          "count": 2,
          "index_count": 2,
          "script_count": 0
        },
        {
          "name": "date_range",
          "count": 2,
          "index_count": 2,
          "script_count": 0
        },
        {
          "name": "double",
          "count": 291,
          "index_count": 31,
          "script_count": 0
        },
        {
          "name": "double_range",
          "count": 2,
          "index_count": 2,
          "script_count": 0
        },
        {
          "name": "flattened",
          "count": 521,
          "index_count": 211,
          "script_count": 0
        },
        {
          "name": "float",
          "count": 2592,
          "index_count": 288,
          "script_count": 0
        },
        {
          "name": "float_range",
          "count": 2,
          "index_count": 2,
          "script_count": 0
        },
        {
          "name": "geo_point",
          "count": 662,
          "index_count": 283,
          "script_count": 0
        },
        {
          "name": "geo_shape",
          "count": 2,
          "index_count": 2,
          "script_count": 0
        },
        {
          "name": "half_float",
          "count": 85,
          "index_count": 26,
          "script_count": 0
        },
        {
          "name": "histogram",
          "count": 3,
          "index_count": 3,
          "script_count": 0
        },
        {
          "name": "integer",
          "count": 200,
          "index_count": 40,
          "script_count": 0
        },
        {
          "name": "integer_range",
          "count": 2,
          "index_count": 2,
          "script_count": 0
        },
        {
          "name": "ip",
          "count": 4536,
          "index_count": 512,
          "script_count": 0
        },
        {
          "name": "ip_range",
          "count": 2,
          "index_count": 2,
          "script_count": 0
        },
        {
          "name": "keyword",
          "count": 151367,
          "index_count": 646,
          "script_count": 0
        },
        {
          "name": "long",
          "count": 19516,
          "index_count": 593,
          "script_count": 0
        },
        {
          "name": "long_range",
          "count": 2,
          "index_count": 2,
          "script_count": 0
        },
        {
          "name": "match_only_text",
          "count": 2894,
          "index_count": 439,
          "script_count": 0
        },
        {
          "name": "nested",
          "count": 465,
          "index_count": 67,
          "script_count": 0
        },
        {
          "name": "object",
          "count": 40565,
          "index_count": 594,
          "script_count": 0
        },
        {
          "name": "scaled_float",
          "count": 658,
          "index_count": 98,
          "script_count": 0
        },
        {
          "name": "shape",
          "count": 2,
          "index_count": 2,
          "script_count": 0
        },
        {
          "name": "short",
          "count": 441,
          "index_count": 15,
          "script_count": 0
        },
        {
          "name": "text",
          "count": 2599,
          "index_count": 564,
          "script_count": 0
        },
        {
          "name": "unsigned_long",
          "count": 105,
          "index_count": 11,
          "script_count": 0
        },
        {
          "name": "version",
          "count": 26,
          "index_count": 26,
          "script_count": 0
        },
        {
          "name": "wildcard",
          "count": 651,
          "index_count": 245,
          "script_count": 0
        }
      ],
      "runtime_field_types": []
    },
    "analysis": {
      "char_filter_types": [],
      "tokenizer_types": [],
      "filter_types": [],
      "analyzer_types": [
        {
          "name": "pattern",
          "count": 6,
          "index_count": 6
        }
      ],
      "built_in_char_filters": [],
      "built_in_tokenizers": [],
      "built_in_filters": [],
      "built_in_analyzers": [
        {
          "name": "powershell_script_analyzer",
          "count": 3,
          "index_count": 3
        },
        {
          "name": "whitespace",
          "count": 4,
          "index_count": 4
        },
        {
          "name": "winlogbeat_powershell_script_analyzer",
          "count": 3,
          "index_count": 3
        }
      ]
    },
    "versions": [
      {
        "version": "8.5.0",
        "index_count": 680,
        "primary_shard_count": 680,
        "total_primary_bytes": 2518783322457
      }
    ]
  },
  "nodes": {
    "count": {
      "total": 16,
      "coordinating_only": 2,
      "data": 0,
      "data_cold": 0,
      "data_content": 7,
      "data_frozen": 0,
      "data_hot": 5,
      "data_warm": 2,
      "ingest": 12,
      "master": 3,
      "ml": 2,
      "remote_cluster_client": 3,
      "transform": 12,
      "voting_only": 0
    },
    "versions": [
      "8.5.0"
    ],
    "os": {
      "available_processors": 108,
      "allocated_processors": 108,
      "names": [
        {
          "name": "Linux",
          "count": 16
        }
      ],
      "pretty_names": [
        {
          "pretty_name": "Ubuntu 20.04.5 LTS",
          "count": 16
        }
      ],
      "architectures": [
        {
          "arch": "amd64",
          "count": 16
        }
      ],
      "mem": {
        "total_in_bytes": 429496729600,
        "adjusted_total_in_bytes": 429496729600,
        "free_in_bytes": 122409902080,
        "used_in_bytes": 307086827520,
        "free_percent": 29,
        "used_percent": 71
      }
    },
    "process": {
      "cpu": {
        "percent": 188
      },
      "open_file_descriptors": {
        "min": 733,
        "max": 2734,
        "avg": 1569
      }
    },
    "jvm": {
      "max_uptime_in_millis": 3331527803,
      "versions": [
        {
          "version": "19",
          "vm_name": "OpenJDK 64-Bit Server VM",
          "vm_version": "19+36-2238",
          "vm_vendor": "Oracle Corporation",
          "bundled_jdk": true,
          "using_bundled_jdk": true,
          "count": 16
        }
      ],
      "mem": {
        "heap_used_in_bytes": 83956118280,
        "heap_max_in_bytes": 214748364800
      },
      "threads": 2627
    },
    "fs": {
      "total_in_bytes": 14390645006336,
      "free_in_bytes": 11150733922304,
      "available_in_bytes": 10477512306688
    },
    "plugins": [],
    "network_types": {
      "transport_types": {
        "security4": 16
      },
      "http_types": {
        "security4": 16
      }
    },
    "discovery_types": {
      "multi-node": 16
    },
    "packaging_types": [
      {
        "flavor": "default",
        "type": "docker",
        "count": 16
      }
    ],
    "ingest": {
      "number_of_pipelines": 789,
      "processor_stats": {
        "append": {
          "count": 0,
          "failed": 0,
          "current": 0,
          "time_in_millis": 0
        },
        "community_id": {
          "count": 42,
          "failed": 0,
          "current": 0,
          "time_in_millis": 2
        },
        "conditional": {
          "count": 111385028,
          "failed": 456,
          "current": 0,
          "time_in_millis": 4762130
        },
        "convert": {
          "count": 153613942,
          "failed": 319186,
          "current": 0,
          "time_in_millis": 238784
        },
        "csv": {
          "count": 11686,
          "failed": 2280,
          "current": 0,
          "time_in_millis": 47
        },
        "date": {
          "count": 143678708,
          "failed": 0,
          "current": 0,
          "time_in_millis": 2500973
        },
        "dissect": {
          "count": 2260425,
          "failed": 2999,
          "current": 0,
          "time_in_millis": 7318
        },
        "dot_expander": {
          "count": 0,
          "failed": 0,
          "current": 0,
          "time_in_millis": 0
        },
        "fingerprint": {
          "count": 84,
          "failed": 0,
          "current": 0,
          "time_in_millis": 10
        },
        "foreach": {
          "count": 127772,
          "failed": 0,
          "current": 0,
          "time_in_millis": 880
        },
        "geoip": {
          "count": 104497044,
          "failed": 125942,
          "current": 0,
          "time_in_millis": 244963
        },
        "grok": {
          "count": 980841,
          "failed": 127991,
          "current": 0,
          "time_in_millis": 41469
        },
        "gsub": {
          "count": 49975629,
          "failed": 0,
          "current": 0,
          "time_in_millis": 50579
        },
        "join": {
          "count": 0,
          "failed": 0,
          "current": 0,
          "time_in_millis": 0
        },
        "json": {
          "count": 0,
          "failed": 0,
          "current": 0,
          "time_in_millis": 0
        },
        "kv": {
          "count": 1381363,
          "failed": 0,
          "current": 0,
          "time_in_millis": 5500
        },
        "lowercase": {
          "count": 127772,
          "failed": 0,
          "current": 0,
          "time_in_millis": 270
        },
        "network_direction": {
          "count": 0,
          "failed": 0,
          "current": 0,
          "time_in_millis": 0
        },
        "pipeline": {
          "count": 192434538,
          "failed": 0,
          "current": 0,
          "time_in_millis": 68679
        },
        "registered_domain": {
          "count": 0,
          "failed": 0,
          "current": 0,
          "time_in_millis": 0
        },
        "remove": {
          "count": 339804670,
          "failed": 46890045,
          "current": 0,
          "time_in_millis": 1020197
        },
        "rename": {
          "count": 116123861,
          "failed": 14270087,
          "current": 0,
          "time_in_millis": 215597
        },
        "script": {
          "count": 1343710182,
          "failed": 0,
          "current": 0,
          "time_in_millis": 1889665
        },
        "set": {
          "count": 201593065,
          "failed": 24829553,
          "current": 0,
          "time_in_millis": 406922
        },
        "set_security_user": {
          "count": 143545951,
          "failed": 0,
          "current": 0,
          "time_in_millis": 829420
        },
        "split": {
          "count": 127772,
          "failed": 127772,
          "current": 0,
          "time_in_millis": 609
        },
        "trim": {
          "count": 0,
          "failed": 0,
          "current": 0,
          "time_in_millis": 0
        },
        "uppercase": {
          "count": 84,
          "failed": 0,
          "current": 0,
          "time_in_millis": 0
        },
        "uri_parts": {
          "count": 0,
          "failed": 0,
          "current": 0,
          "time_in_millis": 0
        },
        "urldecode": {
          "count": 0,
          "failed": 0,
          "current": 0,
          "time_in_millis": 0
        },
        "user_agent": {
          "count": 0,
          "failed": 0,
          "current": 0,
          "time_in_millis": 0
        }
      }
    },
    "indexing_pressure": {
      "memory": {
        "current": {
          "combined_coordinating_and_primary_in_bytes": 0,
          "coordinating_in_bytes": 0,
          "primary_in_bytes": 0,
          "replica_in_bytes": 0,
          "all_in_bytes": 0
        },
        "total": {
          "combined_coordinating_and_primary_in_bytes": 0,
          "coordinating_in_bytes": 0,
          "primary_in_bytes": 0,
          "replica_in_bytes": 0,
          "all_in_bytes": 0,
          "coordinating_rejections": 0,
          "primary_rejections": 0,
          "replica_rejections": 0
        },
        "limit_in_bytes": 0
      }
    }
  }
}

Sorry.

node-name-master1          99.9gb           33.04    66.9gb
node-name-master2          99.9gb           31.48    68.5gb
node-name-hot3              1.7tb            9.12     1.5tb
node-name-warm2             1.7tb           41.89       1tb
node-name-hot1              1.6tb           16.50     1.3tb
node-name-ml2              99.9gb           74.01    25.9gb
node-name-hot2              1.7tb           19.10     1.4tb
node-name-hot5              1.7tb           16.41     1.4tb
node-name-coordination3   140.9gb            4.51   134.5gb
node-name-coordination4   140.9gb            5.33   133.4gb
node-name-coordination2    99.9gb           31.48    68.5gb
node-name-master3          99.9gb           53.57    46.4gb
node-name-warm1             1.7tb           58.60   745.6gb
node-name-hot4              1.7tb           24.64     1.3tb
node-name-coordination1    99.9gb           33.04    66.9gb
node-name-ml1              99.9gb           49.21    50.7gb

This topic was automatically closed 28 days after the last reply. New replies are no longer allowed.