Where does the memory go?

Our clients recently encountered the errors below when performing index and query requests:

org.elasticsearch.client.ResponseException: org.elasticsearch.client.ResponseException: method [POST], host [http://192.168.12.171:9201], URI [/_bulk], status line [HTTP/1.1 429 Too Many Requests]

{"error":{"root_cause":[{"type":"circuit_breaking_exception","reason":"[parent] Data too large, data for [<http_request>] would be [30299660794/28.2gb], which is larger than the limit of [28991029248/27gb], real usage: [30299434000/28.2gb], new bytes reserved: [226794/221.4kb], usages [eql_sequence=0/0b, model_inference=0/0b, inflight_requests=226794/221.4kb, request=0/0b, fielddata=389538262/371.4mb]","bytes_wanted":30299660794,"bytes_limit":28991029248,"durability":"PERMANENT"}],"type":"circuit_breaking_exception","reason":"[parent] Data too large, data for [<http_request>] would be [30299660794/28.2gb], which is larger than the limit of [28991029248/27gb], real usage: [30299434000/28.2gb], new bytes reserved: [226794/221.4kb], usages [eql_sequence=0/0b, model_inference=0/0b, inflight_requests=226794/221.4kb, request=0/0b, fielddata=389538262/371.4mb]","bytes_wanted":30299660794,"bytes_limit":28991029248,"durability":"PERMANENT"},"status":429}

We checked the node stats; here is the result:

The image shows that every node has consumed over 20 GB of heap memory, but the detailed memory figures in the right-hand columns sum to less than 5 GB. Where did the remaining memory go? How can I find out what is consuming it?

FYI: cluster version is 8.9.0.

Please provide the full output of the cluster stats API.

{
  "_nodes": {
    "total": 24,
    "successful": 24,
    "failed": 0
  },
  "cluster_name": "es-cluster",
  "cluster_uuid": "fVy**********tYK6CIg",
  "timestamp": 1694079673141,
  "status": "green",
  "indices": {
    "count": 876,
    "shards": {
      "total": 5352,
      "primaries": 2655,
      "replication": 1.015819209039548,
      "index": {
        "shards": {
          "min": 2,
          "max": 40,
          "avg": 6.109589041095891
        },
        "primaries": {
          "min": 1,
          "max": 20,
          "avg": 3.030821917808219
        },
        "replication": {
          "min": 1,
          "max": 2,
          "avg": 1.047945205479452
        }
      }
    },
    "docs": {
      "count": 199518635122,
      "deleted": 8779697346
    },
    "store": {
      "size": "26.9tb",
      "size_in_bytes": 29657955292042,
      "total_data_set_size": "26.9tb",
      "total_data_set_size_in_bytes": 29657955292042,
      "reserved": "0b",
      "reserved_in_bytes": 0
    },
    "fielddata": {
      "memory_size": "10.9gb",
      "memory_size_in_bytes": 11742886328,
      "evictions": 0,
      "global_ordinals": {
        "build_time": "15.2h",
        "build_time_in_millis": 54995412
      }
    },
    "query_cache": {
      "memory_size": "39.2gb",
      "memory_size_in_bytes": 42117541378,
      "total_count": 427951485,
      "hit_count": 84300456,
      "miss_count": 343651029,
      "cache_size": 3326847,
      "cache_count": 7019725,
      "evictions": 3692878
    },
    "completion": {
      "size": "0b",
      "size_in_bytes": 0
    },
    "segments": {
      "count": 92948,
      "memory": "0b",
      "memory_in_bytes": 0,
      "terms_memory": "0b",
      "terms_memory_in_bytes": 0,
      "stored_fields_memory": "0b",
      "stored_fields_memory_in_bytes": 0,
      "term_vectors_memory": "0b",
      "term_vectors_memory_in_bytes": 0,
      "norms_memory": "0b",
      "norms_memory_in_bytes": 0,
      "points_memory": "0b",
      "points_memory_in_bytes": 0,
      "doc_values_memory": "0b",
      "doc_values_memory_in_bytes": 0,
      "index_writer_memory": "619.9mb",
      "index_writer_memory_in_bytes": 650109214,
      "version_map_memory": "9.2mb",
      "version_map_memory_in_bytes": 9718290,
      "fixed_bit_set": "81.6gb",
      "fixed_bit_set_memory_in_bytes": 87670278240,
      "max_unsafe_auto_id_timestamp": 1694077901575,
      "file_sizes": {}
    },
    "mappings": {
      "total_field_count": 46008,
      "total_deduplicated_field_count": 6420,
      "total_deduplicated_mapping_size": "50.4kb",
      "total_deduplicated_mapping_size_in_bytes": 51621,
      "field_types": [
        {
          "name": "alias",
          "count": 76,
          "index_count": 1,
          "script_count": 0
        },
        {
          "name": "binary",
          "count": 1,
          "index_count": 1,
          "script_count": 0
        },
        {
          "name": "boolean",
          "count": 175,
          "index_count": 49,
          "script_count": 0
        },
        {
          "name": "byte",
          "count": 2580,
          "index_count": 360,
          "script_count": 0
        },
        {
          "name": "constant_keyword",
          "count": 6,
          "index_count": 2,
          "script_count": 0
        },
        {
          "name": "date",
          "count": 2468,
          "index_count": 867,
          "script_count": 0
        },
        {
          "name": "date_range",
          "count": 6,
          "index_count": 6,
          "script_count": 0
        },
        {
          "name": "flattened",
          "count": 39,
          "index_count": 6,
          "script_count": 0
        },
        {
          "name": "float",
          "count": 150,
          "index_count": 20,
          "script_count": 0
        },
        {
          "name": "geo_point",
          "count": 25,
          "index_count": 4,
          "script_count": 0
        },
        {
          "name": "half_float",
          "count": 68,
          "index_count": 15,
          "script_count": 0
        },
        {
          "name": "integer",
          "count": 6196,
          "index_count": 482,
          "script_count": 0
        },
        {
          "name": "ip",
          "count": 53,
          "index_count": 5,
          "script_count": 0
        },
        {
          "name": "keyword",
          "count": 12925,
          "index_count": 614,
          "script_count": 0
        },
        {
          "name": "long",
          "count": 10625,
          "index_count": 700,
          "script_count": 0
        },
        {
          "name": "match_only_text",
          "count": 270,
          "index_count": 3,
          "script_count": 0
        },
        {
          "name": "nested",
          "count": 3984,
          "index_count": 431,
          "script_count": 0
        },
        {
          "name": "object",
          "count": 1972,
          "index_count": 66,
          "script_count": 0
        },
        {
          "name": "scaled_float",
          "count": 24,
          "index_count": 6,
          "script_count": 0
        },
        {
          "name": "text",
          "count": 4291,
          "index_count": 826,
          "script_count": 0
        },
        {
          "name": "version",
          "count": 8,
          "index_count": 8,
          "script_count": 0
        },
        {
          "name": "wildcard",
          "count": 66,
          "index_count": 3,
          "script_count": 0
        }
      ],
      "runtime_field_types": []
    },
    "analysis": {
      "char_filter_types": [],
      "tokenizer_types": [],
      "filter_types": [],
      "analyzer_types": [],
      "built_in_char_filters": [],
      "built_in_tokenizers": [],
      "built_in_filters": [],
      "built_in_analyzers": [
        {
          "name": "english",
          "count": 1,
          "index_count": 1
        }
      ]
    },
    "versions": [
      {
        "version": "7.2.0",
        "index_count": 81,
        "primary_shard_count": 81,
        "total_primary_size": "1.9gb",
        "total_primary_bytes": 2097124718
      },
      {
        "version": "7.16.2",
        "index_count": 94,
        "primary_shard_count": 168,
        "total_primary_size": "873.1gb",
        "total_primary_bytes": 937538466397
      },
      {
        "version": "7.17.10",
        "index_count": 14,
        "primary_shard_count": 38,
        "total_primary_size": "44.1mb",
        "total_primary_bytes": 46319041
      },
      {
        "version": "8.9.0",
        "index_count": 687,
        "primary_shard_count": 2368,
        "total_primary_size": "12.6tb",
        "total_primary_bytes": 13891625586976
      }
    ],
    "search": {
      "total": 7802998,
      "queries": {
        "bool": 3661759,
        "prefix": 6719,
        "match": 98186,
        "range": 2593430,
        "nested": 2512,
        "wildcard": 37,
        "match_phrase": 110558,
        "terms": 4913959,
        "match_phrase_prefix": 25,
        "match_all": 1,
        "ids": 5946,
        "exists": 793436,
        "term": 2966498,
        "simple_query_string": 106883
      },
      "sections": {
        "highlight": 1,
        "search_after": 76,
        "stored_fields": 1,
        "runtime_mappings": 5135,
        "query": 7790132,
        "script_fields": 1,
        "pit": 3404,
        "_source": 1908225,
        "terminate_after": 1826,
        "fields": 30983,
        "collapse": 39748,
        "aggs": 321766
      }
    }
  },
  "nodes": {
    "count": {
      "total": 24,
      "coordinating_only": 0,
      "data": 24,
      "data_cold": 5,
      "data_content": 5,
      "data_frozen": 5,
      "data_hot": 5,
      "data_warm": 5,
      "index": 0,
      "ingest": 24,
      "master": 5,
      "ml": 5,
      "remote_cluster_client": 5,
      "search": 0,
      "transform": 5,
      "voting_only": 0
    },
    "versions": [
      "8.9.0"
    ],
    "os": {
      "available_processors": 1056,
      "allocated_processors": 1056,
      "names": [
        {
          "name": "Linux",
          "count": 24
        }
      ],
      "pretty_names": [
        {
          "pretty_name": "Ubuntu 20.04.6 LTS",
          "count": 24
        }
      ],
      "architectures": [
        {
          "arch": "amd64",
          "count": 24
        }
      ],
      "mem": {
        "total": "5.8tb",
        "total_in_bytes": 6479857205248,
        "adjusted_total": "5.8tb",
        "adjusted_total_in_bytes": 6479857205248,
        "free": "104.7gb",
        "free_in_bytes": 112457527296,
        "used": "5.7tb",
        "used_in_bytes": 6367399677952,
        "free_percent": 2,
        "used_percent": 98
      }
    },
    "process": {
      "cpu": {
        "percent": 11
      },
      "open_file_descriptors": {
        "min": 2754,
        "max": 3029,
        "avg": 2893
      }
    },
    "jvm": {
      "max_uptime": "37d",
      "max_uptime_in_millis": 3202137150,
      "versions": [
        {
          "version": "20.0.2",
          "vm_name": "OpenJDK 64-Bit Server VM",
          "vm_version": "20.0.2+9-78",
          "vm_vendor": "Oracle Corporation",
          "bundled_jdk": true,
          "using_bundled_jdk": true,
          "count": 24
        }
      ],
      "mem": {
        "heap_used": "478gb",
        "heap_used_in_bytes": 513252209768,
        "heap_max": "720gb",
        "heap_max_in_bytes": 773094113280
      },
      "threads": 6686
    },
    "fs": {
      "total": "1pb",
      "total_in_bytes": 1198060939788288,
      "free": "974tb",
      "free_in_bytes": 1070989627793408,
      "available": "974tb",
      "available_in_bytes": 1070989627793408
    },
    "plugins": [],
    "network_types": {
      "transport_types": {
        "netty4": 24
      },
      "http_types": {
        "netty4": 24
      }
    },
    "discovery_types": {
      "multi-node": 24
    },
    "packaging_types": [
      {
        "flavor": "default",
        "type": "docker",
        "count": 24
      }
    ],
    "ingest": {
      "number_of_pipelines": 3,
      "processor_stats": {
        "dot_expander": {
          "count": 0,
          "failed": 0,
          "current": 0,
          "time": "0s",
          "time_in_millis": 0
        },
        "foreach": {
          "count": 0,
          "failed": 0,
          "current": 0,
          "time": "0s",
          "time_in_millis": 0
        },
        "geoip": {
          "count": 0,
          "failed": 0,
          "current": 0,
          "time": "0s",
          "time_in_millis": 0
        },
        "json": {
          "count": 0,
          "failed": 0,
          "current": 0,
          "time": "0s",
          "time_in_millis": 0
        },
        "pipeline": {
          "count": 0,
          "failed": 0,
          "current": 0,
          "time": "0s",
          "time_in_millis": 0
        },
        "remove": {
          "count": 0,
          "failed": 0,
          "current": 0,
          "time": "0s",
          "time_in_millis": 0
        },
        "rename": {
          "count": 0,
          "failed": 0,
          "current": 0,
          "time": "0s",
          "time_in_millis": 0
        },
        "set": {
          "count": 0,
          "failed": 0,
          "current": 0,
          "time": "0s",
          "time_in_millis": 0
        },
        "uri_parts": {
          "count": 0,
          "failed": 0,
          "current": 0,
          "time": "0s",
          "time_in_millis": 0
        },
        "user_agent": {
          "count": 0,
          "failed": 0,
          "current": 0,
          "time": "0s",
          "time_in_millis": 0
        }
      }
    },
    "indexing_pressure": {
      "memory": {
        "current": {
          "combined_coordinating_and_primary": "0b",
          "combined_coordinating_and_primary_in_bytes": 0,
          "coordinating": "0b",
          "coordinating_in_bytes": 0,
          "primary": "0b",
          "primary_in_bytes": 0,
          "replica": "0b",
          "replica_in_bytes": 0,
          "all": "0b",
          "all_in_bytes": 0
        },
        "total": {
          "combined_coordinating_and_primary": "0b",
          "combined_coordinating_and_primary_in_bytes": 0,
          "coordinating": "0b",
          "coordinating_in_bytes": 0,
          "primary": "0b",
          "primary_in_bytes": 0,
          "replica": "0b",
          "replica_in_bytes": 0,
          "all": "0b",
          "all_in_bytes": 0,
          "coordinating_rejections": 0,
          "primary_rejections": 0,
          "replica_rejections": 0
        },
        "limit": "0b",
        "limit_in_bytes": 0
      }
    }
  },
  "snapshots": {
    "current_counts": {
      "snapshots": 0,
      "shard_snapshots": 0,
      "snapshot_deletions": 0,
      "concurrent_operations": 0,
      "cleanups": 0
    },
    "repositories": {}
  }
}

We use k8s to run this cluster, with 4 pods per server, so the OS memory info reported by this API is not correct.

This topic was automatically closed 28 days after the last reply. New replies are no longer allowed.