Facing data too large exception frequently

CircuitBreakingException[[parent] Data too large, data for [internal:index/shard/recovery/start_recovery] would be [7805417954/7.2gb], which is larger than the limit of [6941363404/6.4gb], real usage: [7805416568/7.2gb], new bytes reserved: [1386/1.3kb], usages [request=131136/128kb, fielddata=72627479/69.2mb, in_flight_requests=1386/1.3kb, model_inference=0/0b, accounting=8821828/8.4mb]]; ",

We are running 4 nodes with 1 master and 3 data nodes, each with 16 GB RAM. Every node has been given a heap size of 8 GB.
We have set "indices.breaker.total.limit" to 95%, but the circuit breaker is still tripping even at that limit.

Search queries are not fired very frequently (3-4 times a day) but data is pushed every second.

This in turn frequently causes the cluster health to go yellow, and sometimes the server also becomes unresponsive.

We haven't found any concrete solution to this. Kindly advise. Happy to provide more information if needed.

You always want at least 3 master-eligible nodes in a cluster for high availability.

What is the full output of the cluster stats API?

We already have 3 master-eligible nodes.

Response of the cluster stats API:

{
  "_nodes" : {
    "total" : 4,
    "successful" : 4,
    "failed" : 0
  },
  "cluster_name" : "search-es-stat",
  "cluster_uuid" : "7bR8lELUQ8ODwhiktMt3kw",
  "timestamp" : 1605773284226,
  "status" : "green",
  "indices" : {
    "count" : 28,
    "shards" : {
      "total" : 124,
      "primaries" : 62,
      "replication" : 1.0,
      "index" : {
        "shards" : {
          "min" : 2,
          "max" : 6,
          "avg" : 4.428571428571429
        },
        "primaries" : {
          "min" : 1,
          "max" : 3,
          "avg" : 2.2142857142857144
        },
        "replication" : {
          "min" : 1.0,
          "max" : 1.0,
          "avg" : 1.0
        }
      }
    },
    "docs" : {
      "count" : 124853718,
      "deleted" : 7417
    },
    "store" : {
      "size_in_bytes" : 290591538176,
      "reserved_in_bytes" : 0
    },
    "fielddata" : {
      "memory_size_in_bytes" : 495173568,
      "evictions" : 0
    },
    "query_cache" : {
      "memory_size_in_bytes" : 35390192,
      "total_count" : 18049,
      "hit_count" : 2273,
      "miss_count" : 15776,
      "cache_size" : 183,
      "cache_count" : 407,
      "evictions" : 224
    },
    "completion" : {
      "size_in_bytes" : 0
    },
    "segments" : {
      "count" : 2141,
      "memory_in_bytes" : 26204820,
      "terms_memory_in_bytes" : 19082496,
      "stored_fields_memory_in_bytes" : 2917608,
      "term_vectors_memory_in_bytes" : 0,
      "norms_memory_in_bytes" : 2270848,
      "points_memory_in_bytes" : 0,
      "doc_values_memory_in_bytes" : 1933868,
      "index_writer_memory_in_bytes" : 448976680,
      "version_map_memory_in_bytes" : 0,
      "fixed_bit_set_memory_in_bytes" : 3200,
      "max_unsafe_auto_id_timestamp" : 1605724201492,
      "file_sizes" : { }
    },
    "mappings" : {
      "field_types" : [
        {
          "name" : "binary",
          "count" : 9,
          "index_count" : 1
        },
        {
          "name" : "boolean",
          "count" : 135,
          "index_count" : 15
        },
        {
          "name" : "date",
          "count" : 89,
          "index_count" : 26
        },
        {
          "name" : "flattened",
          "count" : 9,
          "index_count" : 1
        },
        {
          "name" : "float",
          "count" : 13,
          "index_count" : 10
        },
        {
          "name" : "geo_point",
          "count" : 8,
          "index_count" : 8
        },
        {
          "name" : "integer",
          "count" : 104,
          "index_count" : 19
        },
        {
          "name" : "keyword",
          "count" : 678,
          "index_count" : 27
        },
        {
          "name" : "long",
          "count" : 99,
          "index_count" : 23
        },
        {
          "name" : "nested",
          "count" : 13,
          "index_count" : 3
        },
        {
          "name" : "object",
          "count" : 181,
          "index_count" : 19
        },
        {
          "name" : "short",
          "count" : 8,
          "index_count" : 8
        },
        {
          "name" : "text",
          "count" : 414,
          "index_count" : 17
        }
      ]
    },
    "analysis" : {
      "char_filter_types" : [ ],
      "tokenizer_types" : [ ],
      "filter_types" : [ ],
      "analyzer_types" : [
        {
          "name" : "snowball",
          "count" : 8,
          "index_count" : 8
        }
      ],
      "built_in_char_filters" : [ ],
      "built_in_tokenizers" : [ ],
      "built_in_filters" : [ ],
      "built_in_analyzers" : [
        {
          "name" : "english",
          "count" : 32,
          "index_count" : 8
        }
      ]
    }
  },
  "nodes" : {
    "count" : {
      "total" : 4,
      "coordinating_only" : 0,
      "data" : 3,
      "ingest" : 4,
      "master" : 3,
      "ml" : 4,
      "remote_cluster_client" : 4,
      "transform" : 3,
      "voting_only" : 0
    },
    "versions" : [
      "7.9.0"
    ],
    "os" : {
      "available_processors" : 16,
      "allocated_processors" : 16,
      "names" : [
        {
          "name" : "Linux",
          "count" : 4
        }
      ],
      "pretty_names" : [
        {
          "pretty_name" : "Ubuntu 18.04.2 LTS",
          "count" : 4
        }
      ],
      "mem" : {
        "total_in_bytes" : 65334640640,
        "free_in_bytes" : 1734565888,
        "used_in_bytes" : 63600074752,
        "free_percent" : 3,
        "used_percent" : 97
      }
    },
    "process" : {
      "cpu" : {
        "percent" : 11
      },
      "open_file_descriptors" : {
        "min" : 370,
        "max" : 806,
        "avg" : 673
      }
    },
    "jvm" : {
      "max_uptime_in_millis" : 155085674,
      "versions" : [
        {
          "version" : "14.0.1",
          "vm_name" : "OpenJDK 64-Bit Server VM",
          "vm_version" : "14.0.1+7",
          "vm_vendor" : "AdoptOpenJDK",
          "bundled_jdk" : true,
          "using_bundled_jdk" : true,
          "count" : 4
        }
      ],
      "mem" : {
        "heap_used_in_bytes" : 19355036056,
        "heap_max_in_bytes" : 32665239552
      },
      "threads" : 285
    },
    "fs" : {
      "total_in_bytes" : 684500848640,
      "free_in_bytes" : 382789365760,
      "available_in_bytes" : 353667121152
    },
    "plugins" : [
      {
        "name" : "discovery-ec2",
        "version" : "7.9.0",
        "elasticsearch_version" : "7.9.0",
        "java_version" : "1.8",
        "description" : "The EC2 discovery plugin allows to use AWS API for the unicast discovery mechanism.",
        "classname" : "org.elasticsearch.discovery.ec2.Ec2DiscoveryPlugin",
        "extended_plugins" : [ ],
        "has_native_controller" : false
      },
      {
        "name" : "repository-s3",
        "version" : "7.9.0",
        "elasticsearch_version" : "7.9.0",
        "java_version" : "1.8",
        "description" : "The S3 repository plugin adds S3 repositories",
        "classname" : "org.elasticsearch.repositories.s3.S3RepositoryPlugin",
        "extended_plugins" : [ ],
        "has_native_controller" : false
      }
    ],
    "network_types" : {
      "transport_types" : {
        "security4" : 4
      },
      "http_types" : {
        "security4" : 4
      }
    },
    "discovery_types" : {
      "zen" : 4
    },
    "packaging_types" : [
      {
        "flavor" : "default",
        "type" : "deb",
        "count" : 4
      }
    ],
    "ingest" : {
      "number_of_pipelines" : 2,
      "processor_stats" : {
        "gsub" : {
          "count" : 0,
          "failed" : 0,
          "current" : 0,
          "time_in_millis" : 0
        },
        "script" : {
          "count" : 0,
          "failed" : 0,
          "current" : 0,
          "time_in_millis" : 0
        }
      }
    }
  }
}

Version 7.9.0 has a memory leak so I would recommend upgrading to version 7.9.1+.

2 Likes

@Christian_Dahlqvist We have upgraded our ES version to 7.9.2. So far, so good. We will report back if any issue occurs.

Thanks for the help here!! :relieved:

This topic was automatically closed 28 days after the last reply. New replies are no longer allowed.