Parent Circuit Breaking Exception

We set up some monitoring to watch for parent circuit breaker trips in Elasticsearch, and it's going off like crazy. What I am trying to figure out is how to determine what is causing it. From what I have read and understand, these typically go off when there is a risk of the JVM running low on memory; however, that could be because we've written poor queries, or it could be because we've undersized our Elasticsearch instances in terms of memory. I am trying to figure out how to determine which is the case and exactly what errors are occurring.

I tried to look at the Elasticsearch logs but I am not seeing anything being logged (the exceptions we're getting are in monitoring - not logging). Is there some way to enable logging of these errors - ideally with the query that triggered the alert?

Thanks
Brad

What is the full output of the cluster stats API?

{
  "_nodes" : {
    "total" : 3,
    "successful" : 3,
    "failed" : 0
  },
  "cluster_name" : "elasticsearch",
  "cluster_uuid" : "8UYG6ACfStSB0Cg8AYV2eA",
  "timestamp" : 1704141748697,
  "status" : "green",
  "indices" : {
    "count" : 33,
    "shards" : {
      "total" : 169,
      "primaries" : 65,
      "replication" : 1.6,
      "index" : {
        "shards" : {
          "min" : 2,
          "max" : 15,
          "avg" : 5.121212121212121
        },
        "primaries" : {
          "min" : 1,
          "max" : 5,
          "avg" : 1.9696969696969697
        },
        "replication" : {
          "min" : 1.0,
          "max" : 2.0,
          "avg" : 1.3333333333333333
        }
      }
    },
    "docs" : {
      "count" : 6081530,
      "deleted" : 3443651
    },
    "store" : {
      "size_in_bytes" : 26081668182,
      "total_data_set_size_in_bytes" : 26081668182,
      "reserved_in_bytes" : 0
    },
    "fielddata" : {
      "memory_size_in_bytes" : 0,
      "evictions" : 0
    },
    "query_cache" : {
      "memory_size_in_bytes" : 644081693,
      "total_count" : 13968276995,
      "hit_count" : 2894963500,
      "miss_count" : 11073313495,
      "cache_size" : 148063,
      "cache_count" : 43307755,
      "evictions" : 43159692
    },
    "completion" : {
      "size_in_bytes" : 66760908
    },
    "segments" : {
      "count" : 1207,
      "memory_in_bytes" : 72540238,
      "terms_memory_in_bytes" : 69695212,
      "stored_fields_memory_in_bytes" : 614952,
      "term_vectors_memory_in_bytes" : 0,
      "norms_memory_in_bytes" : 122624,
      "points_memory_in_bytes" : 0,
      "doc_values_memory_in_bytes" : 2107450,
      "index_writer_memory_in_bytes" : 3442548,
      "version_map_memory_in_bytes" : 6755,
      "fixed_bit_set_memory_in_bytes" : 70072,
      "max_unsafe_auto_id_timestamp" : 1703721882767,
      "file_sizes" : { }
    },
    "mappings" : {
      "field_types" : [
        {
          "name" : "boolean",
          "count" : 29,
          "index_count" : 13,
          "script_count" : 0
        },
        {
          "name" : "byte",
          "count" : 1,
          "index_count" : 1,
          "script_count" : 0
        },
        {
          "name" : "completion",
          "count" : 1,
          "index_count" : 1,
          "script_count" : 0
        },
        {
          "name" : "date",
          "count" : 88,
          "index_count" : 23,
          "script_count" : 0
        },
        {
          "name" : "float",
          "count" : 16,
          "index_count" : 8,
          "script_count" : 0
        },
        {
          "name" : "geo_point",
          "count" : 4,
          "index_count" : 4,
          "script_count" : 0
        },
        {
          "name" : "integer",
          "count" : 28,
          "index_count" : 11,
          "script_count" : 0
        },
        {
          "name" : "keyword",
          "count" : 430,
          "index_count" : 23,
          "script_count" : 0
        },
        {
          "name" : "long",
          "count" : 100,
          "index_count" : 24,
          "script_count" : 0
        },
        {
          "name" : "nested",
          "count" : 8,
          "index_count" : 8,
          "script_count" : 0
        },
        {
          "name" : "object",
          "count" : 95,
          "index_count" : 14,
          "script_count" : 0
        },
        {
          "name" : "text",
          "count" : 62,
          "index_count" : 21,
          "script_count" : 0
        },
        {
          "name" : "version",
          "count" : 8,
          "index_count" : 8,
          "script_count" : 0
        }
      ],
      "runtime_field_types" : [ ]
    },
    "analysis" : {
      "char_filter_types" : [ ],
      "tokenizer_types" : [ ],
      "filter_types" : [
        {
          "name" : "asciifolding",
          "count" : 4,
          "index_count" : 4
        }
      ],
      "analyzer_types" : [
        {
          "name" : "custom",
          "count" : 5,
          "index_count" : 5
        }
      ],
      "built_in_char_filters" : [ ],
      "built_in_tokenizers" : [
        {
          "name" : "standard",
          "count" : 5,
          "index_count" : 5
        }
      ],
      "built_in_filters" : [
        {
          "name" : "asciifolding",
          "count" : 1,
          "index_count" : 1
        },
        {
          "name" : "lowercase",
          "count" : 5,
          "index_count" : 5
        }
      ],
      "built_in_analyzers" : [
        {
          "name" : "english",
          "count" : 1,
          "index_count" : 1
        }
      ]
    },
    "versions" : [
      {
        "version" : "7.15.0",
        "index_count" : 15,
        "primary_shard_count" : 31,
        "total_primary_bytes" : 2104163606
      },
      {
        "version" : "7.16.3",
        "index_count" : 18,
        "primary_shard_count" : 34,
        "total_primary_bytes" : 6603010429
      }
    ]
  },
  "nodes" : {
    "count" : {
      "total" : 3,
      "coordinating_only" : 0,
      "data" : 3,
      "data_cold" : 3,
      "data_content" : 3,
      "data_frozen" : 3,
      "data_hot" : 3,
      "data_warm" : 3,
      "ingest" : 3,
      "master" : 3,
      "ml" : 3,
      "remote_cluster_client" : 3,
      "transform" : 3,
      "voting_only" : 0
    },
    "versions" : [
      "7.16.3"
    ],
    "os" : {
      "available_processors" : 6,
      "allocated_processors" : 6,
      "names" : [
        {
          "name" : "Linux",
          "count" : 3
        }
      ],
      "pretty_names" : [
        {
          "pretty_name" : "Ubuntu 20.04.3 LTS",
          "count" : 3
        }
      ],
      "architectures" : [
        {
          "arch" : "amd64",
          "count" : 3
        }
      ],
      "mem" : {
        "total_in_bytes" : 12884901888,
        "free_in_bytes" : 147456,
        "used_in_bytes" : 12884754432,
        "free_percent" : 0,
        "used_percent" : 100
      }
    },
    "process" : {
      "cpu" : {
        "percent" : 192
      },
      "open_file_descriptors" : {
        "min" : 886,
        "max" : 912,
        "avg" : 900
      }
    },
    "jvm" : {
      "max_uptime_in_millis" : 1412582983,
      "versions" : [
        {
          "version" : "17.0.1",
          "vm_name" : "OpenJDK 64-Bit Server VM",
          "vm_version" : "17.0.1+12",
          "vm_vendor" : "Eclipse Adoptium",
          "bundled_jdk" : true,
          "using_bundled_jdk" : true,
          "count" : 3
        }
      ],
      "mem" : {
        "heap_used_in_bytes" : 3851070152,
        "heap_max_in_bytes" : 6442450944
      },
      "threads" : 194
    },
    "fs" : {
      "total_in_bytes" : 126424350720,
      "free_in_bytes" : 79459033088,
      "available_in_bytes" : 73223979008
    },
    "plugins" : [ ],
    "network_types" : {
      "transport_types" : {
        "security4" : 3
      },
      "http_types" : {
        "security4" : 3
      }
    },
    "discovery_types" : {
      "zen" : 3
    },
    "packaging_types" : [
      {
        "flavor" : "default",
        "type" : "docker",
        "count" : 3
      }
    ],
    "ingest" : {
      "number_of_pipelines" : 2,
      "processor_stats" : {
        "gsub" : {
          "count" : 0,
          "failed" : 0,
          "current" : 0,
          "time_in_millis" : 0
        },
        "script" : {
          "count" : 0,
          "failed" : 0,
          "current" : 0,
          "time_in_millis" : 0
        }
      }
    }
  }
}

@Christian_Dahlqvist any ideas based on the above? I’m hesitant to throw hardware (memory) at the problem if the issue is poorly written queries. On the other hand, if the cluster is just under-provisioned, I don’t want to bug our devs.

This topic was automatically closed 28 days after the last reply. New replies are no longer allowed.