Hi
Please share any suggestions on how I can tune the performance of our Elasticsearch cluster.
The solution is deployed on 3 bare-metal machines. Each host has 256 GB of RAM,
so on each host we distributed the memory across the nodes as follows:
5 hot nodes - 16 GB each
9 warm nodes - 8 GB each
2 master nodes - 16 GB each
1 ingest node - 8 GB
In total, we have 51 nodes in the cluster (17 nodes per host x 3 hosts).
However, we observe a persistent loss of data being transferred from Logstash to Elasticsearch due to low performance (high JVM heap usage).
What do you suggest we should investigate? Which parameters should we change to get better performance?
I hope that this case can be addressed by tuning the configuration parameters rather than by resizing the hardware.
:error=>{"type"=>"circuit_breaking_exception", "reason"=>"[parent] Data too large, data for [indices:data/write/bulk[s]] would be [8388960200/7.8gb], which is larger than the limit of [8160437862/7.5gb], real usage: [8384614552/7.8gb], new bytes reserved: [4345648/4.1mb], usages [eql_sequence=0/0b, model_inference=0/0b, inflight_requests=5287596/5mb, request=4960256/4.7mb, fielddata=1666309486/1.5gb]", "bytes_wanted"=>8388960200, "bytes_limit"=>8160437862, "durability"=>"PERMANENT"}}
GET _cluster/stats?pretty&human
{
"_nodes" : {
"total" : 51,
"successful" : 51,
"failed" : 0
},
"cluster_name" : "elk_cluster",
"cluster_uuid" : "XDEw48F5SEu3KcS3_jDNcw",
"timestamp" : 1672226386118,
"status" : "green",
"indices" : {
"count" : 3501,
"shards" : {
"total" : 19721,
"primaries" : 16816,
"replication" : 0.1727521408182683,
"index" : {
"shards" : {
"min" : 1,
"max" : 20,
"avg" : 5.632962010854042
},
"primaries" : {
"min" : 1,
"max" : 20,
"avg" : 4.803199085975436
},
"replication" : {
"min" : 0.0,
"max" : 1.0,
"avg" : 0.6006855184233076
}
}
},
"docs" : {
"count" : 133632656514,
"deleted" : 10988036
},
"store" : {
"size" : "58tb",
"size_in_bytes" : 63874097193069,
"total_data_set_size" : "58tb",
"total_data_set_size_in_bytes" : 63874097193069,
"reserved" : "0b",
"reserved_in_bytes" : 0
},
"fielddata" : {
"memory_size" : "1gb",
"memory_size_in_bytes" : 1138327360,
"evictions" : 0
},
"query_cache" : {
"memory_size" : "472.4mb",
"memory_size_in_bytes" : 495365253,
"total_count" : 8467850,
"hit_count" : 163210,
"miss_count" : 8304640,
"cache_size" : 19893,
"cache_count" : 20045,
"evictions" : 152
},
"completion" : {
"size" : "0b",
"size_in_bytes" : 0
},
"segments" : {
"count" : 231012,
"memory" : "0b",
"memory_in_bytes" : 0,
"terms_memory" : "0b",
"terms_memory_in_bytes" : 0,
"stored_fields_memory" : "0b",
"stored_fields_memory_in_bytes" : 0,
"term_vectors_memory" : "0b",
"term_vectors_memory_in_bytes" : 0,
"norms_memory" : "0b",
"norms_memory_in_bytes" : 0,
"points_memory" : "0b",
"points_memory_in_bytes" : 0,
"doc_values_memory" : "0b",
"doc_values_memory_in_bytes" : 0,
"index_writer_memory" : "2.5gb",
"index_writer_memory_in_bytes" : 2738339768,
"version_map_memory" : "28.3mb",
"version_map_memory_in_bytes" : 29777657,
"fixed_bit_set" : "62mb",
"fixed_bit_set_memory_in_bytes" : 65032976,
"max_unsafe_auto_id_timestamp" : 1672225946458,
"file_sizes" : { }
},
"mappings" : {
"field_types" : [
{
"name" : "alias",
"count" : 104,
"index_count" : 8,
"script_count" : 0
},
{
"name" : "binary",
"count" : 2,
"index_count" : 2,
"script_count" : 0
},
{
"name" : "boolean",
"count" : 1749,
"index_count" : 335,
"script_count" : 0
},
{
"name" : "byte",
"count" : 2566,
"index_count" : 302,
"script_count" : 0
},
{
"name" : "constant_keyword",
"count" : 30,
"index_count" : 10,
"script_count" : 0
},
{
"name" : "date",
"count" : 7068,
"index_count" : 3466,
"script_count" : 0
},
{
"name" : "date_nanos",
"count" : 2,
"index_count" : 2,
"script_count" : 0
},
{
"name" : "date_range",
"count" : 2,
"index_count" : 2,
"script_count" : 0
},
{
"name" : "double",
"count" : 346,
"index_count" : 10,
"script_count" : 0
},
{
"name" : "double_range",
"count" : 2,
"index_count" : 2,
"script_count" : 0
},
{
"name" : "flattened",
"count" : 344,
"index_count" : 8,
"script_count" : 0
},
{
"name" : "float",
"count" : 532936,
"index_count" : 1000,
"script_count" : 0
},
{
"name" : "float_range",
"count" : 2,
"index_count" : 2,
"script_count" : 0
},
{
"name" : "geo_point",
"count" : 2089,
"index_count" : 1144,
"script_count" : 0
},
{
"name" : "geo_shape",
"count" : 2,
"index_count" : 2,
"script_count" : 0
},
{
"name" : "half_float",
"count" : 1678,
"index_count" : 826,
"script_count" : 0
},
{
"name" : "integer",
"count" : 847,
"index_count" : 296,
"script_count" : 0
},
{
"name" : "integer_range",
"count" : 2,
"index_count" : 2,
"script_count" : 0
},
{
"name" : "ip",
"count" : 4518,
"index_count" : 1145,
"script_count" : 0
},
{
"name" : "ip_range",
"count" : 2,
"index_count" : 2,
"script_count" : 0
},
{
"name" : "keyword",
"count" : 106978,
"index_count" : 3468,
"script_count" : 0
},
{
"name" : "long",
"count" : 2673403,
"index_count" : 2320,
"script_count" : 0
},
{
"name" : "long_range",
"count" : 2,
"index_count" : 2,
"script_count" : 0
},
{
"name" : "match_only_text",
"count" : 504,
"index_count" : 8,
"script_count" : 0
},
{
"name" : "nested",
"count" : 161,
"index_count" : 19,
"script_count" : 0
},
{
"name" : "object",
"count" : 875162,
"index_count" : 2479,
"script_count" : 0
},
{
"name" : "scaled_float",
"count" : 10,
"index_count" : 8,
"script_count" : 0
},
{
"name" : "shape",
"count" : 2,
"index_count" : 2,
"script_count" : 0
},
{
"name" : "short",
"count" : 2716,
"index_count" : 334,
"script_count" : 0
},
{
"name" : "text",
"count" : 22259,
"index_count" : 2153,
"script_count" : 0
},
{
"name" : "version",
"count" : 4,
"index_count" : 4,
"script_count" : 0
},
{
"name" : "wildcard",
"count" : 136,
"index_count" : 8,
"script_count" : 0
}
],
"runtime_field_types" : [ ]
},
"analysis" : {
"char_filter_types" : [ ],
"tokenizer_types" : [ ],
"filter_types" : [ ],
"analyzer_types" : [ ],
"built_in_char_filters" : [ ],
"built_in_tokenizers" : [ ],
"built_in_filters" : [ ],
"built_in_analyzers" : [ ]
},
"versions" : [
{
"version" : "8.1.0",
"index_count" : 3501,
"primary_shard_count" : 16816,
"total_primary_size" : "44.2tb",
"total_primary_bytes" : 48667434723612
}
]
},
"nodes" : {
"count" : {
"total" : 51,
"coordinating_only" : 0,
"data" : 0,
"data_cold" : 0,
"data_content" : 15,
"data_frozen" : 0,
"data_hot" : 15,
"data_warm" : 27,
"ingest" : 3,
"master" : 6,
"ml" : 0,
"remote_cluster_client" : 0,
"transform" : 0,
"voting_only" : 0
},
"versions" : [
"8.1.0"
],
"os" : {
"available_processors" : 3264,
"allocated_processors" : 3264,
"names" : [
{
"name" : "Linux",
"count" : 51
}
],
"pretty_names" : [
{
"pretty_name" : "Ubuntu 20.04.4 LTS",
"count" : 51
}
],
"architectures" : [
{
"arch" : "amd64",
"count" : 51
}
],
"mem" : {
"total" : "576gb",
"total_in_bytes" : 618475290624,
"adjusted_total" : "576gb",
"adjusted_total_in_bytes" : 618475290624,
"free" : "74.7gb",
"free_in_bytes" : 80283070464,
"used" : "501.2gb",
"used_in_bytes" : 538192220160,
"free_percent" : 13,
"used_percent" : 87
}
},
"process" : {
"cpu" : {
"percent" : 82
},
"open_file_descriptors" : {
"min" : 1574,
"max" : 6860,
"avg" : 5537
}
},
"jvm" : {
"max_uptime" : "1.1h",
"max_uptime_in_millis" : 4290610,
"versions" : [
{
"version" : "17.0.2",
"vm_name" : "OpenJDK 64-Bit Server VM",
"vm_version" : "17.0.2+8",
"vm_vendor" : "Eclipse Adoptium",
"bundled_jdk" : true,
"using_bundled_jdk" : true,
"count" : 51
}
],
"mem" : {
"heap_used" : "177.7gb",
"heap_used_in_bytes" : 190838722568,
"heap_max" : "288gb",
"heap_max_in_bytes" : 309237645312
},
"threads" : 12352
},
"fs" : {
"total" : "142.7tb",
"total_in_bytes" : 156914354835456,
"free" : "69.8tb",
"free_in_bytes" : 76792617918464,
"available" : "69.8tb",
"available_in_bytes" : 76792617918464
},
"plugins" : [ ],
"network_types" : {
"transport_types" : {
"security4" : 51
},
"http_types" : {
"security4" : 51
}
},
"discovery_types" : {
"multi-node" : 51
},
"packaging_types" : [
{
"flavor" : "default",
"type" : "docker",
"count" : 51
}
],
"ingest" : {
"number_of_pipelines" : 10,
"processor_stats" : {
"append" : {
"count" : 3957,
"failed" : 0,
"current" : 0,
"time" : "10ms",
"time_in_millis" : 10
},
"conditional" : {
"count" : 231347,
"failed" : 0,
"current" : 0,
"time" : "3.1s",
"time_in_millis" : 3165
},
"convert" : {
"count" : 0,
"failed" : 0,
"current" : 0,
"time" : "0s",
"time_in_millis" : 0
},
"date" : {
"count" : 0,
"failed" : 0,
"current" : 0,
"time" : "0s",
"time_in_millis" : 0
},
"geoip" : {
"count" : 0,
"failed" : 0,
"current" : 0,
"time" : "0s",
"time_in_millis" : 0
},
"grok" : {
"count" : 681061,
"failed" : 172833,
"current" : 0,
"time" : "45s",
"time_in_millis" : 45001
},
"json" : {
"count" : 5534,
"failed" : 1577,
"current" : 0,
"time" : "334ms",
"time_in_millis" : 334
},
"remove" : {
"count" : 460161,
"failed" : 231347,
"current" : 0,
"time" : "2.2s",
"time_in_millis" : 2292
},
"rename" : {
"count" : 0,
"failed" : 0,
"current" : 0,
"time" : "0s",
"time_in_millis" : 0
},
"script" : {
"count" : 0,
"failed" : 0,
"current" : 0,
"time" : "0s",
"time_in_millis" : 0
},
"set" : {
"count" : 0,
"failed" : 0,
"current" : 0,
"time" : "0s",
"time_in_millis" : 0
},
"set_security_user" : {
"count" : 0,
"failed" : 0,
"current" : 0,
"time" : "0s",
"time_in_millis" : 0
},
"uri_parts" : {
"count" : 0,
"failed" : 0,
"current" : 0,
"time" : "0s",
"time_in_millis" : 0
},
"user_agent" : {
"count" : 0,
"failed" : 0,
"current" : 0,
"time" : "0s",
"time_in_millis" : 0
}
}
},
"indexing_pressure" : {
"memory" : {
"current" : {
"combined_coordinating_and_primary" : "0b",
"combined_coordinating_and_primary_in_bytes" : 0,
"coordinating" : "0b",
"coordinating_in_bytes" : 0,
"primary" : "0b",
"primary_in_bytes" : 0,
"replica" : "0b",
"replica_in_bytes" : 0,
"all" : "0b",
"all_in_bytes" : 0
},
"total" : {
"combined_coordinating_and_primary" : "0b",
"combined_coordinating_and_primary_in_bytes" : 0,
"coordinating" : "0b",
"coordinating_in_bytes" : 0,
"primary" : "0b",
"primary_in_bytes" : 0,
"replica" : "0b",
"replica_in_bytes" : 0,
"all" : "0b",
"all_in_bytes" : 0,
"coordinating_rejections" : 0,
"primary_rejections" : 0,
"replica_rejections" : 0
},
"limit" : "0b",
"limit_in_bytes" : 0
}
}
}
}