We originally had 500+ small nodes (8-core CPU, 15 GB memory each), and the goal was to go down to 150 instances by using larger hosts (48 cores, 384 GB memory each).
We have 650 TB of data and 9000+ shards.
Average write TPS: 1300
Average read TPS: 2500
So how do I determine whether the high I/O usage % is caused by an incorrect Elasticsearch configuration or is a limitation of the instance type?
Output of _cluster/stats:
{
"_nodes" : {
"total" : 386,
"successful" : 386,
"failed" : 0
},
"cluster_name" : "mycluster",
"timestamp" : 1631282601562,
"status" : "green",
"indices" : {
"count" : 78,
"shards" : {
"total" : 9700,
"primaries" : 4850,
"replication" : 1.0,
"index" : {
"shards" : {
"min" : 2,
"max" : 700,
"avg" : 124.35897435897436
},
"primaries" : {
"min" : 1,
"max" : 350,
"avg" : 62.17948717948718
},
"replication" : {
"min" : 1.0,
"max" : 1.0,
"avg" : 1.0
}
}
},
"docs" : {
"count" : 534114907871,
"deleted" : 131472431078
},
"store" : {
"size" : "648.9tb",
"size_in_bytes" : 713527348139452
},
"fielddata" : {
"memory_size" : "8.4mb",
"memory_size_in_bytes" : 8908840,
"evictions" : 0
},
"query_cache" : {
"memory_size" : "2.7tb",
"memory_size_in_bytes" : 3012866611322,
"total_count" : 2570605016989,
"hit_count" : 228375108353,
"miss_count" : 2342229908636,
"cache_size" : 54326426,
"cache_count" : 3148666271,
"evictions" : 3094339845
},
"completion" : {
"size" : "0b",
"size_in_bytes" : 0
},
"segments" : {
"count" : 332846,
"memory" : "858.4gb",
"memory_in_bytes" : 921712993891,
"terms_memory" : "557.5gb",
"terms_memory_in_bytes" : 598697432174,
"stored_fields_memory" : "255.9gb",
"stored_fields_memory_in_bytes" : 274852502984,
"term_vectors_memory" : "0b",
"term_vectors_memory_in_bytes" : 0,
"norms_memory" : "218.3mb",
"norms_memory_in_bytes" : 228978304,
"points_memory" : "41.9gb",
"points_memory_in_bytes" : 45044298917,
"doc_values_memory" : "2.6gb",
"doc_values_memory_in_bytes" : 2889781512,
"index_writer_memory" : "20.1gb",
"index_writer_memory_in_bytes" : 21619911301,
"version_map_memory" : "31.6mb",
"version_map_memory_in_bytes" : 33167020,
"fixed_bit_set" : "309.7gb",
"fixed_bit_set_memory_in_bytes" : 332574300512,
"max_unsafe_auto_id_timestamp" : -1,
"file_sizes" : { }
}
},
"nodes" : {
"count" : {
"total" : 386,
"data" : 347,
"coordinating_only" : 36,
"master" : 3,
"ingest" : 0
},
"versions" : [
"6.4.1"
],
"os" : {
"available_processors" : 11016,
"allocated_processors" : 11016,
"names" : [
{
"name" : "Linux",
"count" : 386
}
],
"mem" : {
"total" : "81.8tb",
"total_in_bytes" : 89957789696000,
"free" : "2.7tb",
"free_in_bytes" : 3068981411840,
"used" : "79tb",
"used_in_bytes" : 86888808284160,
"free_percent" : 3,
"used_percent" : 97
}
},
"process" : {
"cpu" : {
"percent" : 2470
},
"open_file_descriptors" : {
"min" : 8926,
"max" : 12640,
"avg" : 10746
}
},
"jvm" : {
"max_uptime" : "48d",
"max_uptime_in_millis" : 4153686531,
"versions" : [
{
"version" : "1.8.0_302"
},
{
"version" : "1.8.0_242"
}
],
"mem" : {
"heap_used" : "13.8tb",
"heap_used_in_bytes" : 15240308363208,
"heap_max" : "28tb",
"heap_max_in_bytes" : 30870800171008
},
"threads" : 169437
},
"fs" : {
"total" : "5.4pb",
"total_in_bytes" : 6123033514786816,
"free" : "4.8pb",
"free_in_bytes" : 5405924939575296,
"available" : "4.5pb",
"available_in_bytes" : 5098669333790720
}
}
}