Hi,
We are using AWS Elasticsearch and are having problems in our cluster when JVM memory pressure (the JVMMemoryPressure metric) reaches 75% on each of our nodes. At that moment a stop-the-world pause seems to happen, and the nodes become unavailable for a few minutes.
We are not sure why this is happening. We have tried increasing the size of the cluster, but that has only delayed the failure.
We currently have 5 data nodes and 3 master nodes on c5.large instances. We have very little data but a high volume of requests.
Cluster stats:
{
"_nodes": {
"total": 8,
"successful": 8,
"failed": 0
},
"cluster_name": "647845116050:images-live",
"timestamp": 1578658943560,
"status": "green",
"indices": {
"count": 2,
"shards": {
"total": 12,
"primaries": 6,
"replication": 1.0,
"index": {
"shards": {
"min": 2,
"max": 10,
"avg": 6.0
},
"primaries": {
"min": 1,
"max": 5,
"avg": 3.0
},
"replication": {
"min": 1.0,
"max": 1.0,
"avg": 1.0
}
}
},
"docs": {
"count": 2667,
"deleted": 0
},
"store": {
"size": "2.5mb",
"size_in_bytes": 2724052,
"throttle_time": "0s",
"throttle_time_in_millis": 0
},
"fielddata": {
"memory_size": "0b",
"memory_size_in_bytes": 0,
"evictions": 0
},
"query_cache": {
"memory_size": "0b",
"memory_size_in_bytes": 0,
"total_count": 0,
"hit_count": 0,
"miss_count": 0,
"cache_size": 0,
"cache_count": 0,
"evictions": 0
},
"completion": {
"size": "0b",
"size_in_bytes": 0
},
"segments": {
"count": 62,
"memory": "297.6kb",
"memory_in_bytes": 304809,
"terms_memory": "209kb",
"terms_memory_in_bytes": 214101,
"stored_fields_memory": "18.8kb",
"stored_fields_memory_in_bytes": 19312,
"term_vectors_memory": "0b",
"term_vectors_memory_in_bytes": 0,
"norms_memory": "18.7kb",
"norms_memory_in_bytes": 19200,
"points_memory": "180b",
"points_memory_in_bytes": 180,
"doc_values_memory": "50.7kb",
"doc_values_memory_in_bytes": 52016,
"index_writer_memory": "0b",
"index_writer_memory_in_bytes": 0,
"version_map_memory": "0b",
"version_map_memory_in_bytes": 0,
"fixed_bit_set": "0b",
"fixed_bit_set_memory_in_bytes": 0,
"max_unsafe_auto_id_timestamp": -1,
"file_sizes": {}
}
},
"nodes": {
"count": {
"total": 8,
"data": 5,
"coordinating_only": 0,
"master": 3,
"ingest": 5
},
"versions": [
"5.5.2"
],
"os": {
"available_processors": 16,
"allocated_processors": 16,
"names": [
{
"count": 8
}
],
"mem": {
"total": "28.9gb",
"total_in_bytes": 31124226048,
"free": "1.3gb",
"free_in_bytes": 1493733376,
"used": "27.5gb",
"used_in_bytes": 29630492672,
"free_percent": 5,
"used_percent": 95
}
},
"process": {
"cpu": {
"percent": 5
},
"open_file_descriptors": {
"min": 791,
"max": 4069,
"avg": 1686
}
},
"jvm": {
"max_uptime": "23.2d",
"max_uptime_in_millis": 2012217486,
"mem": {
"heap_used": "4.9gb",
"heap_used_in_bytes": 5342301528,
"heap_max": "15.8gb",
"heap_max_in_bytes": 17040408576
},
"threads": 943
},
"fs": {
"total": "69.5gb",
"total_in_bytes": 74700369920,
"free": "62.8gb",
"free_in_bytes": 67496964096,
"available": "60.2gb",
"available_in_bytes": 64678391808
},
"network_types": {
"transport_types": {
"netty4": 8
},
"http_types": {
"filter-jetty": 8
}
}
}
}
Any idea why this happens and how to fix it?