Hello Christian,
What is your use case?
We are aggregating on some fields, and at that time the cluster reaches a very heavy heap usage.
Can you please provide the full output of the _cluster/stats API?
{
"_nodes": {
"total": 13,
"successful": 13,
"failed": 0
},
"cluster_name": "IMIConnect-Prod-new-Cluster",
"timestamp": 1584508821487,
"status": "green",
"indices": {
"count": 1590,
"shards": {
"total": 3836,
"primaries": 1918,
"replication": 1,
"index": {
"shards": {
"min": 2,
"max": 16,
"avg": 2.412578616352201
},
"primaries": {
"min": 1,
"max": 8,
"avg": 1.2062893081761005
},
"replication": {
"min": 1,
"max": 1,
"avg": 1
}
}
},
"docs": {
"count": 6989638358,
"deleted": 186400
},
"store": {
"size_in_bytes": 10111682273006,
"throttle_time_in_millis": 0
},
"fielddata": {
"memory_size_in_bytes": 5395428088,
"evictions": 0
},
"query_cache": {
"memory_size_in_bytes": 3720906227,
"total_count": 626199871,
"hit_count": 46551285,
"miss_count": 579648586,
"cache_size": 805217,
"cache_count": 4873902,
"evictions": 4068685
},
"completion": {
"size_in_bytes": 0
},
"segments": {
"count": 61713,
"memory_in_bytes": 25661172525,
"terms_memory_in_bytes": 21678628843,
"stored_fields_memory_in_bytes": 3091498736,
"term_vectors_memory_in_bytes": 0,
"norms_memory_in_bytes": 7895680,
"points_memory_in_bytes": 568946834,
"doc_values_memory_in_bytes": 314202432,
"index_writer_memory_in_bytes": 413129724,
"version_map_memory_in_bytes": 27899398,
"fixed_bit_set_memory_in_bytes": 0,
"max_unsafe_auto_id_timestamp": 9223372036854776000,
"file_sizes": {}
}
},
"nodes": {
"count": {
"total": 13,
"data": 8,
"coordinating_only": 2,
"master": 3,
"ingest": 11
},
"versions": [
"5.6.4"
],
"os": {
"available_processors": 84,
"allocated_processors": 84,
"names": [
{
"name": "Linux",
"count": 13
}
],
"mem": {
"total_in_bytes": 699968139264,
"free_in_bytes": 47940886528,
"used_in_bytes": 652027252736,
"free_percent": 7,
"used_percent": 93
}
},
"process": {
"cpu": {
"percent": 85
},
"open_file_descriptors": {
"min": 439,
"max": 1545,
"avg": 1120
}
},
"jvm": {
"max_uptime_in_millis": 39984204745,
"versions": [
{
"version": "1.8.0_191",
"vm_name": "OpenJDK 64-Bit Server VM",
"vm_version": "25.191-b12",
"vm_vendor": "Oracle Corporation",
"count": 10
},
{
"version": "1.8.0_222",
"vm_name": "OpenJDK 64-Bit Server VM",
"vm_version": "25.222-b10",
"vm_vendor": "Oracle Corporation",
"count": 2
},
{
"version": "1.8.0_201",
"vm_name": "OpenJDK 64-Bit Server VM",
"vm_version": "25.201-b09",
"vm_vendor": "Oracle Corporation",
"count": 1
}
],
"mem": {
"heap_used_in_bytes": 172083151128,
"heap_max_in_bytes": 337496506368
},
"threads": 1269
},
"fs": {
"total_in_bytes": 19861279350784,
"free_in_bytes": 9654299492352,
"available_in_bytes": 9654299492352
},
"plugins": [],
"network_types": {
"transport_types": {
"netty4": 13
},
"http_types": {
"netty4": 13
}
}
}
}
Which version of Elasticsearch are you using?
Elasticsearch version : 5.6.4
How many nodes do you have in the cluster?
13 nodes in total:
2 coordinating, 3 master, and 8 data nodes.
What type and specification hardware is it running on? Are you using local disks or networked storage? SSDs or HDDs?
All nodes are deployed on AWS, disk type is SSD
What type of indexing and query load is the cluster under?
Index Name: imiconnect_sfe_custom_node_logs
One Sample record:
{
"_index": "imiconnect_sfe_custom_node_logs-2020-03-18",
"_type": "IMICONNECT_SFE_CUSTOM_NODE_LOGS",
"_id": "123a3d7e457-ce32-40fd-938f-9a73f6919a9c",
"_version": 1,
"_score": null,
"_source": {
"request": "7657HJFa2k/7VgLW0MmboVj0O+oPuuwvdedOi1Uv/I0U230jV4Ks7Su1StWzhZa27sR9nPsL8s0S\nlJvr5BPxSPYg3ISpjPC9Sn4IOekgW3lm7zs199xLn4JvqnReQ/F7eKjSigqfwllGZxJZ+G+Md4nB\niWUUqtEZBQH4EkPM6E2sc63jlZxSJ4yz4EE0QSawY9NQsNOhE2XFUUHJQ+A26YN3UB1GLHaRi0Ln\nGmxQrQYRSG3/IDW7D0T+EDmSeRym/229M0X",
"response_code": "200 OK",
"integration_id": "7077",
"nodetid": "232fa288-9421-412d-808c-be3af45847ac",
"service_name": "PPB",
"flow_name": "BulkOptin",
"trans_id": "03a47810-8674-45a9-9b4e-5f90e84b776a_12856_26120",
"request_method": "POST",
"response_datetime": "2020-03-18T04:38:55.821Z",
"client_id": 1018,
"@timestamp": "2020-03-18T04:40:58.450Z",
"method_name": "BulkOptIn_PPB",
"method_id": "579",
"flow_id": "10864",
"response": "Q6yelRzPeSmaZlg4/JaNgp2IiZDhFharuZNJAi2JF1cPU63t5L5ymzlXj68CG27/Eho+6QVGq5bY\ns9KPMU8/RjvHCAYIjxYXVHVZkQda/cU=/229M0X",
"requesturl": "bQ7NZavNFGBLXNJGNl0ziJa7S9q25jjWrWvJhkbcyw/gYya5uNYcX4jgvmfb0xi/zCpBp7LUkDPF\nmmEpCTQpBS4cljBee1aUgLwpfQtH3ss=/229M0X",
"service_id": "17218",
"request_datetime": "2020-03-18T04:38:55.523Z"
},
"fields": {
"@timestamp": [
1584506458450
],
"request_datetime": [
1584506335523
],
"response_datetime": [
1584506335821
]
},
"sort": [
1584506458450
]
}
What type of queries are you experiencing latency issues for?
Aggregation query:
{ "size": 0, "query" : {
"bool" : {
"filter" : [
{
"bool" : {
"must" : [
{
"range" : {
"@timestamp" : {
"from" : "2020-03-17T04:38:55.821Z",
"to" : "2020-03-18T04:38:55.821Z"
}
}
}
]
}
}
]
}
},
"aggregations": {
"client_id": {
"terms": {
"field": "client_id",
"size" : 10000
},
"aggregations": {
"integration_id": {
"terms": {
"field": "integration_id",
"size" : 10000
},"aggregations": {
"method_id": {
"terms": {
"field": "method_id",
"size" : 10000
},
"aggregations": {
"flow_id": {
"terms": {
"field": "flow_id",
"size" : 10000
},
"aggregations": {
"datehour": {
"date_histogram": {
"field": "datetime",
"interval": "1h"
}
}
}
}
}
}
}
}
}
}
}
}