We are getting 100% CPU usage on a master node.
The cluster is hosted in Elastic Cloud.
Any help will be appreciated!
The node's tasks are attached below (it seems to run /admin/snapshot/delete constantly).

I'm not sure what this snapshot task refers to (whether it's an actual snapshot, the same kind you'll find in Kibana, or whether it means a memory snapshot or similar). However, after changing the retention settings of the snapshot policy in Kibana to delete after 10000 snapshots / 24 days, instead of 100 snapshots / 3 days, the load on the master node stopped.
It would be great to understand this issue, as it may be a bug in the cloud platform, in Elastic itself, etc.
What is the output from the _cluster/stats?pretty&human API?
Attached. Thanks a lot, @warkolm.
I'm still not sure why I see the following actions in the node's tasks all the time:
"action" : "cluster:admin/snapshot/status"
"action" : "cluster:admin/snapshot/get",
"action" : "cluster:admin/snapshot/delete",
even though the (only) snapshot policy says:
Schedule
0 0 * * * ?
{
"Mdnq4RAeRtqs004FxxQRgg:1340468" : {
"node" : "Mdnq4RAeRtqs004FxxQRgg",
"id" : 1340468,
"type" : "transport",
"action" : "cluster:monitor/xpack/ml/job/stats/get",
"description" : "",
"start_time_in_millis" : 1612349554476,
"running_time_in_nanos" : 1308251137,
"cancellable" : false,
"headers" : { }
},
"Mdnq4RAeRtqs004FxxQRgg:466567" : {
"node" : "Mdnq4RAeRtqs004FxxQRgg",
"id" : 466567,
"type" : "transport",
"action" : "cluster:admin/snapshot/status",
"description" : "",
"start_time_in_millis" : 1612316979079,
"running_time_in_nanos" : 32576705363709,
"cancellable" : false,
"parent_task_id" : "Ysr-3BHkT9aKvQXSQdtr2A:1225810",
"headers" : { }
},
"Mdnq4RAeRtqs004FxxQRgg:1340470" : {
"node" : "Mdnq4RAeRtqs004FxxQRgg",
"id" : 1340470,
"type" : "transport",
"action" : "indices:monitor/stats",
"description" : "",
"start_time_in_millis" : 1612349554477,
"running_time_in_nanos" : 1307514234,
"cancellable" : false,
"headers" : { }
},
"Mdnq4RAeRtqs004FxxQRgg:1340496" : {
"node" : "Mdnq4RAeRtqs004FxxQRgg",
"id" : 1340496,
"type" : "transport",
"action" : "cluster:monitor/tasks/lists[n]",
"description" : "",
"start_time_in_millis" : 1612349555784,
"running_time_in_nanos" : 321901,
"cancellable" : false,
"parent_task_id" : "xLr4tF2ZSSKNscRDwYLllA:1954171",
"headers" : { }
},
"Mdnq4RAeRtqs004FxxQRgg:357473" : {
"node" : "Mdnq4RAeRtqs004FxxQRgg",
"id" : 357473,
"type" : "transport",
"action" : "cluster:admin/snapshot/status",
"description" : "",
"start_time_in_millis" : 1612313393273,
"running_time_in_nanos" : 36162511051383,
"cancellable" : false,
"parent_task_id" : "Ysr-3BHkT9aKvQXSQdtr2A:959206",
"headers" : { }
},
"Mdnq4RAeRtqs004FxxQRgg:962091" : {
"node" : "Mdnq4RAeRtqs004FxxQRgg",
"id" : 962091,
"type" : "transport",
"action" : "cluster:admin/snapshot/status",
"description" : "",
"start_time_in_millis" : 1612334913474,
"running_time_in_nanos" : 14642310541533,
"cancellable" : false,
"parent_task_id" : "Ysr-3BHkT9aKvQXSQdtr2A:2495902",
"headers" : { }
},
"Mdnq4RAeRtqs004FxxQRgg:1340476" : {
"node" : "Mdnq4RAeRtqs004FxxQRgg",
"id" : 1340476,
"type" : "transport",
"action" : "indices:data/read/search",
"description" : """indices[.security], types[], search_type[QUERY_THEN_FETCH], scroll[5m], source[{"size":1000,"query":{"term":{"doc_type":{"value":"role-mapping","boost":1.0}}},"_source":{"includes":[],"excludes":[]}}]""",
"start_time_in_millis" : 1612349554873,
"running_time_in_nanos" : 910793110,
"cancellable" : true,
"headers" : { }
},
"Mdnq4RAeRtqs004FxxQRgg:1340477" : {
"node" : "Mdnq4RAeRtqs004FxxQRgg",
"id" : 1340477,
"type" : "transport",
"action" : "indices:data/read/search",
"description" : """indices[.security], types[], search_type[QUERY_THEN_FETCH], source[{"size":0,"query":{"term":{"type":{"value":"user","boost":1.0}}},"track_total_hits":2147483647}]""",
"start_time_in_millis" : 1612349554874,
"running_time_in_nanos" : 910431309,
"cancellable" : true,
"headers" : { }
},
"Mdnq4RAeRtqs004FxxQRgg:1328664" : {
"node" : "Mdnq4RAeRtqs004FxxQRgg",
"id" : 1328664,
"type" : "transport",
"action" : "cluster:admin/snapshot/delete",
"description" : "",
"start_time_in_millis" : 1612349116182,
"running_time_in_nanos" : 439601908076,
"cancellable" : false,
"headers" : { }
},
"Mdnq4RAeRtqs004FxxQRgg:1340472" : {
"node" : "Mdnq4RAeRtqs004FxxQRgg",
"id" : 1340472,
"type" : "transport",
"action" : "indices:data/read/msearch",
"description" : """,indices[.security], types[], search_type[QUERY_THEN_FETCH], source[{"size":0,"query":{"term":{"type":{"value":"role","boost":1.0}}},"track_total_hits":2147483647}]indices:data/read/msearch[indices[.security], types[], search_type[QUERY_THEN_FETCH], source[{"size":0,"terminate_after":1,"query":{"bool":{"must":[{"term":{"type":{"value":"role","boost":1.0}}},{"bool":{"should":[{"exists":{"field":"indices.field_security.grant","boost":1.0}},{"exists":{"field":"indices.field_security.except","boost":1.0}},{"exists":{"field":"indices.fields","boost":1.0}}],"adjust_pure_negative":true,"boost":1.0}}],"adjust_pure_negative":true,"boost":1.0}},"track_total_hits":2147483647}]indices:data/read/msearch[indices[.security], types[], search_type[QUERY_THEN_FETCH], source[{"size":0,"terminate_after":1,"query":{"bool":{"must":[{"term":{"type":{"value":"role","boost":1.0}}}],"filter":[{"exists":{"field":"indices.query","boost":1.0}}],"adjust_pure_negative":true,"boost":1.0}},"track_total_hits":2147483647}]]""",
"start_time_in_millis" : 1612349554780,
"running_time_in_nanos" : 1005474302,
"cancellable" : true,
"headers" : { }
},
"Mdnq4RAeRtqs004FxxQRgg:1340473" : {
"node" : "Mdnq4RAeRtqs004FxxQRgg",
"id" : 1340473,
"type" : "transport",
"action" : "indices:data/read/search",
"description" : """indices[.security], types[], search_type[QUERY_THEN_FETCH], source[{"size":0,"query":{"term":{"type":{"value":"role","boost":1.0}}},"track_total_hits":2147483647}]""",
"start_time_in_millis" : 1612349554780,
"running_time_in_nanos" : 1005439702,
"cancellable" : true,
"parent_task_id" : "Mdnq4RAeRtqs004FxxQRgg:1340472",
"headers" : { }
},
"Mdnq4RAeRtqs004FxxQRgg:1340474" : {
"node" : "Mdnq4RAeRtqs004FxxQRgg",
"id" : 1340474,
"type" : "transport",
"action" : "indices:data/read/search",
"description" : """indices[.security], types[], search_type[QUERY_THEN_FETCH], source[{"size":0,"terminate_after":1,"query":{"bool":{"must":[{"term":{"type":{"value":"role","boost":1.0}}},{"bool":{"should":[{"exists":{"field":"indices.field_security.grant","boost":1.0}},{"exists":{"field":"indices.field_security.except","boost":1.0}},{"exists":{"field":"indices.fields","boost":1.0}}],"adjust_pure_negative":true,"boost":1.0}}],"adjust_pure_negative":true,"boost":1.0}},"track_total_hits":2147483647}]""",
"start_time_in_millis" : 1612349554780,
"running_time_in_nanos" : 1004923500,
"cancellable" : true,
"parent_task_id" : "Mdnq4RAeRtqs004FxxQRgg:1340472",
"headers" : { }
},
"Mdnq4RAeRtqs004FxxQRgg:1340475" : {
"node" : "Mdnq4RAeRtqs004FxxQRgg",
"id" : 1340475,
"type" : "transport",
"action" : "indices:data/read/search",
"description" : """indices[.security], types[], search_type[QUERY_THEN_FETCH], source[{"size":0,"terminate_after":1,"query":{"bool":{"must":[{"term":{"type":{"value":"role","boost":1.0}}}],"filter":[{"exists":{"field":"indices.query","boost":1.0}}],"adjust_pure_negative":true,"boost":1.0}},"track_total_hits":2147483647}]""",
"start_time_in_millis" : 1612349554781,
"running_time_in_nanos" : 1004689399,
"cancellable" : true,
"parent_task_id" : "Mdnq4RAeRtqs004FxxQRgg:1340472",
"headers" : { }
},
"Mdnq4RAeRtqs004FxxQRgg:873372" : {
"node" : "Mdnq4RAeRtqs004FxxQRgg",
"id" : 873372,
"type" : "transport",
"action" : "cluster:admin/snapshot/status",
"description" : "",
"start_time_in_millis" : 1612331322773,
"running_time_in_nanos" : 18233011841285,
"cancellable" : false,
"parent_task_id" : "Ysr-3BHkT9aKvQXSQdtr2A:2250652",
"headers" : { }
},
"Mdnq4RAeRtqs004FxxQRgg:1340455" : {
"node" : "Mdnq4RAeRtqs004FxxQRgg",
"id" : 1340455,
"type" : "direct",
"action" : "internal:cluster/coordination/publish_state",
"description" : "",
"start_time_in_millis" : 1612349554277,
"running_time_in_nanos" : 1508437753,
"cancellable" : false,
"headers" : { }
},
"Mdnq4RAeRtqs004FxxQRgg:1340419" : {
"node" : "Mdnq4RAeRtqs004FxxQRgg",
"id" : 1340419,
"type" : "transport",
"action" : "indices:admin/template/put",
"description" : "",
"start_time_in_millis" : 1612349553374,
"running_time_in_nanos" : 2411770240,
"cancellable" : false,
"parent_task_id" : "n86rCLPyT-ebf6e4uaMNew:2893588",
"headers" : { }
},
"Mdnq4RAeRtqs004FxxQRgg:777628" : {
"node" : "Mdnq4RAeRtqs004FxxQRgg",
"id" : 777628,
"type" : "transport",
"action" : "cluster:admin/snapshot/status",
"description" : "",
"start_time_in_millis" : 1612327726073,
"running_time_in_nanos" : 21829712341262,
"cancellable" : false,
"parent_task_id" : "Ysr-3BHkT9aKvQXSQdtr2A:1984186",
"headers" : { }
},
"Mdnq4RAeRtqs004FxxQRgg:1330988" : {
"node" : "Mdnq4RAeRtqs004FxxQRgg",
"id" : 1330988,
"type" : "transport",
"action" : "cluster:admin/snapshot/status",
"description" : "",
"start_time_in_millis" : 1612349202279,
"running_time_in_nanos" : 353506669293,
"cancellable" : false,
"parent_task_id" : "Ysr-3BHkT9aKvQXSQdtr2A:3515287",
"headers" : { }
},
"Mdnq4RAeRtqs004FxxQRgg:1339885" : {
"node" : "Mdnq4RAeRtqs004FxxQRgg",
"id" : 1339885,
"type" : "transport",
"action" : "cluster:monitor/xpack/usage",
"description" : "",
"start_time_in_millis" : 1612349532976,
"running_time_in_nanos" : 22809192767,
"cancellable" : false,
"headers" : { }
},
"Mdnq4RAeRtqs004FxxQRgg:1340463" : {
"node" : "Mdnq4RAeRtqs004FxxQRgg",
"id" : 1340463,
"type" : "transport",
"action" : "indices:admin/template/put",
"description" : "",
"start_time_in_millis" : 1612349554374,
"running_time_in_nanos" : 1411500354,
"cancellable" : false,
"parent_task_id" : "xLr4tF2ZSSKNscRDwYLllA:1954036",
"headers" : { }
},
"Mdnq4RAeRtqs004FxxQRgg:1339688" : {
"node" : "Mdnq4RAeRtqs004FxxQRgg",
"id" : 1339688,
"type" : "transport",
"action" : "cluster:admin/snapshot/get",
"description" : "",
"start_time_in_millis" : 1612349527879,
"running_time_in_nanos" : 27906168691,
"cancellable" : false,
"parent_task_id" : "aIsgB69pTfCzDRhxS2qqEw:2724639",
"headers" : { }
},
"Mdnq4RAeRtqs004FxxQRgg:1340490" : {
"node" : "Mdnq4RAeRtqs004FxxQRgg",
"id" : 1340490,
"type" : "transport",
"action" : "cluster:monitor/xpack/usage",
"description" : "",
"start_time_in_millis" : 1612349555776,
"running_time_in_nanos" : 9515129,
"cancellable" : false,
"headers" : { }
},
"Mdnq4RAeRtqs004FxxQRgg:1340491" : {
"node" : "Mdnq4RAeRtqs004FxxQRgg",
"id" : 1340491,
"type" : "transport",
"action" : "cluster:monitor/nodes/stats",
"description" : "",
"start_time_in_millis" : 1612349555776,
"running_time_in_nanos" : 9364729,
"cancellable" : false,
"headers" : { }
}
}
Thanks! Can you post the hot threads from the master as well please?
There's not a tonne jumping out to me. Have you tried raising an issue with the Support team?
It was due to an increase in the indexing rate. Upgrading the master nodes resolved it.
Thanks a lot
© 2020. All Rights Reserved - Elasticsearch
Apache, Apache Lucene, Apache Hadoop, Hadoop, HDFS and the yellow elephant logo are trademarks of the Apache Software Foundation in the United States and/or other countries.