- List item
Hello,
I have just written an advanced watcher rule for threshold alerting (CPU). The rule works fine when I execute it using simulate, and it even sends the alert to Slack. However, after saving the rule, it never triggers, even though its status shows as active. Am I missing a simple step? I have set the threshold to 0.1% so it should definitely fire, but it does not trigger at all. Please see the snippet of the manual execution below:
Blockquote
{
"watch_id": "inlined",
"node": "7vEAFo9ZSU2DAsr_9I6j6Q",
"state": "executed",
"status": {
"state": {
"active": true,
"timestamp": "2020-02-20T18:03:16.041Z"
},
"last_checked": "2020-02-20T18:03:16.041Z",
"last_met_condition": "2020-02-20T18:03:16.041Z",
"actions": {
"slack_1": {
"ack": {
"timestamp": "2020-02-20T18:03:16.041Z",
"state": "ackable"
},
"last_execution": {
"timestamp": "2020-02-20T18:03:16.041Z",
"successful": true
},
"last_successful_execution": {
"timestamp": "2020-02-20T18:03:16.041Z",
"successful": true
}
}
},
"execution_state": "executed",
"version": -1
},
"trigger_event": {
"type": "manual",
"triggered_time": "2020-02-20T18:03:16.041Z",
"manual": {
"schedule": {
"scheduled_time": "2020-02-20T18:03:16.041Z"
}
}
},
"input": {
"search": {
"request": {
"search_type": "query_then_fetch",
"indices": [
"metricbeat-"
],
"rest_total_hits_as_int": true,
"body": {
"size": 0,
"query": {
"bool": {
"must": [
{
"range": {
"@timestamp": {
"gte": "{{ctx.trigger.scheduled_time}}||-10s",
"lte": "{{ctx.trigger.scheduled_time}}",
"format": "strict_date_optional_time||epoch_millis"
}
}
},
{
"match": {
"host.name": "xx.xxx"
}
},
{
"match": {
"event.dataset": "system.cpu"
}
}
]
}
},
"aggs": {
"metricAgg": {
"max": {
"field": "system.cpu.total.norm.pct"
}
}
}
}
}
}
},
"condition": {
"script": {
"source": "if (ctx.payload.aggregations.metricAgg.value > params.threshold) { return true; } return false;",
"lang": "painless",
"params": {
"threshold": 0.001
}
}
},
"metadata": {
"name": "CPU threshold",
"xpack": {
"type": "json"
}
},
"result": {
"execution_time": "2020-02-20T18:03:16.041Z",
"execution_duration": 476,
"input": {
"type": "search",
"status": "success",
"payload": {
"_shards": {
"total": 2,
"failed": 0,
"successful": 2,
"skipped": 0
},
"hits": {
"hits": [],
"total": 1,
"max_score": null
},
"took": 7,
"timed_out": false,
"aggregations": {
"metricAgg": {
"value": 0.096
}
}
},
"search": {
"request": {
"search_type": "query_then_fetch",
"indices": [
"metricbeat-"
],
"rest_total_hits_as_int": true,
"body": {
"size": 0,
"query": {
"bool": {
"must": [
{
"range": {
"@timestamp": {
"gte": "2020-02-20T18:03:16.041954Z||-10s",
"lte": "2020-02-20T18:03:16.041954Z",
"format": "strict_date_optional_time||epoch_millis"
}
}
},
{
"match": {
"host.name": "xx.xxx"
}
},
{
"match": {
"event.dataset": "system.cpu"
}
}
]
}
},
"aggs": {
"metricAgg": {
"max": {
"field": "system.cpu.total.norm.pct"
}
}
}
}
}
}
},
"condition": {
"type": "script",
"status": "success",
"met": true
},
"transform": {
"type": "script",
"status": "success",
"payload": {
"result": 9.6
}
},
"actions": [
{
"id": "slack_1",
"type": "slack",
"status": "success",
"slack": {
"account": "monitoring",
"sent_messages": [
{
"status": "success",
"message": {
"from": "inlined",
"text": "Watch [CPU threshold] has exceeded the threshold of 8% for node 1. Current CPU utilization is 9.6%."
}
}
]
}
}
]
},
"messages":
}
Here's the status of the watch:
Output
curl localhost:9200/_watcher/watch/0f65255b-8e0d-4aee-b8fc-8d3cf06ed3f4 | json_pp
% Total % Received % Xferd Average Speed Time Time Time Current
Dload Upload Total Spent Left Speed
100 1434 100 1434 0 0 175k 0 --:--:-- --:--:-- --:--:-- 200k
{
"found" : true,
"status" : {
"version" : 1,
"state" : {
"active" : true,
"timestamp" : "2020-02-19T20:58:01.657Z"
},
"actions" : {
"slack_1" : {
"ack" : {
"timestamp" : "2020-02-19T20:58:01.657Z",
"state" : "awaits_successful_execution"
}
}
}
}
--- rest of the output cut off ---
Can you please let me know how I can make this rule trigger?