Watcher Cluster Health

alerting

(Michael Mack) #1

I'm using the example cluster health watch to monitor whether the cluster goes into a yellow state. The following watch is failing to execute. Any help would be appreciated. Other watches are working correctly.

PUT _watcher/watch/cluster_yellow_alert
{
  "trigger": {
    "schedule": {
      "interval": "1m"
    }
  },
  "input": {
    "search": {
      "request": {
        "indices": ".marvel-*",
        "types": "cluster_stats",
        "body": {
          "query": {
            "filtered": {
              "filter": {
                "bool": {
                  "must": [
                    {
                      "range": {
                        "@timestamp": {
                          "gte": "now-2m",
                          "lte": "now"
                        }
                      }
                    }
                  ],
                  "should": [
                    { "term": { "status.raw": "red" } },
                    { "term": { "status.raw": "green" } },
                    { "term": { "status.raw": "yellow" } }
                  ]
                }
              }
            }
          },
          "fields": [
            "@timestamp",
            "status"
          ],
          "sort": [
            { "@timestamp": { "order": "desc" } }
          ],
          "size": 1,
          "aggs": {
            "minutes": {
              "date_histogram": {
                "field": "@timestamp",
                "interval": "5s"
              },
              "aggs": {
                "status": {
                  "terms": {
                    "field": "status.raw",
                    "size": 3
                  }
                }
              }
            }
          }
        }
      }
    }
  },
  "throttle_period": "5m",
  "condition": {
    "script": {
      "inline": "if (ctx.payload.hits.total < 1) return false; def rows = ctx.payload.hits.hits; if (rows[0].fields.status[0] != 'yellow') return false; if (ctx.payload.aggregations.minutes.buckets.size() < 12) return false; def last60Seconds = ctx.payload.aggregations.minutes.buckets[-12..-1]; return last60Seconds.every { it.status.buckets.every { s -> s.key == 'yellow' } }"
    }
  },
  "actions": {
    "send_email": {
      "email": {
        "to": "removed email address",
        "subject": "[PRE] Watcher Notification - Cluster has been yellow for the last 60 seconds",
        "body": "Your cluster has been yellow for the last 60 seconds."
      }
    }
  }
}


(Mark Walkom) #2

What do you mean by failing to execute? Are you getting errors in the logs, or is something else happening?


(system) #3