I need to monitor memory usage rate for multiple hosts, get data every five minutes, if the set threshold is exceeded, the alarm is triggered and a separate warning message is sent according to the host name. Hopefully a universal alarm setting. My current configuration:
{
"trigger": {
"schedule": {
"interval": "5m"
}
},
"input": {
"search": {
"request": {
"search_type": "query_then_fetch",
"indices": [
"metricbeat-*"
],
"rest_total_hits_as_int": true,
"body": {
"query": {
"bool": {
"filter": {
"range": {
"@timestamp": {
"gte": "{{ctx.trigger.scheduled_time}}||-5m",
"lte": "{{ctx.trigger.scheduled_time}}",
"format": "strict_date_optional_time||epoch_millis"
}
}
}
}
},
"aggs": {
"host": {
"terms": {
"field": "host.name"
},
"aggs": {
"metric": {
"avg": {
"field": "system.memory.used.pct"
}
}
}
}
}
}
}
}
},
"condition": {
"script": {
"source": "if (ctx.payload.aggregations.metricAgg.value > params.threshold) { return true; } return false;",
"lang": "painless",
"params": {
"threshold": 0.8
}
}
},
"actions": {
"slack_1": {
"transform": {
"script": {
"source": "def df = new DecimalFormat('##.##'); return ['memory_used': df.format(ctx.payload.aggregations.metricAgg.value * params.percent), 'hostname': ctx.payload.hits.hits.0._source.host.name]",
"lang": "painless",
"params": {
"percent": 100
}
}
},
"slack": {
"message": {
"to": [
"#elk"
],
"text": "Host {{ctx.payload.hostname}} memory alarm, alarm value is {{ctx.payload.memory_used}}% ."
}
}
}
}
}
output:
"aggregations": {
"host": {
"doc_count_error_upper_bound": 0,
"sum_other_doc_count": 0,
"buckets": [
{
"doc_count": 146,
"metric": {
"value": 0.9885454545454546
},
"key": "es7_02"
},
{
"doc_count": 139,
"metric": {
"value": 0.985
},
"key": "es7_01"
}
]
}
}
I can get hostname of two hosts and the average system.memory.used.pct of five minutes , but only one host can be received warning message at a time, May I ask what I need to do to read key and value in the bucket in batch for alarm, or is there any other better method.