Hi, I'm trying to build a filesystem disk-threshold alert. Based on the watcher history, I want to check whether a hostname has already been alerted on and, if so, remove that hostname from the buckets returned by the first input.
The reason we're doing this is to avoid sending repeated alerts during the throttling period, while still alerting for any customer that hasn't been notified yet during that period. If there's a better way to achieve this, I'm all ears.
Also, I'm using this example as reference.
However, that example was able to work with the indexed field for started_processes, which is what I'm trying to achieve here, but I have failed so far.
Thanks in advance. Sorry for the formatting issues; I don't know how to display this with proper indentation.
{
"trigger": {
"schedule": {
"interval": "1m"
}
},
"input": {
"chain": {
"inputs": [
{
"first": {
"search": {
"request": {
"search_type": "query_then_fetch",
"indices": [
"metricbeat-*"
],
"types": [],
"body": {
"size": 0,
"query": {
"bool": {
"filter": [
{
"range": {
"@timestamp": {
"gte": "now-15m",
"lte": "now"
}
}
},
{
"range": {
"system.filesystem.used.pct": {
"gte": "{{ctx.metadata.threshold}}"
}
}
}
]
}
},
"aggs": {
"host": {
"terms": {
"script": {
"source": "doc['beat.hostname.keyword'].value + '||' + doc['system.filesystem.mount_point.keyword'].value"
}
},
"aggs": {
"latest_value": {
"top_hits": {
"size": 1,
"sort": {
"system.filesystem.used.pct": {
"order": "desc"
}
},
"_source": {
"includes": [
"system.filesystem.used.pct",
"fields.customer",
"system.filesystem.mount_point",
"beat.hostname"
]
}
}
}
}
}
}
}
}
}
}
},
{
"second": {
"search": {
"request": {
"search_type": "query_then_fetch",
"indices": [
".watcher-history*"
],
"types": [],
"body": {
"size": 0,
"query": {
"bool": {
"must": [
{
"match": {
"watch_id": "filesystem_3_inprogress"
}
},
{
"terms": {
"result.transform.payload.servers":" THIS IS THE PART THAT DOESN'T WORK, this field is written by transform below and not indexed by watcher history and hence cannot be queried. "
}
},
{
"range": {
"trigger_event.triggered_time": {
"gte": "now-15m",
"lte": "now"
}
}
}
]
}
}
}
}
}
}
}
]
}
},
"condition": {
"script": {
"source": "return ctx.payload.second.hits.total > 0",
"lang": "painless"
}
},
"actions": {
"logging": {
"throttle_period_in_millis": 900000,
"logging": {
"text": "Some filesystem disks are over {{ctx.payload.threshold}}% utilized : {{#ctx.payload.hosts}}{{hostname}}'s {{disk_name}} disk for customer {{key}} is {{latest_value}}% full ; {{/ctx.payload.hosts}}"
}
}
},
"metadata": {
"threshold": 0.2
},
"transform": {
"script": {
"source": "def threshold_p = ctx.metadata.threshold*100;def firstBucket=ctx.payload.first.aggregations.host.buckets; def servers=[]; firstBucket.forEach(hit -> servers.add(hit.key)); return ['servers':servers, 'threshold': (int)threshold_p, 'hosts': firstBucket.stream().map(p -> [ 'key': p.latest_value.hits.hits.0._source.fields.customer, 'hostname': p.latest_value.hits.hits.0._source.beat.hostname, 'latest_value': (int) (p.latest_value.hits.hits.0._source.system.filesystem.used.pct*100), 'disk_name': p.latest_value.hits.hits.0._source.system.filesystem.mount_point]).collect(Collectors.toList()) ];",
"lang": "painless"
}
}
}