We have a watcher that checks a Java process for an application on two servers. The issue is that the application team regularly gets false alerts from ELK even though the processes are up.
{
  "trigger": {
    "schedule": { "interval": "180m" }
  },
  "input": {
    "search": {
      "request": {
        "search_type": "query_then_fetch",
        "indices": ["prod-metric_beat"],
        "rest_total_hits_as_int": true,
        "body": {
          "query": {
            "bool": {
              "must": [
                {
                  "range": {
                    "@timestamp": { "gte": "now-45m" }
                  }
                },
                {
                  "terms": {
                    "agent.hostname": ["pwprdsos04", "pwprdsos05"]
                  }
                },
                {
                  "bool": {
                    "minimum_should_match": 1,
                    "should": [
                      {
                        "match_phrase": {
                          "cmdline.keyword": "java -jar connectivityagent.jar"
                        }
                      }
                    ]
                  }
                }
              ],
              "must_not": []
            }
          },
          "aggs": {
            "hostwise_agg": {
              "terms": { "field": "agent.hostname" },
              "aggs": {
                "processwise_agg": {
                  "terms": { "field": "system.process.name.keyword" }
                }
              }
            }
          },
          "script_fields": {
            "description": {
              "script": {
                "lang": "painless",
                "source": "params.value",
                "params": { "value": "Process is Unavailable" }
              }
            },
            "criticality": {
              "script": {
                "lang": "painless",
                "source": "params.value",
                "params": { "value": "Critical" }
              }
            }
          },
          "_source": []
        }
      }
    }
  },
  "condition": {
    "compare": {
      "ctx.payload.hits.total": { "lte": 1 }
    }
  },
  "actions": {
    "index_payload": {
      "index": {
        "index": "<watcher-alert-index-{now/d}>",
        "doc_type": "my-new-type",
        "execution_time_field": "@timestamp"
      }
    },
    "email_notifcation": {
      "email": {
        "profile": "standard",
        "from": "elk.prd.alerts@canon-europe.com",
        "to": [
          "saikat.guin@canon-europe.com",
          "tcs_tam_se@CANONEUROPENV.onmicrosoft.com"
        ],
        "subject": "Siebel Process status - OIC Agent: unavailable | P2 | PRD",
        "body": {
          "text": """#account: canontcs
#source: web
#service_instance: TCS-Siebel-Contact-Center
#category: incident
#impact: medium
#ci: prd-siebel_java-critical
Hi Team,
You are receiving this mail because we have found an incident. The below process is unavailable on pwprdsos04/pwprdsos05.
java.exe
"""
        }
      }
    }
  }
}
Please suggest where the issue could be.