Hey, I've set up some watchers to query our application logs for error events, and they seem to work fine when I test and run them in Dev Tools. When running on its schedule, a watcher executes successfully for periods of time, and then at certain times of the day (completely random times) I start to see State: "Error!" with Comment: "Execution failing".
Any ideas why this is happening? Here is what the failed watcher looks like:
{
"watch_id": "GS1_BU-warnings",
"node": "uNQlvW8jTQS54Nr2GNM_hw",
"state": "execution_not_needed",
"user": "redacted",
"status": {
"state": {
"active": true,
"timestamp": "2019-09-25T14:42:15.524Z"
},
"last_checked": "2019-09-27T13:40:42.506Z",
"last_met_condition": "2019-09-27T12:50:42.614Z",
"actions": {
"snow_webhook": {
"ack": {
"timestamp": "2019-09-27T11:55:42.347Z",
"state": "awaits_successful_execution"
},
"last_execution": {
"timestamp": "2019-09-27T12:50:42.614Z",
"successful": false,
"reason": ""
},
"last_successful_execution": {
"timestamp": "2019-09-27T11:50:42.464Z",
"successful": true
}
}
},
"execution_state": "execution_not_needed",
"version": -1
},
"trigger_event": {
"type": "schedule",
"triggered_time": "2019-09-27T13:40:42.506Z",
"schedule": {
"scheduled_time": "2019-09-27T13:40:42.154Z"
}
},
"input": {
"search": {
"request": {
"search_type": "query_then_fetch",
"indices": [
"test-qa-b2b_gs1"
],
"rest_total_hits_as_int": true,
"body": {
"query": {
"bool": {
"must": [
{
"wildcard": {
"json.logging_error_code.keyword": "GS1_BU*"
}
},
{
"match": {
"json.loglevel": "WARNING"
}
}
],
"filter": {
"range": {
"@timestamp": {
"from": "{{ctx.trigger.scheduled_time}}||-5m",
"to": "{{ctx.trigger.triggered_time}}"
}
}
}
}
}
}
}
}
},
"condition": {
"compare": {
"ctx.payload.hits.total": {
"gt": 0
}
}
},
"metadata": {
"name": "GS1_BU-warnings",
"xpack": {
"type": "json"
}
},
"result": {
"execution_time": "2019-09-27T13:40:42.506Z",
"execution_duration": 7,
"input": {
"type": "search",
"status": "success",
"payload": {
"_shards": {
"total": 3,
"failed": 0,
"successful": 3,
"skipped": 0
},
"hits": {
"hits": [],
"total": 0,
"max_score": null
},
"took": 7,
"timed_out": false
},
"search": {
"request": {
"search_type": "query_then_fetch",
"indices": [
"test-qa-b2b_gs1"
],
"rest_total_hits_as_int": true,
"body": {
"query": {
"bool": {
"must": [
{
"wildcard": {
"json.logging_error_code.keyword": "GS1_BU*"
}
},
{
"match": {
"json.loglevel": "WARNING"
}
}
],
"filter": {
"range": {
"@timestamp": {
"from": "2019-09-27T13:40:42.154Z||-5m",
"to": "2019-09-27T13:40:42.506Z"
}
}
}
}
}
}
}
}
},
"condition": {
"type": "compare",
"status": "success",
"met": false,
"compare": {
"resolved_values": {
"ctx.payload.hits.total": 0
}
}
},
"actions": []
},
"messages": []
}
The output is exactly the same for a successful run, except for this snippet:
Failed attempt:
"last_execution": {
"timestamp": "2019-09-27T12:50:42.614Z",
"successful": false,
"reason": ""
},
Successful attempt:
"last_execution": {
"timestamp": "2019-09-26T12:55:42.636Z",
"successful": true
},
Edit: I wanted to add that the webhook action is working as expected; I have validated this on the receiving end.