Hi there -
We have a few watchers that had been running successfully and suddenly started failing. We can confirm that nothing was changed on our end.
Watch (actions and transform omitted):
{
  "trigger": {
    "schedule": {
      "interval": "1m"
    }
  },
  "input": {
    "search": {
      "request": {
        "search_type": "query_then_fetch",
        "indices": ["logstash*"],
        "rest_total_hits_as_int": true,
        "body": {
          "query": {
            "bool": {
              "filter": [
                {
                  "term": {
                    "heroku_dyno.keyword": {
                      "value": "router"
                    }
                  }
                },
                {
                  "term": {
                    "app.keyword": {
                      "value": "integration-service"
                    }
                  }
                },
                {
                  "range": {
                    "@timestamp": {
                      "to": "now",
                      "from": "now-{{ctx.metadata.interval_minutes}}m"
                    }
                  }
                }
              ],
              "must_not": [
                {
                  "term": {
                    "code.keyword": {
                      "value": "H18"
                    }
                  }
                },
                {
                  "term": {
                    "app.keyword": {
                      "value": "admin"
                    }
                  }
                },
                {
                  "term": {
                    "path.keyword": {
                      "value": "/ping"
                    }
                  }
                }
              ]
            }
          },
          "aggs": {
            "by_time": {
              "date_histogram": {
                "field": "@timestamp",
                "calendar_interval": "minute"
              },
              "aggs": {
                "failed": {
                  "range": {
                    "ranges": [
                      {
                        "from": 500,
                        "key": "failed"
                      }
                    ],
                    "field": "status",
                    "keyed": true
                  }
                }
              }
            }
          }
        }
      }
    }
  },
  "condition": {
    "script": {
      "source": "def time_buckets = ctx.payload.aggregations.by_time.buckets; for (time in time_buckets) { float total_request_count = (float)time.doc_count; float failed_count = (float)time.failed.buckets.failed.doc_count; float failed_percent = failed_count / total_request_count; if (failed_percent > ctx.metadata.threshold_percent && failed_count > ctx.metadata.threshold_minimum_count && total_request_count > ctx.metadata.threshold_low_traffic) { return true; } } return false;",
      "lang": "painless"
    }
  },
  "metadata": {
    "threshold_percent": 0.1,
    "threshold_minimum_count": 4,
    "environment": "prod",
    "victorops_email": "fb368c57-d053-4aac-a3d8-debb42dc15a5+dev-oncall@alert.victorops.com",
    "interval_minutes": 3,
    "alert_level": "critical",
    "threshold_low_traffic": 10
  }
}
Simulate error:
"exception": {
"type": "script_exception",
"reason": "runtime error",
"script_stack": [
"time_buckets = ctx.payload.aggregations.by_time.buckets; ",
" ^---- HERE"
],
"script": "def time_buckets = ctx.payload.aggregations.by_time.buckets; for (time in time_buckets) { float total_request_count = (float)time.doc_count; float failed_count = (float)time.failed.buckets.failed.doc_count; float failed_percent = failed_count / total_reque ...",
"lang": "painless",
"position": {
"offset": 43,
"start": 4,
"end": 61
},
"caused_by": {
"type": "null_pointer_exception",
"reason": "cannot access method/field [by_time] from a null def reference",
"stack_trace": "java.lang.NullPointerException: cannot access method/field [by_time] from a null def reference\n\tat org.elasticsearch.painless@8.4.2/org.elasticsearch.painless.DefBootstrap$PIC.checkNull(DefBootstrap.java:141)\n\tat org.elasticsearch.painless@8.4.2/org.elasticsearch.painless.DefBootstrap$PIC.fallback(DefBootstrap.java:230)\n\tat org.elasticsearch.painless.PainlessScript$Script.execute(def time_buckets = ctx.payload.aggregations.by_time.buckets; for (time in time_buckets) { float total_request_count = (float)time.doc_count; float failed_count = (float)time.failed.buckets.failed.doc_count; float failed_percent = failed_count / total_reque ...:44)\n\tat org.elasticsearch.xpack.watcher.condition.ScriptCondition.doExecute(ScriptCondition.java:65)\n\tat org.elasticsearch.xpack.watcher.condition.ScriptCondition.execute(ScriptCondition.java:60)\n\tat org.elasticsearch.xpack.watcher.execution.ExecutionService.executeInner(ExecutionService.java:539)\n\tat org.elasticsearch.xpack.watcher.execution.ExecutionService.execute(ExecutionService.java:342)\n\tat org.elasticsearch.xpack.watcher.transport.actions.TransportExecuteWatchAction$1.doRun(TransportExecuteWatchAction.java:193)\n\tat org.elasticsearch.server@8.4.2/org.elasticsearch.common.util.concurrent.AbstractRunnable.run(AbstractRunnable.java:26)\n\tat org.elasticsearch.xpack.watcher.execution.ExecutionService$WatchExecutionTask.run(ExecutionService.java:666)\n\tat org.elasticsearch.server@8.4.2/org.elasticsearch.common.util.concurrent.ThreadContext$ContextPreservingRunnable.run(ThreadContext.java:710)\n\tat java.base/java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1136)\n\tat java.base/java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:635)\n\tat java.base/java.lang.Thread.run(Thread.java:833)\n"
},
Through some debugging, I found that `ctx.payload` is not null; however, `ctx.payload.aggregations` returns null. `ctx.payload.aggregations.by_time.buckets` should be non-null.
Thanks!