Watcher with conditions fails on transform

alerting

(Jeffrey ) #1

Good afternoon,

I'm trying to create a Watcher for cluster alerting in Slack. I'm using the Elasticsearch cluster state watcher included with X-Pack. For testing, I'm verifying whether the behavior works like the built-in one. My needs are to alert on a cluster outage, and to message within Slack when the cluster is healthy again.

However, I'm receiving transform errors when trying to simulate the Watcher. When I'm using a real-life scenario (i.e. shutting down one node), I keep getting mails saying the cluster is yellow. I do not receive any e-mail when the cluster is green again.

    `{
      "trigger": {
        "schedule": {
          "interval": "1m"
        }
      },
      "input": {
        "chain": {
          "inputs": [
            {
              "check": {
                "search": {
                  "request": {
                    "search_type": "query_then_fetch",
                    "indices": [
                      ".monitoring-es-*"
                    ],
                    "types": [],
                    "body": {
                      "size": 1,
                      "sort": [
                        {
                          "timestamp": {
                            "order": "desc"
                          }
                        }
                      ],
                      "_source": [
                        "cluster_state.status"
                      ],
                      "query": {
                        "bool": {
                          "filter": [
                            {
                              "term": {
                                "cluster_uuid": "{{ctx.metadata.xpack.cluster_uuid}}"
                              }
                            },
                            {
                              "bool": {
                                "should": [
                                  {
                                    "term": {
                                      "_type": "cluster_state"
                                    }
                                  },
                                  {
                                    "term": {
                                      "type": "cluster_stats"
                                    }
                                  }
                                ]
                              }
                            }
                          ]
                        }
                      }
                    }
                  }
                }
              }
            },
            {
              "alert": {
                "search": {
                  "request": {
                    "search_type": "query_then_fetch",
                    "indices": [
                      ".monitoring-alerts-6"
                    ],
                    "types": [],
                    "body": {
                      "size": 1,
                      "terminate_after": 1,
                      "query": {
                        "bool": {
                          "filter": {
                            "term": {
                              "_id": "{{ctx.watch_id}}"
                            }
                          }
                        }
                      },
                      "sort": [
                        {
                          "timestamp": {
                            "order": "desc"
                          }
                        }
                      ]
                    }
                  }
                }
              }
            },
            {
            
      "condition": {
        "script": {
          "source": "ctx.vars.fails_check = ctx.payload.check.hits.total != 0 && ctx.payload.check.hits.hits[0]._source.cluster_state.status != 'green';ctx.vars.not_resolved = ctx.payload.alert.hits.total == 1 && ctx.payload.alert.hits.hits[0]._source.resolved_timestamp == null;return ctx.vars.fails_check || ctx.vars.not_resolved",
          "lang": "painless"
        }
      },
      "transform": {
        "script": {
          "source": "ctx.vars.email_recipient = (ctx.payload.kibana_settings.hits.total > 0) ? ctx.payload.kibana_settings.hits.hits[0]._source.kibana_settings.xpack.default_admin_email : null;ctx.vars.is_new = ctx.vars.fails_check && !ctx.vars.not_resolved;ctx.vars.is_resolved = !ctx.vars.fails_check && ctx.vars.not_resolved;def state = ctx.payload.check.hits.hits[0]._source.cluster_state.status;if (ctx.vars.not_resolved){ctx.payload = ctx.payload.alert.hits.hits[0]._source;if (ctx.vars.fails_check == false) {ctx.payload.resolved_timestamp = ctx.execution_time;}} else {ctx.payload = ['timestamp': ctx.execution_time, 'metadata': ctx.metadata.xpack];}if (ctx.vars.fails_check) {ctx.payload.prefix = 'Elasticsearch cluster status is ' + state + '.';if (state == 'red') {ctx.payload.message = 'Allocate missing primary shards and replica shards.';ctx.payload.metadata.severity = 2100;} else {ctx.payload.message = 'Allocate missing replica shards.';ctx.payload.metadata.severity = 1100;}}ctx.vars.state = state.toUpperCase();ctx.payload.update_timestamp = ctx.execution_time;return ctx.payload;",
          "lang": "painless"
        }
      },
      "actions": {
        "add_to_alerts_index": {
          "index": {
            "index": ".monitoring-alerts-6",
            "doc_type": "doc",
            "doc_id": "i-BZfpJDT2SUTGAk1NDq_g_elasticsearch_cluster_status"
          }
        },

}
`

I've removed some code due to the post message limit. However, the watcher is retrievable through 'GET .watcher/_search'.
I'm receiving the following error:
transform": { "type": "script", "status": "failure", "reason": "runtime error", "error": { "root_cause": [ { "type": "script_exception", "reason": "runtime error", "script_stack": [ "ctx.vars.is_new = ctx.vars.fails_check && !ctx.vars.not_resolved;", " ^---- HERE"

Thank you in advance


(Alexander Reelsen) #2

can you include the full output of the Execute Watch API in a gist please? It is important to have all of the data.

Thanks!


(Jeffrey ) #3

https://privatebin.net/?2ceb52a5821a5a5e#SFhSEwDq8z3P5cWFkZ2KhIzLglso/t6Yk1MY4QAd2XI=

Password is elastic, removed sensitive on-premise data


(Alexander Reelsen) #4

the watch somehow seems to be misformatted, I cannot store it locally - can you also put it in a full gist? Thanks a lot!


(Jeffrey ) #5

I'm basing it on the json from Alert when okay

If you want to try it yourself, you can retrieve the default watcher through the GET api (GET _xpack/watcher/watch/clusterid_elasticsearch_cluster_status), make it into a watcher, fake the condition and simulate. Then you'll receive the runtime error

`"reason": "runtime error",
  "error": {
    "root_cause": [
      {
        "type": "script_exception",
        "reason": "runtime error",
        "script_stack": [
          "ctx.vars.is_new = ctx.vars.fails_check && !ctx.vars.not_resolved;",
          "                          ^---- HERE"
        ],`

(Jeffrey ) #6

I've managed to make it work; it needs data available to execute the transform. I'm able to replicate a message when the cluster status is yellow (i.e. new). However, when the cluster status is resolved, I'm receiving a default mail.

{ "trigger": { "schedule": { "interval": "10s" } }, "input": { "chain": { "inputs": [ { "check": { "search": { "request": { "search_type": "query_then_fetch", "indices": [ ".monitoring-es-*" ], "types": [], "body": { "size": 1, "query": { "bool": { "filter": [ { "term": { "cluster_uuid": "8w0nvzotQwOFrUCsM_95KQ" } }, { "bool": { "should": [ { "term": { "_type": "cluster_state" } }, { "term": { "type": "cluster_stats" } } ] } } ] } }, "_source": [ "cluster_state.status" ], "sort": [ { "timestamp": { "order": "desc" } } ] } } } } }, { "alert": { "search": { "request": { "search_type": "query_then_fetch", "indices": [ ".monitoring-alerts-6", ".monitoring-alerts-2" ], "types": [], "body": { "size": 1, "query": { "bool": { "filter": { "term": { "_id": "{{ctx.watch_id}}" } } } }, "terminate_after": 1, "sort": [ { "timestamp": { "order": "desc" } } ] } } } } }, { "kibana_settings": { "search": { "request": { "search_type": "query_then_fetch", "indices": [ ".monitoring-kibana-6-*" ], "types": [], "body": { "size": 1, "query": { "bool": { "filter": { "term": { "type": "kibana_settings" } } } }, "sort": [ { "timestamp": { "order": "desc" } } ] } } } } } ] } }, "condition": { "script": { "source": "ctx.vars.fails_check = ctx.payload.check.hits.total != 0 && ctx.payload.check.hits.hits[0]._source.cluster_state.status != 'green'; ctx.vars.not_resolved = ctx.payload.alert.hits.total == 1 && ctx.payload.alert.hits.hits[0]._source.resolved_timestamp == null; return ctx.vars.fails_check || ctx.vars.not_resolved", "lang": "painless" } }, "actions": { "trigger_alert": { "index": { "index": ".monitoring-alerts-6", "doc_type": "doc", "doc_id": "8w0nvzotQwOFrUCsM_95KQ_elasticsearch_cluster_status" } }, "send_email_to_admin": { "condition": { "script": { "source": "return ctx.vars.email_recipient != null && (ctx.vars.is_new || ctx.vars.is_resolved)", "lang": "painless" } }, "email": { "profile": "standard", "from": "noreply@mail.nl", "to": [ "mail@mail.nl" ], "subject": 
"[{{#ctx.vars.is_new}}TEST NEW{{/ctx.vars.is_new}}{{#ctx.vars.is_resolved}}TEST RESOLVED{{/ctx.vars.is_resolved}}] {{ctx.metadata.name}} [{{ctx.vars.state}}]", "body": { "text": "{{#ctx.vars.is_resolved}}TEST This cluster alert has been resolved: {{/ctx.vars.is_resolved}}{{ctx.payload.prefix}} TEST {{ctx.payload.message}}TEST" } } } }, "transform": { "script": { "source": "ctx.vars.email_recipient = (ctx.payload.kibana_settings.hits.total > 0) ? ctx.payload.kibana_settings.hits.hits[0]._source.kibana_settings.xpack.default_admin_email : null;ctx.vars.is_new = ctx.vars.fails_check && !ctx.vars.not_resolved;ctx.vars.is_resolved = !ctx.vars.fails_check && ctx.vars.not_resolved;def state = ctx.payload.check.hits.hits[0]._source.cluster_state.status;if (ctx.vars.not_resolved){ctx.payload = ctx.payload.alert.hits.hits[0]._source;if (ctx.vars.fails_check == false) {ctx.payload.resolved_timestamp = ctx.execution_time;}} else {ctx.payload = ['timestamp': ctx.execution_time, 'metadata': ctx.metadata.xpack];}if (ctx.vars.fails_check) {ctx.payload.prefix = 'Elasticsearch cluster status is ' + state + '.';if (state == 'red') {ctx.payload.message = 'Allocate missing primary shards and replica shards.';ctx.payload.metadata.severity = 2100;} else {ctx.payload.message = 'Allocate missing replica shards.';ctx.payload.metadata.severity = 1100;}}ctx.vars.state = state.toUpperCase();ctx.payload.update_timestamp = ctx.execution_time;return ctx.payload;", "lang": "painless" } } }

I've tried disabling the default watcher, to no effect.


(system) #7

This topic was automatically closed 28 days after the last reply. New replies are no longer allowed.