Alert / Watcher email help to get hostname in email when triggered

alerting

(Phil) #1

Hi

I am having trouble getting the hostname included in the email message.

I have several hosts that use heartbeats. If one of them stops sending logs to the main server, an email is triggered. I have set up the email correctly, but I am not able to identify which host is down.

Below is the watch I have set up:

    {
  "trigger": {
    "schedule": {
      "interval": "2m"
    }
  },
  "input": {
    "search": {
      "request": {
        "search_type": "query_then_fetch",
        "indices": [
          "heartbeat-*"
        ],
        "types": [],
        "body": {
          "query": {
            "bool": {
              "must": [
                {
                  "match": {
                    "host": "AAAB43Q"   (need to add more hosts here)
                  }
                },
                {
                  "match": {
                    "tags": "heartbeat"
                  }
                },
                {
                  "range": {
                    "@timestamp": {
                      "gte": "now-1m"
                    }
                  }
                }
              ]
            }
          }
        }
      }
    }
  },
  "condition": {
    "compare": {
      "ctx.payload.hits.total": {
        "lt": 12
      }
    }
  },
  "actions": {
    "email_administrator": {
      "throttle_period_in_millis": 900000,
      "email": {
        "profile": "standard",
        "attachments": {
          "attached_data": {
            "data": {
              "format": "json"
            }
          }
        },
        "priority": "low",
        "to": [
          "Phil.XXXX@xxxx.com"
        ],
        "subject": "Watcher - too heartbeat messages",
        "body": {
          "text": "{{ctx.payload.hosts}}"
        }
      }
    }
  }
}

Output (for only one host):

{
  "_id": "heartbeat_watch_0825ff5d-ea77-4cf3-9064-87cc88306cf0-2018-05-22T19:50:41.245Z",
  "watch_record": {
    "watch_id": "heartbeat_watch",
    "node": "6UoSxbVmRH6qFvZ5I_HL4w",
    "state": "executed",
    "status": {
      "state": {
        "active": true,
        "timestamp": "2018-05-22T19:50:37.778Z"
      },
      "last_checked": "2018-05-22T19:50:41.245Z",
      "last_met_condition": "2018-05-22T19:50:41.245Z",
      "actions": {
        "email_administrator": {
          "ack": {
            "timestamp": "2018-05-22T19:50:41.245Z",
            "state": "ackable"
          },
          "last_execution": {
            "timestamp": "2018-05-22T19:50:41.245Z",
            "successful": true
          },
          "last_successful_execution": {
            "timestamp": "2018-05-22T19:50:41.245Z",
            "successful": true
          }
        }
      },
      "execution_state": "executed",
      "version": 38
    },
    "trigger_event": {
      "type": "manual",
      "triggered_time": "2018-05-22T19:50:41.245Z",
      "manual": {
        "schedule": {
          "scheduled_time": "2018-05-22T19:50:41.245Z"
        }
      }
    },
    "input": {
      "search": {
        "request": {
          "search_type": "query_then_fetch",
          "indices": [
            "heartbeat-*"
          ],
          "types": [],
          "body": {
            "query": {
              "bool": {
                "must": [
                  {
                    "match": {
                      "host": "AAAB43Q"
                    }
                  },
                  {
                    "match": {
                      "tags": "heartbeat"
                    }
                  },
                  {
                    "range": {
                      "@timestamp": {
                        "gte": "now-1m"
                      }
                    }
                  }
                ]
              }
            }
          }
        }
      }
    },
    "condition": {
      "compare": {
        "ctx.payload.hits.total": {
          "lt": 12
        }
      }
    },
    "result": {
      "execution_time": "2018-05-22T19:50:41.245Z",
      "execution_duration": 17,
      "input": {
        "type": "simple",
        "status": "success",
        "payload": {
          "hits": {
            "total": 0
          }
       }
      },
      "condition": {
        "type": "compare",
        "status": "success",
        "met": true,
        "compare": {
          "resolved_values": {
            "ctx.payload.hits.total": 0
          }
        }
      },
      "actions": [
        {
          "id": "email_administrator",
          "type": "email",
          "status": "success",
          "email": {
            "account": "exchange_account",
            "message": {
              "id": "heartbeat_watch_0825ff5d-ea77-4cf3-9064-87cc88306cf0-2018-05-22T19:50:41.245Z",
              "from": "AAA.SSSr@XXXXc.com",
              "priority": "low",
              "sent_date": "2018-05-22T19:50:41.249Z",
              "to": [
                "Phil.XXXX@xxxx.com"
              ],
              "subject": "Test MEssage - too heartbeat messages",
              "body": {
                "text": ""
              }
            }
          }
        }
      ]
    },
    "messages": []
  }
}

Which would be ideal: one watch per host, or one watch for all hosts?

Thanks in advance.

Phil


(Alexander Reelsen) #2

Hey,

About how many hosts are we talking here? If it is not too many, using a terms aggregation on the hostname might make a lot of sense. If you need different email recipients, you may need to go with one watch per host (or per recipient).

Also, I am not sure what you are trying to achieve. Are you interested in the total count, or do you want to compare the results with earlier ones from 5 minutes ago and, if a host is missing, run an alert? It depends on your use case, but your current watch is a good thing to start with.

Hope this helps!

--Alex


(system) #3

This topic was automatically closed 28 days after the last reply. New replies are no longer allowed.