Watcher alert for ml jobs

alerting

(Gautam) #1

Hi,

I want to create alerts for an ml job. I'm finding anomalies in the error log. Please find the JSON below:

{
  "job_id": "errorlog_pattern_analysis",
  "job_type": "anomaly_detector",
  "job_version": "5.5.2",
  "description": "",
  "create_time": 1504176127683,
  "finished_time": 1504176198409,
  "analysis_config": {
    "bucket_span": "5m",
    "categorization_field_name": "message",
    "detectors": [
      {
        "detector_description": "count by mlcategory",
        "function": "count",
        "by_field_name": "mlcategory",
        "detector_rules": [],
        "detector_index": 0
      },
      {
        "detector_description": "non_zero_count by mlcategory",
        "function": "non_zero_count",
        "by_field_name": "mlcategory",
        "detector_rules": [],
        "detector_index": 1
      },
      {
        "detector_description": "rare by mlcategory",
        "function": "rare",
        "by_field_name": "mlcategory",
        "detector_rules": [],
        "detector_index": 2
      },
      {
        "detector_description": "time_of_week by mlcategory",
        "function": "time_of_week",
        "by_field_name": "mlcategory",
        "detector_rules": [],
        "detector_index": 3
      },
      {
        "detector_description": "time_of_day by mlcategory",
        "function": "time_of_day",
        "by_field_name": "mlcategory",
        "detector_rules": [],
        "detector_index": 4
      }
    ],
    "influencers": [
      "server.keyword"
    ]
  },
  "data_description": {
    "time_field": "@timestamp",
    "time_format": "epoch_ms"
  },
  "model_snapshot_retention_days": 1,
  "model_snapshot_id": "1504583552",
  "results_index_name": "custom-errorlog_pattern_analysis",
  "data_counts": {
    "job_id": "errorlog_pattern_analysis",
    "processed_record_count": 17729,
    "processed_field_count": 35458,
    "input_bytes": 40966034,
    "input_field_count": 35458,
    "invalid_date_count": 0,
    "missing_field_count": 17729,
    "out_of_order_timestamp_count": 0,
    "empty_bucket_count": 0,
    "sparse_bucket_count": 12,
    "bucket_count": 4523,
    "earliest_record_timestamp": 1503685805561,
    "latest_record_timestamp": 1504575522482,
    "last_data_time": 1504575757799,
    "latest_empty_bucket_timestamp": 1504575000000,
    "latest_sparse_bucket_timestamp": 1504097400000,
    "input_record_count": 17729
  },
  "model_size_stats": {
    "job_id": "errorlog_pattern_analysis",
    "result_type": "model_size_stats",
    "model_bytes": 6709326,
    "total_by_field_count": 817,
    "total_over_field_count": 0,
    "total_partition_field_count": 6,
    "bucket_allocation_failures_count": 0,
    "memory_status": "ok",
    "log_time": 1504242294000,
    "timestamp": 1504138500000
  },
  "datafeed_config": {
    "datafeed_id": "datafeed-errorlog_pattern_analysis",
    "job_id": "errorlog_pattern_analysis",
    "query_delay": "60s",
    "frequency": "150s",
    "indices": [
      "prod_log-*"
    ],
    "types": [
      "Errorlog"
    ],
    "query": {
      "bool": {
        "must_not": [
          {
            "match_phrase": {
              "message": {
                "query": "No session",
                "slop": 0,
                "boost": 1
              }
            }
          }
        ],
        "disable_coord": false,
        "adjust_pure_negative": true,
        "boost": 1
      }
    },
    "scroll_size": 1000,
    "chunking_config": {
      "mode": "auto"
    },
    "state": "stopped"
  },
  "state": "opened",
  "node": {
    "id": "mAAmcM7dSEm_CUbVi3Bn1g",
    "name": "mAAmcM7",
    "ephemeral_id": "0MUAXokqSFqW48rvc3Z_Kw",
    "transport_address": "10.60.40.120:9300",
    "attributes": {
      "ml.max_open_jobs": "10",
      "ml.enabled": "true"
    }
  },
  "open_time": "358832s"
}

I want to send alerts for all anomalies with a severity greater than 50, and I also want to include the category examples. Example format:

Anomaly detected during the bucket 2017-09-05 14:20:00.000 to 14:24:59.999 for the following type of error messages:

"Error in verification. Stack java.lang.Exception Exception in getting credit card getDetails failed at .web.registration.ccVal.cmd"

Stack java.lang.Exception Exception in getting credit card getDetails failed at .web.registration.ccVal.cmd

I need to send this as an attachment.

Please help.


(Alexander Reelsen) #2

Have you seen this blog post? Does it help or is it missing something?


(Gautam) #3

Hi,

That was the first thing I checked. It's a good starting point, but I want to go further. Is there a way to list the category examples as an attachment in the alert mail?

Basically, I want an alert on the record level along with the category examples which caused the anomaly.


(system) #4

This topic was automatically closed 28 days after the last reply. New replies are no longer allowed.