Anomaly detection using derivative aggregation

heric · September 19, 2020, 8:16am

Hi All,

I am trying to define an anomaly detection job using a derivative aggregation, because the value that I'm interested in is an accumulated counter (it increases in each interval).

Below are the anomaly detector and datafeed that I have defined. The job can detect anomalies in the counter value trend, but I have two problems:

1. The Anomaly Explorer is not showing the graph, but when I go to the Single Metric Viewer it shows the graph.

2. In the derivative aggregation I can get a normalized_value: for example, if I add "unit" : "second" to the derivative, it gives me bytes per second instead of bytes per bucket interval. I get the expected result when executing the query in Dev Tools (the bytes-per-second value is inside normalized_value, as in the bucket below). How can I use normalized_value in the anomaly detector instead of value?

                    {
                      "key_as_string" : "2020-09-18T17:40:00.000Z",
                      "key" : 1600450800000,
                      "doc_count" : 1,
                      "@timestamp" : {
                        "value" : 1.6004508E12,
                        "value_as_string" : "2020-09-18T17:40:00.000Z"
                      },
                      "bytes" : {
                        "value" : 8.778613790810844E15
                      },
                      "bytes_deriv" : {
                        "value" : 8.34867321692E11,
                        "normalized_value" : 2.782891072306667E9
                      }
                    },

The anomaly detector and datafeed definitions are as below:

{
  "job_id": "epg-top-10-sdfid-anomaly",
  "job_type": "anomaly_detector",
  "job_version": "7.9.0",
  "groups": [
    "epg",
    "sdfid"
  ],
  "description": "EPG Top 10 SDFID Anomaly",
  "create_time": 1600462412677,
  "analysis_config": {
    "bucket_span": "15m",
    "summary_count_field_name": "doc_count",
    "detectors": [
      {
        "detector_description": """mean(bytes_deriv) by "application-name" partitionfield=node""",
        "function": "mean",
        "field_name": "bytes_deriv",
        "by_field_name": "application-name",
        "partition_field_name": "node",
        "detector_index": 0
      }
    ],
    "influencers": [
      "application-name",
      "node"
    ]
  },
  "analysis_limits": {
    "model_memory_limit": "25mb",
    "categorization_examples_limit": 4
  },
  "data_description": {
    "time_field": "@timestamp",
    "time_format": "epoch_ms"
  },
  "model_plot_config": {
    "enabled": true,
    "annotations_enabled": true
  },
  "model_snapshot_retention_days": 10,
  "daily_model_snapshot_retention_after_days": 1,
  "custom_settings": {
    "custom_urls": []
  },
  "model_snapshot_id": "1600491900",
  "results_index_name": "custom-epg-top-10-sdfid-anomaly",
  "allow_lazy_open": false,
  "data_counts": {
    "job_id": "epg-top-10-sdfid-anomaly",
    "processed_record_count": 193910,
    "processed_field_count": 775580,
    "input_bytes": 23692365,
    "input_field_count": 775580,
    "invalid_date_count": 0,
    "missing_field_count": 60,
    "out_of_order_timestamp_count": 0,
    "empty_bucket_count": 0,
    "sparse_bucket_count": 0,
    "bucket_count": 1356,
    "earliest_record_timestamp": 1599282600000,
    "latest_record_timestamp": 1600502400000,
    "last_data_time": 1600503000144,
    "input_record_count": 193910,
    "latest_bucket_timestamp": 1600502400000
  },
  "model_size_stats": {
    "job_id": "epg-top-10-sdfid-anomaly",
    "result_type": "model_size_stats",
    "model_bytes": 1713016,
    "peak_model_bytes": 1118846,
    "model_bytes_exceeded": 0,
    "model_bytes_memory_limit": 26214400,
    "total_by_field_count": 52,
    "total_over_field_count": 0,
    "total_partition_field_count": 6,
    "bucket_allocation_failures_count": 0,
    "memory_status": "ok",
    "categorized_doc_count": 0,
    "total_category_count": 0,
    "frequent_category_count": 0,
    "rare_category_count": 0,
    "dead_category_count": 0,
    "failed_category_count": 0,
    "categorization_status": "ok",
    "log_time": 1600502100148,
    "timestamp": 1600501500000
  },
  "forecasts_stats": {
    "total": 0,
    "forecasted_jobs": 0
  },
  "state": "opened",
  "node": {
    "id": "j8ag6P4jRM6i9azRTJv6TA",
    "name": "mprl343",
    "ephemeral_id": "kxyjatTZTRGbdI_TPcpfjw",
    "transport_address": "10.0.2.115:9300",
    "attributes": {
      "ml.machine_memory": "67275366400",
      "xpack.installed": "true",
      "transform.node": "true",
      "ml.max_open_jobs": "20"
    }
  },
  "assignment_explanation": "",
  "open_time": "40640s",
  "timing_stats": {
    "job_id": "epg-top-10-sdfid-anomaly",
    "bucket_count": 1491,
    "total_bucket_processing_time_ms": 32381.99999999999,
    "minimum_bucket_processing_time_ms": 0,
    "maximum_bucket_processing_time_ms": 1000,
    "average_bucket_processing_time_ms": 21.71830985915492,
    "exponential_average_bucket_processing_time_ms": 19.243338899045884,
    "exponential_average_bucket_processing_time_per_hour_ms": 276.04221048439666
  },
  "datafeed_config": {
    "datafeed_id": "datafeed-epg-top-10-sdfid-anomaly",
    "job_id": "epg-top-10-sdfid-anomaly",
    "query_delay": "300s",
    "frequency": "300s",
    "indices": [
      "mo-ps-epg-counter*"
    ],
    "query": {
      "bool": {
        "filter": [
          {
            "query_string": {
              "analyze_wildcard": true,
              "query": "counter:downlink-bytes AND ( application-name:Default_SDFID OR application-name:Heuristics_Youtube OR application-name:Facebook OR application-name:Domain_Snapchat OR application-name:Default_HTTP OR application-name:Domain_Youtube OR application-name:Domain_Instagram OR application-name:Whatsapp OR application-name:Google_Play_Apps OR application-name:Twitter OR application-name:Anghami OR application-name:Snapchat OR application-name:IP_Snapchat OR application-name:Heuristics_IMO OR application-name:Heuristics_Facebook_VOIP OR application-name:Education OR application-name:Telegram OR application-name:Heuristics_P2P OR application-name:Googlemaps)"
            }
          }
        ]
      }
    },
    "aggregations": {
      "application-name": {
        "terms": {
          "field": "application-name"
        },
        "aggregations": {
          "node": {
            "terms": {
              "field": "node"
            },
            "aggregations": {
              "buckets": {
                "date_histogram": {
                  "field": "@timestamp",
                  "interval": "5m"
                },
                "aggregations": {
                  "@timestamp": {
                    "max": {
                      "field": "@timestamp"
                    }
                  },
                  "bytes": {
                    "sum": {
                      "field": "value"
                    }
                  },
                  "bytes_deriv": {
                    "derivative": {
                      "buckets_path": "bytes",
                      "unit": "second"
                    }
                  }
                }
              }
            }
          }
        }
      }
    },
    "scroll_size": 1000,
    "chunking_config": {
      "mode": "manual",
      "time_span": "300000000ms"
    },
    "delayed_data_check_config": {
      "enabled": true
    },
    "indices_options": {
      "expand_wildcards": [
        "open"
      ],
      "ignore_unavailable": false,
      "allow_no_indices": true,
      "ignore_throttled": true
    },
    "state": "started",
    "node": {
      "id": "j8ag6P4jRM6i9azRTJv6TA",
      "name": "mprl343",
      "ephemeral_id": "kxyjatTZTRGbdI_TPcpfjw",
      "transport_address": "10.0.2.115:9300",
      "attributes": {
        "ml.machine_memory": "67275366400",
        "ml.max_open_jobs": "20"
      }
    },
    "assignment_explanation": "",
    "timing_stats": {
      "job_id": "epg-top-10-sdfid-anomaly",
      "search_count": 272,
      "bucket_count": 1355,
      "total_search_time_ms": 7467,
      "average_search_time_per_bucket_ms": 5.51070110701107,
      "exponential_average_search_time_per_hour_ms": 351.1534437656155
    }
  }
}

heric · September 19, 2020, 12:29pm

I managed to solve point #2 by using a bucket script aggregation.

Here are the anomaly detector and datafeed definitions after adding the bucket script aggregation:

PUT /_ml/anomaly_detectors/epg-top-10-sdfid-anomaly
{
  "job_id": "epg-top-10-sdfid-anomaly",
  "description": "",
  "groups": [
    "epg",
	"sdfid"	
  ],
  "analysis_config": {
    "bucket_span": "15m",
    "detectors": [
      {
        "detector_description": "mean(traffic_gbps)",	  
        "function": "mean",
        "field_name": "traffic_gbps",
        "by_field_name": "application-name",
        "partition_field_name": "node"
      }
    ],
    "influencers": [
      "application-name",
      "node"
    ],
    "summary_count_field_name": "doc_count"
  },
  "data_description": {
    "time_field": "@timestamp",
    "time_format": "epoch_ms"	
  },
  "analysis_limits": {
    "model_memory_limit": "25MB"
  },
  "model_plot_config": {
    "enabled": true,
    "annotations_enabled": true	
  },
  "model_snapshot_retention_days": 10,
  "daily_model_snapshot_retention_after_days": 1,
  "results_index_name": "custom-epg-top-10-sdfid-anomaly"
  
}


PUT /_ml/datafeeds/datafeed-epg-top-10-sdfid-anomaly/
{
  "indices": [
    "mo-ps-epg-counter*"
  ],
"query":{"bool":{"filter":[{"query_string":{"analyze_wildcard":true,"query":"counter:downlink-bytes AND ( application-name:Default_SDFID OR application-name:Heuristics_Youtube OR application-name:Facebook OR application-name:Domain_Snapchat OR application-name:Default_HTTP OR application-name:Domain_Youtube OR application-name:Domain_Instagram OR application-name:Whatsapp OR application-name:Google_Play_Apps OR application-name:Twitter OR application-name:Anghami OR application-name:Snapchat OR application-name:IP_Snapchat OR application-name:Heuristics_IMO OR application-name:Heuristics_Facebook_VOIP OR application-name:Education OR application-name:Telegram OR application-name:Heuristics_P2P OR application-name:Googlemaps)"}}]}},
 "aggregations": {
  "application-name": {"terms": {"field": "application-name"},
   "aggregations": {
    "node": {"terms": {"field": "node"},
    "aggregations": {
		"buckets": {
        "date_histogram": {
          "field": "@timestamp",
          "interval": "5m"
        },
        "aggregations": {
          "@timestamp": {
            "max": {
              "field": "@timestamp"
            }
          },
          "bytes": {
            "sum": {
              "field": "value"
            }
          },
          "bytes_deriv": {
            "derivative": {
              "buckets_path": "bytes",
			  "unit" : "second"
            }
          },
          "traffic_gbps": {
          "bucket_script": {
            "buckets_path": {
              "my_var1": "bytes_deriv.normalized_value"
            },
            "script": "params.my_var1 * 8 / (1000*1000*1000)"
          }
         }		  
        }
      }
    }
  }
 }
}
}
,
  "frequency": "300s",
  "job_id": "epg-top-10-sdfid-anomaly",
  "datafeed_id": "datafeed-epg-top-10-sdfid-anomaly",
  "query_delay": "300s"
  
}

However, I still have a problem with point #1: the graph is not showing in the Anomaly Explorer; it only shows in the Single Metric Viewer.

Does anyone have an idea how to solve this issue?

Thanks.

Tom_Veasey · September 21, 2020, 2:59pm

This is a known issue with scripted fields. When you enable model plot, this will write out the values passed to the process which performs anomaly detection. The Single Metric Viewer reads the actual values for the chart from these job results. However, for the charts you see in the Anomaly Explorer, we use a search of the raw data. If someone uses a script in the datafeed, we currently don't try to work out how to generate a query from this to show these charts. One possibility that has been raised is that we will allow custom configuration for these charts as well, for the case where we can't deduce it from the datafeed configuration. See this issue.

heric · September 22, 2020, 4:38am

Thank you @Tom_Veasey

system · October 20, 2020, 4:38am

This topic was automatically closed 28 days after the last reply. New replies are no longer allowed.