Anomaly Detection: Need Help Please

Hello,

I'm desperately trying to create an ML job to answer the following question:

How can I detect when a user deviates from their usual behavior, based on fields such as country, city, ...
For example, if a user "Max", who usually connects from FRANCE, city of PARIS, and uses WINDOWS, at some point connects from another country, another city, or with another OS, my job should notify me.
I have an index that contains all the information about users, such as userId, country, city, osFamily, ...
Once the job was started, no results were displayed, so I think my configuration choices are not good.
Here is the job in question:

{
"job_id": "3",
"job_type": "anomaly_detector",
"job_version": "7.13.1",
"create_time": 1624003582815,
"custom_settings": {
"custom_urls": []
},
"description": "",
"analysis_config": {
"bucket_span": "15m",
"detectors": [
{
"detector_description": "distinct_count(country) partitionfield=userId",
"function": "distinct_count",
"field_name": "country",
"partition_field_name": "userId",
"detector_index": 0
},
{
"detector_description": "distinct_count(city) partitionfield=userId",
"function": "distinct_count",
"field_name": "city",
"partition_field_name": "userId",
"detector_index": 1
},
{
"detector_description": "distinct_count(osFamily) partitionfield=userId",
"function": "distinct_count",
"field_name": "osFamily",
"partition_field_name": "userId",
"detector_index": 2
}
],
"influencers": [
"country",
"city",
"osFamily"
]
},
"analysis_limits": {
"model_memory_limit": "166mb",
"categorization_examples_limit": 4
},
"data_description": {
"time_field": "startTime",
"time_format": "epoch_ms"
},
"model_plot_config": {
"enabled": false,
"annotations_enabled": false
},
"model_snapshot_retention_days": 10,
"daily_model_snapshot_retention_after_days": 1,
"results_index_name": "shared",
"allow_lazy_open": false,
"data_counts": {
"job_id": "3",
"processed_record_count": 0,
"processed_field_count": 0,
"input_bytes": 0,
"input_field_count": 0,
"invalid_date_count": 0,
"missing_field_count": 0,
"out_of_order_timestamp_count": 0,
"empty_bucket_count": 0,
"sparse_bucket_count": 0,
"bucket_count": 0,
"input_record_count": 0
},
"model_size_stats": {
"job_id": "3",
"result_type": "model_size_stats",
"model_bytes": 0,
"total_by_field_count": 0,
"total_over_field_count": 0,
"total_partition_field_count": 0,
"bucket_allocation_failures_count": 0,
"memory_status": "ok",
"categorized_doc_count": 0,
"total_category_count": 0,
"frequent_category_count": 0,
"rare_category_count": 0,
"dead_category_count": 0,
"failed_category_count": 0,
"categorization_status": "ok",
"log_time": 1624003604016
},
"forecasts_stats": {
"total": 0,
"forecasted_jobs": 0
},
"state": "opened",
"node": {
"id": "ZHYsMqCORDeOcS2b3XLQog",
"name": "node-1",
"ephemeral_id": "rG5ykKlSTVWlZdHbmHZHug",
"transport_address": "10.10.0.217:9300",
"attributes": {
"ml.machine_memory": "8589328384",
"xpack.installed": "true",
"transform.node": "true",
"ml.max_open_jobs": "512",
"ml.max_jvm_size": "4294967296"
}
},
"assignment_explanation": "",
"open_time": "2497s",
"timing_stats": {
"job_id": "3",
"bucket_count": 0,
"total_bucket_processing_time_ms": 0,
"exponential_average_bucket_processing_time_per_hour_ms": 0
},
"datafeed_config": {
"datafeed_id": "datafeed-3",
"job_id": "3",
"query_delay": "102956ms",
"chunking_config": {
"mode": "auto"
},
"indices_options": {
"expand_wildcards": [
"open"
],
"ignore_unavailable": false,
"allow_no_indices": true,
"ignore_throttled": true
},
"query": {
"bool": {
"must": {
"exists": {
"field": "userId"
}
}
}
},
"indices": [
"log_users"
],
"scroll_size": 1000,
"delayed_data_check_config": {
"enabled": true
},
"state": "started",
"node": {
"id": "ZHYsMqCORDeOcS2b3XLQog",
"name": "node-1",
"ephemeral_id": "rG5ykKlSTVWlZdHbmHZHug",
"transport_address": "10.10.0.217:9300",
"attributes": {
"ml.machine_memory": "8589328384",
"ml.max_open_jobs": "512",
"ml.max_jvm_size": "4294967296"
}
},
"assignment_explanation": "",
"timing_stats": {
"job_id": "3",
"search_count": 6,
"bucket_count": 0,
"total_search_time_ms": 6220,
"exponential_average_search_time_per_hour_ms": 6220
}
}
}

Thanks. :slightly_smiling_face:

It seems to me you need to use rare functions.

Rare functions in the detectors allow you to bubble up rare behavior given past behavior.

Here is a nice blog digging into it further

I think that, if each user is different and should be treated separately, you will need three detectors:

{
"detector_description": "rare by country partitionfield=userId",
"function": "rare",
"by_field_name": "country",
"partition_field_name": "userId"
},
{
"detector_description": "rare by city partitionfield=userId",
"function": "rare",
"by_field_name": "city",
"partition_field_name": "userId"
},
{
"detector_description": "rare by osFamily partitionfield=userId",
"function": "rare",
"by_field_name": "osFamily",
"partition_field_name": "userId"
}

Hi BenTrent,

Thank you so much for your answer. I'll try that :smiley:

This topic was automatically closed 28 days after the last reply. New replies are no longer allowed.