How can APM anomaly detection be fine-tuned or adjusted to effectively address periodic fluctuations in service metrics?

Elastic Stack v8.13.3
I use the Elastic APM monitoring service with APM Machine Learning (ML) enabled. My service metrics follow a regular weekly pattern: the number of visits is higher on weekdays and significantly lower on weekends. Consequently, every Monday, when the number of service visits increases, anomaly detection assigns a high anomaly score, even though this increase is normal behavior for that time period. Could you provide guidance on how to debug or adjust the APM anomaly detection settings?

GET _ml/anomaly_detectors/apm-prod-f59e-apm_tx_metrics?pretty
{
  "count": 1,
  "jobs": [
    {
      "job_id": "apm-prod-f59e-apm_tx_metrics",
      "job_type": "anomaly_detector",
      "job_version": "12.0.0",
      "create_time": 1742416278745,
      "model_snapshot_id": "1743470520",
      "custom_settings": {
        "managed": true,
        "job_tags": {
          "environment": "prod",
          "apm_ml_version": 3
        },
        "custom_urls": []
      },
      "datafeed_config": {
        "datafeed_id": "datafeed-apm-prod-f59e-apm_tx_metrics",
        "job_id": "apm-prod-f59e-apm_tx_metrics",
        "authorization": {
          "roles": [
            "superuser"
          ]
        },
        "query_delay": "120s",
        "chunking_config": {
          "mode": "off"
        },
        "indices_options": {
          "ignore_unavailable": true,
          "expand_wildcards": [
            "open"
          ],
          "allow_no_indices": true,
          "ignore_throttled": true
        },
        "query": {
          "bool": {
            "filter": [
              {
                "term": {
                  "processor.event": "metric"
                }
              },
              {
                "term": {
                  "metricset.name": "transaction"
                }
              },
              {
                "term": {
                  "service.environment": "prod"
                }
              }
            ]
          }
        },
        "indices": [
          "metrics-apm*",
          "apm-*"
        ],
        "aggregations": {
          "buckets": {
            "composite": {
              "size": 5000,
              "sources": [
                {
                  "date": {
                    "date_histogram": {
                      "field": "@timestamp",
                      "fixed_interval": "60s"
                    }
                  }
                },
                {
                  "transaction.type": {
                    "terms": {
                      "field": "transaction.type"
                    }
                  }
                },
                {
                  "service.name": {
                    "terms": {
                      "field": "service.name"
                    }
                  }
                }
              ]
            },
            "aggs": {
              "@timestamp": {
                "max": {
                  "field": "@timestamp"
                }
              },
              "transaction_throughput": {
                "rate": {
                  "unit": "minute"
                }
              },
              "transaction_latency": {
                "avg": {
                  "field": "transaction.duration.histogram"
                }
              },
              "error_count": {
                "filter": {
                  "term": {
                    "event.outcome": "failure"
                  }
                },
                "aggs": {
                  "actual_error_count": {
                    "value_count": {
                      "field": "event.outcome"
                    }
                  }
                }
              },
              "success_count": {
                "filter": {
                  "term": {
                    "event.outcome": "success"
                  }
                }
              },
              "failed_transaction_rate": {
                "bucket_script": {
                  "buckets_path": {
                    "failure_count": "error_count>_count",
                    "success_count": "success_count>_count"
                  },
                  "script": "if ((params.failure_count + params.success_count)==0){return 0;}else{return 100 * (params.failure_count/(params.failure_count + params.success_count));}"
                }
              }
            }
          }
        },
        "scroll_size": 5000,
        "delayed_data_check_config": {
          "enabled": true
        }
      },
      "groups": [
        "apm"
      ],
      "description": "Detects anomalies in transaction latency, throughput and error percentage for metric data.",
      "analysis_config": {
        "bucket_span": "15m",
        "summary_count_field_name": "doc_count",
        "detectors": [
          {
            "detector_description": "high latency by transaction type for an APM service",
            "function": "high_mean",
            "field_name": "transaction_latency",
            "by_field_name": "transaction.type",
            "partition_field_name": "service.name",
            "detector_index": 0
          },
          {
            "detector_description": "transaction throughput for an APM service",
            "function": "mean",
            "field_name": "transaction_throughput",
            "by_field_name": "transaction.type",
            "partition_field_name": "service.name",
            "detector_index": 1
          },
          {
            "detector_description": "failed transaction rate for an APM service",
            "function": "high_mean",
            "field_name": "failed_transaction_rate",
            "by_field_name": "transaction.type",
            "partition_field_name": "service.name",
            "detector_index": 2
          }
        ],
        "influencers": [
          "transaction.type",
          "service.name"
        ],
        "model_prune_window": "30d"
      },
      "analysis_limits": {
        "model_memory_limit": "512mb",
        "categorization_examples_limit": 4
      },
      "data_description": {
        "time_field": "@timestamp",
        "time_format": "epoch_ms"
      },
      "model_plot_config": {
        "enabled": true,
        "annotations_enabled": true
      },
      "model_snapshot_retention_days": 10,
      "daily_model_snapshot_retention_after_days": 1,
      "results_index_name": "custom-apm",
      "allow_lazy_open": false
    }
  ]
}

Hello @arT1,

The anomaly detection job will automatically recognize weekday/weekend patterns after seeing enough examples. It will usually pick up this pattern after 30 days of observation.

Hello @valeriy42,
Thank you for your reply!
The APM anomaly detection job has only been running for 10 days, so I'll keep watching.