Metricbeat and Nginx Ingress controller crashed our production Kibana

Hi all,

I'm trying to get Metricbeat to scrape our Nginx-Ingress controller, mainly so that we can see the number and response codes of the incoming HTTP requests.

Unfortunately, it just recorded a flood of events that were obviously not real HTTP requests. They grew to 40GB over the weekend, which ruined our logging and crashed production.

Thankfully, I still have a job so I'm giving it another shot now.

Can someone take a look at these Helm configuration files and let me know what I did wrong? I'd be super grateful.

I'm not really a DevOps engineer and this stuff is kind of intimidating.

Metricbeat values.yaml - I tried dropping events for certain HTTP status codes, because only the 500-level errors are important, but it didn't seem to make much difference:

# No extra environment variables, volume mounts or volumes are configured.
extraEnvs: []
extraVolumeMounts: []
extraVolumes: []
# Empty string: no full-name override is set.
fullnameOverride: ""
hostPathRoot: /var/lib
# Metricbeat container image; the tag is given separately in imageTag.
image: docker.elastic.co/beats/metricbeat
imagePullPolicy: IfNotPresent
imagePullSecrets: []
imageTag: 7.3.0
# Settings for the kube-state-metrics component.
kube-state-metrics:
  affinity: {}
  # Every collector listed here is switched off.
  # NOTE(review): with all collectors false this component presumably
  # exports no resource metrics - confirm that is intended.
  collectors:
    certificatesigningrequests: false
    configmaps: false
    cronjobs: false
    daemonsets: false
    deployments: false
    endpoints: false
    horizontalpodautoscalers: false
    ingresses: false
    jobs: false
    limitranges: false
    namespaces: false
    nodes: false
    persistentvolumeclaims: false
    persistentvolumes: false
    poddisruptionbudgets: false
    pods: false
    replicasets: false
    replicationcontrollers: false
    resourcequotas: false
    secrets: false
    services: false
    statefulsets: false
  global: {}
  hostNetwork: false
  # kube-state-metrics image, pinned to v1.6.0.
  image:
    pullPolicy: IfNotPresent
    repository: quay.io/coreos/kube-state-metrics
    tag: v1.6.0
  nodeSelector: {}
  podAnnotations: {}
  podSecurityPolicy:
    annotations: {}
    enabled: false
  # The Prometheus monitor is disabled; prometheusScrape below is enabled.
  prometheus:
    monitor:
      additionalLabels: {}
      enabled: false
      namespace: ""
  prometheusScrape: true
  rbac:
    create: true
  replicas: 1
  # Run as uid 65534 with fsGroup 65534 (non-root).
  securityContext:
    enabled: true
    fsGroup: 65534
    runAsUser: 65534
  # Exposed inside the cluster only (ClusterIP) on port 8080.
  service:
    loadBalancerIP: ""
    nodePort: 0
    port: 8080
    type: ClusterIP
  serviceAccount:
    create: true
    imagePullSecrets: []
  tolerations: []
# Liveness probe: 10s initial delay, 10s period, 5s timeout,
# failure threshold of 3.
livenessProbe:
  failureThreshold: 3
  initialDelaySeconds: 10
  periodSeconds: 10
  timeoutSeconds: 5
managedServiceAccount: true
# Contents of metricbeat.yml, supplied as a literal block scalar.
metricbeatConfig:
  metricbeat.yml: |
    system:
      hostfs: /hostfs
    reload.enabled: true
    metricbeat.modules:
    # Scrape the Prometheus endpoint at 111.11.111.111:9913 every 10s.
    # NOTE(review): every label combination becomes its own event on each
    # scrape, so a longer period is another way to cut the indexed volume.
    - module: prometheus
      metricsets: ["collector"]
      period: 10s
      hosts: ["111.11.111.111:9913"]
      metrics_path: /metrics
      namespace: kube-system
      processors:
        # Keep only events whose `status` label is a 5xx code and drop
        # everything else, including events that carry no `status` label.
        # The previous filter enumerated individual codes to drop, listed
        # '500' among them, and also dropped on service.address equal to
        # the scraped host itself.
        - drop_event:
            when:
              not:
                regexp:
                  prometheus.labels.status: '^5[0-9][0-9]$'
    # NOTE(review): the database password is embedded in the connection
    # URL below; consider the module's username/password options fed from
    # a Kubernetes secret instead of keeping it in values.yaml.
    - module: postgresql
      enabled: true
      metricsets:
        - database
      hosts: ['postgres://monitor:fdsafdsafdsafdsa@aaa-production.dfsafdafs.us-east-1.rds.amazonaws.com:5432/aaa_production']
      processors:
        # Keep only events for the application database. The name must
        # match the database in the connection URL above; otherwise the
        # `not equals` below drops every event.
        - drop_event:
            when:
              not:
                equals:
                  postgresql.database.name: aaa_production
    output.elasticsearch:
      hosts: '${ELASTICSEARCH_HOSTS:elasticsearch-master:9200}'
# Empty string: no name override is set.
nameOverride: ""
podAnnotations: {}
# Unprivileged container, but running as uid 0 (root).
podSecurityContext:
  privileged: false
  runAsUser: 0
# Readiness probe: 10s initial delay, 10s period, 5s timeout,
# failure threshold of 3.
readinessProbe:
  failureThreshold: 3
  initialDelaySeconds: 10
  periodSeconds: 10
  timeoutSeconds: 5
replicas: 1
# Requests 100m CPU / 100Mi memory; limits are 1000m CPU / 200Mi memory.
resources:
  limits:
    cpu: 1000m
    memory: 200Mi
  requests:
    cpu: 100m
    memory: 100Mi
secretMounts: []
serviceAccount: ""
terminationGracePeriod: 30
tolerations: []
updateStrategy: RollingUpdate

Nginx-ingress values.yaml:

# Values for the nginx-ingress controller.
controller:
  # Entries under `config` are NGINX ConfigMap options.
  config:
    use-forwarded-headers: "true"
    # http-snippet is a ConfigMap option, so it belongs under
    # controller.config; as a top-level key of this values file it does
    # not reach the ConfigMap.
    # It adds a server on port 18080 that serves stub_status at
    # /nginx_status to localhost only and returns 404 for everything else.
    # NOTE(review): confirm port 18080 is not already used by the
    # controller version in use before rolling this out.
    http-snippet: |
      server {
        listen 18080;

        location /nginx_status {
          allow 127.0.0.1;
          allow ::1;
          deny all;
          stub_status on;
        }

        location / {
          return 404;
        }
      }
  # Metrics are enabled; the metrics service is annotated for scraping
  # on port 10254.
  metrics:
    enabled: true
    service:
      annotations:
        prometheus.io/port: "10254"
        prometheus.io/scrape: "true"
  replicaCount: 1
  resources:
    requests:
      cpu: 100m
      memory: 64Mi
  service:
    # AWS load balancer annotations: the certificate is attached on the
    # https port and the backend protocol is plain http.
    annotations:
      service.beta.kubernetes.io/aws-load-balancer-backend-protocol: http
      service.beta.kubernetes.io/aws-load-balancer-connection-idle-timeout: "3600"
      service.beta.kubernetes.io/aws-load-balancer-ssl-cert: arn:aws:acm:us-east-1:432432432:certificate/432432432432
      service.beta.kubernetes.io/aws-load-balancer-ssl-ports: https
    # Both service ports target the controller's http port.
    targetPorts:
      http: http
      https: http