Hi all,
I'm trying to get Metricbeat to scrape our Nginx-Ingress, mainly so that we can see the number and response codes of the incoming HTTP requests.
Unfortunately, it only showed a flood of events that were obviously not real HTTP requests; these grew to 40GB over the weekend, which ruined our logging and crashed production.
Thankfully, I still have a job so I'm giving it another shot now.
Can someone take a look at these Helm configuration files and let me know what I did wrong? I'd be super grateful.
I'm not really a DevOps engineer and this stuff is kind of intimidating.
Metricbeat values.yaml - I tried dropping events for certain HTTP status codes, because only the 500-level errors are important, but it didn't seem to make much difference:
# Empty lists: no extra environment variables, volume mounts or volumes are set.
extraEnvs: []
extraVolumeMounts: []
extraVolumes: []

fullnameOverride: ''
hostPathRoot: '/var/lib'

# Metricbeat container image and how it is pulled.
image: 'docker.elastic.co/beats/metricbeat'
imagePullPolicy: 'IfNotPresent'
imagePullSecrets: []
# Quoted so the version is always read as a string.
imageTag: '7.3.0'
# Values for kube-state-metrics (presumably the dependency bundled with the
# Metricbeat chart - confirm). Everything below must be nested under this key;
# left flat, keys such as `image`, `replicas` and `tolerations` collide with
# the Metricbeat keys of the same name.
kube-state-metrics:
  affinity: {}
  # Every collector is switched off, so no Kubernetes object metrics are
  # gathered by this component.
  collectors:
    certificatesigningrequests: false
    configmaps: false
    cronjobs: false
    daemonsets: false
    deployments: false
    endpoints: false
    horizontalpodautoscalers: false
    ingresses: false
    jobs: false
    limitranges: false
    namespaces: false
    nodes: false
    persistentvolumeclaims: false
    persistentvolumes: false
    poddisruptionbudgets: false
    pods: false
    replicasets: false
    replicationcontrollers: false
    resourcequotas: false
    secrets: false
    services: false
    statefulsets: false
  global: {}
  hostNetwork: false
  image:
    pullPolicy: IfNotPresent
    repository: quay.io/coreos/kube-state-metrics
    tag: v1.6.0
  nodeSelector: {}
  podAnnotations: {}
  podSecurityPolicy:
    annotations: {}
    enabled: false
  prometheus:
    monitor:
      additionalLabels: {}
      enabled: false
      namespace: ""
  prometheusScrape: true
  rbac:
    create: true
  replicas: 1
  # 65534 is used for both the user and the filesystem group.
  securityContext:
    enabled: true
    fsGroup: 65534
    runAsUser: 65534
  service:
    loadBalancerIP: ""
    nodePort: 0
    port: 8080
    type: ClusterIP
  serviceAccount:
    create: true
    imagePullSecrets: []
  tolerations: []
# Liveness probe timings; the four fields must be nested under the probe key
# to be read as probe settings rather than as unrelated top-level values.
livenessProbe:
  failureThreshold: 3
  initialDelaySeconds: 10
  periodSeconds: 10
  timeoutSeconds: 5
managedServiceAccount: true
# Contents of metricbeat.yml. The body of the literal block (`|`) must be
# indented deeper than the `metricbeat.yml` key; otherwise the file content is
# empty and the remaining lines are parsed as (invalid) chart values.
metricbeatConfig:
  metricbeat.yml: |
    system:
      hostfs: /hostfs
    # NOTE(review): the nesting of this key was lost in the paste; it is kept
    # at the top level here - confirm where it is meant to live.
    reload.enabled: true
    metricbeat.modules:
      # Scrapes the Prometheus endpoint at the address below every 10 seconds.
      # NOTE(review): a hard-coded IP is fragile; presumably this is the
      # ingress metrics Service - consider its DNS name instead.
      - module: prometheus
        metricsets: ["collector"]
        period: 10s
        hosts: ["111.11.111.111:9913"]
        metrics_path: /metrics
        namespace: kube-system
        processors:
          # Keep only events whose `status` label is a 5xx code and drop
          # everything else, including series that carry no `status` label.
          # The earlier deny-list enumerated codes one by one, also listed
          # '500', and matched on service.address (the scraped host itself),
          # so it could not isolate the 5xx errors it was meant to keep.
          - drop_event:
              when:
                not:
                  regexp:
                    prometheus.labels.status: '^5[0-9][0-9]$'
      - module: postgresql
        enabled: true
        metricsets:
          - database
        # NOTE(review): the connection URL embeds the `monitor` password in
        # plain text; consider supplying credentials from a Secret instead.
        hosts: ['postgres://monitor:fdsafdsafdsafdsa@aaa-production.dfsafdafs.us-east-1.rds.amazonaws.com:5432/aaa_production']
        processors:
          # Drops every event whose database name is not `aaaa_production`.
          # NOTE(review): the URL above names `aaa_production` (three a's);
          # if the filter value is a typo, every event is dropped - confirm.
          - drop_event:
              when:
                not:
                  equals:
                    postgresql.database.name: aaaa_production
    output.elasticsearch:
      # Uses ELASTICSEARCH_HOSTS when set, else elasticsearch-master:9200.
      hosts: '${ELASTICSEARCH_HOSTS:elasticsearch-master:9200}'
# Remaining Metricbeat pod settings. Child keys are nested under their parents
# so that, for example, `cpu`/`memory` belong to `limits` and `requests`
# instead of overwriting one another as flat top-level keys.
nameOverride: ""
podAnnotations: {}
# Runs as user 0 without privileged mode.
podSecurityContext:
  privileged: false
  runAsUser: 0
readinessProbe:
  failureThreshold: 3
  initialDelaySeconds: 10
  periodSeconds: 10
  timeoutSeconds: 5
replicas: 1
resources:
  limits:
    cpu: 1000m
    memory: 200Mi
  requests:
    cpu: 100m
    memory: 100Mi
secretMounts: []
serviceAccount: ""
terminationGracePeriod: 30
tolerations: []
updateStrategy: RollingUpdate
Nginx-ingress values.yaml:
# nginx-ingress controller settings, with the nesting restored.
controller:
  # Options under `config` are string values, hence the quoted "true".
  config:
    use-forwarded-headers: "true"
  # Enables the metrics endpoint and annotates its Service for scraping on
  # port 10254.
  metrics:
    enabled: true
    service:
      annotations:
        prometheus.io/port: "10254"
        prometheus.io/scrape: "true"
  replicaCount: 1
  resources:
    requests:
      cpu: 100m
      memory: 64Mi
  service:
    annotations:
      service.beta.kubernetes.io/aws-load-balancer-backend-protocol: http
      service.beta.kubernetes.io/aws-load-balancer-connection-idle-timeout: "3600"
      service.beta.kubernetes.io/aws-load-balancer-ssl-cert: arn:aws:acm:us-east-1:432432432:certificate/432432432432
      service.beta.kubernetes.io/aws-load-balancer-ssl-ports: https
    # Both listeners forward to the controller's `http` port; presumably TLS
    # is terminated at the load balancer (see the ssl-cert annotation).
    targetPorts:
      http: http
      https: http
# NOTE(review): judging by the sorted key order this is a top-level key. The
# chart documents nginx ConfigMap options under `controller.config`, so this
# snippet is presumably ignored here - confirm the intended placement.
# The snippet defines a server on port 18080 that serves stub_status at
# /nginx_status to localhost only and answers 404 for every other path.
http-snippet: |
  server {
    listen 18080;
    location /nginx_status {
      allow 127.0.0.1;
      allow ::1;
      deny all;
      stub_status on;
    }
    location / {
      return 404;
    }
  }