Daemonset - collecting the pod's namespace along with log

Hi Guys,

I set up Filebeat with the DaemonSet approach. All containers log is shipped to LogStash successfully. However, I'm facing the issue that I cannot differentiate container logs for DEV and QA namespace under the same Kubernetes cluster.

Can you help to advise is it possible to collect logs with their namespace with DaemonSet pattern?

filebeat-deployment.yaml

apiVersion: v1
kind: ConfigMap
metadata:
  name: filebeat-config
  namespace: cma-logging
  labels:
    k8s-app: filebeat
data:
  filebeat.yml: |-
    filebeat.autodiscover:
      providers:
        - type: kubernetes
          node: ${NODE_NAME}
          hints.enabled: true
          hints.default_config:
            type: container
            paths:
              - /xxx/data/docker/containers/*${data.kubernetes.container.id}.log
    
    processors:
      - add_host_metadata:
      - add_kubernetes_metadata:

    output.logstash:
      hosts: ['logstash-service:5044']
---
apiVersion: apps/v1
kind: DaemonSet
metadata:
  name: filebeat
  namespace: cma-logging
  labels:
    k8s-app: filebeat
spec:
  selector:
    matchLabels:
      k8s-app: filebeat
  template:
    metadata:
      labels:
        k8s-app: filebeat
    spec:
      serviceAccountName: filebeat
      terminationGracePeriodSeconds: 30
      hostNetwork: true
      dnsPolicy: ClusterFirstWithHostNet
      containers:
        - name: filebeat
          image: my-own-repository-xxx.org:5000/elastic/filebeat:7.13.3
          args: ["-c", "/etc/filebeat.yml", "-e"]
          env:
            - name: NODE_NAME
              valueFrom:
                fieldRef:
                  fieldPath: spec.nodeName
            - name: POD_NAMESPACE
              valueFrom:
                fieldRef:
                  fieldPath: metadata.namespace
          securityContext:
            runAsUser: 0
          resources:
            limits:
              memory: 200Mi
            requests:
              cpu: 100m
              memory: 100Mi
          volumeMounts:
            - name: config
              mountPath: /etc/filebeat.yml
              readOnly: true
              subPath: filebeat.yml
            - name: data
              mountPath: /usr/share/filebeat/data
            - name: varlibdockercontainers
              mountPath: /xxx/data/docker/containers/
              readOnly: true
            - name: varlog
              mountPath: /var/log
              readOnly: true
      volumes:
        - name: config
          configMap:
            defaultMode: 0600
            name: filebeat-config
        - name: varlibdockercontainers
          hostPath:
            path: /va/data/docker/containers/
        - name: varlog
          hostPath:
            path: /var/log
        # data folder stores a registry of read status for all files, so we don't send everything again on a Filebeat pod restart
        - name: data
          hostPath:
            path: /var/lib/filebeat-data
            type: DirectoryOrCreate
---
apiVersion: rbac.authorization.k8s.io/v1beta1
kind: ClusterRoleBinding
metadata:
  name: filebeat
subjects:
  - kind: ServiceAccount
    name: filebeat
    namespace: cma-logging
roleRef:
  kind: ClusterRole
  name: filebeat
  apiGroup: rbac.authorization.k8s.io
---
apiVersion: rbac.authorization.k8s.io/v1beta1
kind: ClusterRole
metadata:
  name: filebeat
  labels:
    k8s-app: filebeat
rules:
  - apiGroups: [""] # "" indicates the core API group
    resources:
      - namespaces
      - pods
      - nodes
    verbs:
      - get
      - watch
      - list
  - apiGroups: ["apps"]
    resources:
      - replicasets
    verbs:
      - get
      - watch
      - list
---
apiVersion: v1
kind: ServiceAccount
metadata:
  name: filebeat
  namespace: cma-logging
  labels:
    k8s-app: filebeat

logstash-deployment.yaml

apiVersion: v1
kind: ConfigMap
metadata:
  name: logstash-configmap
  namespace: cma-logging
data:
  logstash.yml: |
    http.host: "0.0.0.0"
    path.config: /usr/share/logstash/pipeline
    pipeline.batch.size: 50
    pipeline.batch.delay: 50
  logstash.conf: |
    input {
      beats {
        port => 5044
      }
    }
    filter {
      grok {
        match => { "message" => "%{TIMESTAMP_ISO8601:logdate} \[%{DATA:traceId}\] \[%{DATA:threadId}\] \[%{DATA:application}\] %{LOGLEVEL:level} %{SPACE}%{GREEDYDATA:raw}" }
      }
      if "_grokparsefailure" in [tags] {
        drop { }
      }
      date {
        match => [ "logdate" , "yyyy-MM-dd HH:mm:ss.SSS" ]
      }
    }
    output {
      elasticsearch {
          hosts    => "https://elasticsearch.xxx.com:443"
          proxy    => "http://xx.xx.xx.xx:80"
          index    => "cma-non-prod"
          user     => "xxx"
          password => "yyy"
      }
      stdout { codec => rubydebug }
    }
---
apiVersion: apps/v1
kind: Deployment
metadata:
  name: logstash-deployment
  namespace: cma-logging
spec:
  replicas: 1
  selector:
    matchLabels:
      app: logstash
  template:
    metadata:
      labels:
        app: logstash
    spec:
      nodeName: node4
      containers:
      - name: logstash
        image: my-own-repository-xxx:5000/logstash:7.13.3
        ports:
        - containerPort: 5044
        volumeMounts:
          - name: config-volume
            mountPath: /usr/share/logstash/config
          - name: logstash-pipeline-volume
            mountPath: /usr/share/logstash/pipeline
      volumes:
      - name: config-volume
        configMap:
          name: logstash-configmap
          items:
            - key: logstash.yml
              path: logstash.yml
      - name: logstash-pipeline-volume
        configMap:
          name: logstash-configmap
          items:
            - key: logstash.conf
              path: logstash.conf

Using AutoDiscovery can help to get pod namespace.

My working config with namespace filtering...

apiVersion: v1
kind: ConfigMap
metadata:
  name: filebeat-config
  namespace: logging
  labels:
    k8s-app: filebeat
data:
  filebeat.yml: |-                  
    filebeat.autodiscover:
      providers:
        - type: kubernetes
          node: ${NODE_NAME}
          scope: node
          templates:
            - condition:
                equals:
                  kubernetes.namespace: "cma-prod"
              config:
                - type: container
                  paths:
                    - /var/data/docker/containers/*/*.log
    
    # for debuging
    #output.console:
    #  pretty: true
    output.logstash:
      hosts: ['logstash-service:5044']

    # You can set logging.level to debug to see the generated events by the running filebeat instance.
    #logging.level: info
    #logging.to_files: false
    #logging.files:
    #  path: /var/log/filebeat
    #  name: filebeat
    #  keepfiles: 7
    #  permissions: 0644
---
apiVersion: apps/v1
kind: DaemonSet
metadata:
  name: filebeat
  namespace: logging
  labels:
    k8s-app: filebeat
spec:
  selector:
    matchLabels:
      k8s-app: filebeat
  template:
    metadata:
      labels:
        k8s-app: filebeat
    spec:
      serviceAccountName: filebeat
      terminationGracePeriodSeconds: 30
      hostNetwork: true
      dnsPolicy: ClusterFirstWithHostNet
      containers:
        - name: filebeat
          image: pvnexusho001th.dmz.th-foo.org:5000/elastic/filebeat:7.13.3
          args: ["-c", "/etc/filebeat.yml", "-e"]
          env:
            - name: NODE_NAME
              valueFrom:
                fieldRef:
                  fieldPath: spec.nodeName
            - name: POD_NAMESPACE
              valueFrom:
                fieldRef:
                  fieldPath: metadata.namespace
          securityContext:
            runAsUser: 0
          resources:
            limits:
              memory: 200Mi
            requests:
              cpu: 100m
              memory: 100Mi
          volumeMounts:
            - name: config
              mountPath: /etc/filebeat.yml
              readOnly: true
              subPath: filebeat.yml
            - name: data
              mountPath: /usr/share/filebeat/data
            - name: varlibdockercontainers
              mountPath: /var/data/docker/containers/
              readOnly: true
            - name: varlog
              mountPath: /var/log
              readOnly: true
      volumes:
        - name: config
          configMap:
            defaultMode: 0600
            name: filebeat-config
        - name: varlibdockercontainers
          hostPath:
            path: /var/data/docker/containers/
        - name: varlog
          hostPath:
            path: /var/log
        # data folder stores a registry of read status for all files, so we don't send everything again on a Filebeat pod restart
        - name: data
          hostPath:
            path: /var/lib/filebeat-data
            type: DirectoryOrCreate
---
apiVersion: rbac.authorization.k8s.io/v1beta1
kind: ClusterRoleBinding
metadata:
  name: filebeat
subjects:
  - kind: ServiceAccount
    name: filebeat
    namespace: logging
roleRef:
  kind: ClusterRole
  name: filebeat
  apiGroup: rbac.authorization.k8s.io
---
apiVersion: rbac.authorization.k8s.io/v1beta1
kind: ClusterRole
metadata:
  name: filebeat
  labels:
    k8s-app: filebeat
rules:
  - apiGroups: [""] # "" indicates the core API group
    resources:
      - namespaces
      - pods
      - nodes
    verbs:
      - get
      - watch
      - list
  - apiGroups: ["apps"]
    resources:
      - replicasets
    verbs:
      - get
      - watch
      - list
---
apiVersion: v1
kind: ServiceAccount
metadata:
  name: filebeat
  namespace: logging
  labels:
    k8s-app: filebeat