Filebeat recreates historical data indexes after deleting old indexes

I use Filebeat to collect k8s logs and send them to Logstash, which does some conversion and then sends them to different ES indexes.

I have tried deleting the k8s-app-{year}-{month}-{day} indexes from Elasticsearch for the year 2023, but they are recreated shortly after. Upon further investigation, I found a single log file with entries starting from the year 2023, which seems to trigger the index recreation.

But I can find this log file in the registry file:

[root@k8s-node2 ~]# cat /var/lib/filebeat-data/registry/filebeat/data.json | python -m json.tool  | grep -A 5 -B 8 'de636319e82c'
    },
    {
        "FileStateOS": {
            "device": 64768,
            "inode": 3753217
        },
        "meta": null,
        "offset": 930647828,
        "source": "/var/lib/docker/containers/de636319e82cbc190cd0cd290ff235ba8c39a83d2e8669c38351a6bc1c319979/de636319e82cbc190cd0cd290ff235ba8c39a83d2e8669c38351a6bc1c319979-json.log",
        "timestamp": "2024-03-20T08:58:19.670446592Z",
        "ttl": 2400000000000,
        "type": "container"
    },

It seems the registry file does not work. Is there a way to prevent Filebeat from sending old logs to Logstash, which recreates the old indexes?

filebeat version: 7.8
logstash version: 7.12.1
elasticsearch version: 7.12.1

filebeat config:

apiVersion: v1
data:
  filebeat.yml: |-
    filebeat.inputs:
    - type: container
      close_removed: true
      ignore_older: 30m
      clean_inactive: 40m
      close_timeout: 30m
      paths:
        - /var/lib/docker/containers/*/*.log
    processors:
      - add_kubernetes_metadata:
          host: ${NODE_NAME}
          matchers:
          - logs_path:
              logs_path: "/var/lib/docker/containers/"
      - drop_fields:
          fields: ["input", "agent", "ecs", "stream", "log", "kubernetes.node.name", "kubernetes.labels.pod-template-hash", "kubernetes.labels.io_kompose_network/edgex-network", "kubernetes.pod.uid", "kubernetes.replicaset.name", "kubernetes.labels.s-product", "kubernetes.labels.s-service", "kubernetes.labels.io_kompose_service"]
      - rename:
          fields:
            - from: "kubernetes.namespace"
              to: "env"
            - from: "kubernetes.container.name"
              to: "servicename"
      - drop_event:
          when:
            or:
              - contains:
                  env: "log-prod"
              - contains:
                  env: "kube-system"
              - contains:
                  env: "istio-system"
              - contains:
                  env: "metallb-system"
              - contains:
                  "servicename.keyword": "biz-pmms-measurement"
              - contains:
                  "servicename.keyword": "oap"
              - contains:
                  "servicename": "alarm-runner"
    output.logstash:
      hosts: ["192.168.100.112:9988"]
kind: ConfigMap
....
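For reference, the ttl of 2400000000000 ns in the registry entry above works out to 2400 s, i.e. the 40m from clean_inactive, so that setting does seem to be applied. My understanding from the Filebeat docs is that clean_inactive must be greater than ignore_older plus scan_frequency, which my values satisfy if I read it right. A sketch of the input section with that spelled out (scan_frequency is not in my actual config; 10s is just the default I assume applies):

filebeat.inputs:
- type: container
  paths:
    - /var/lib/docker/containers/*/*.log
  # ignore files not modified within the last 30m when harvesting starts
  ignore_older: 30m
  # remove registry state for files inactive longer than this;
  # the docs require clean_inactive > ignore_older + scan_frequency
  clean_inactive: 40m
  # default value, not set in my real config, shown only for the math:
  # 40m > 30m + 10s
  scan_frequency: 10s
  close_removed: true
  close_timeout: 30m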

logstash config:

input {
  beats {
    host => "0.0.0.0"
    port => 9988
    type => "k8s-app"
  }
}

filter {
  if [env] == "micrvice-pre" or [env] == "micrvice-test" {
    multiline {
      pattern => "^\d{4}\-\d{2}\-\d{2}\s\d+\:\d+\:\d+\.\d+\s"
      negate => true
      what => "previous"
    }
  }
......
  if [env] =~ "edgex" {
    grok {
      match => {
        "message" => ["%{LOGLEVEL:leve} %{SPACE} %{TIMESTAMP_ISO8601:datetime} %{SPACE} %{NOTSPACE:files} %{SPACE} %{NOTSPACE:gatewayid} %{SPACE} %{NOTSPACE:externalid} \"%{GREEDYDATA:msg}\""]
      }
    }
  }
.......
  }
}

output {

  if [env] == "runo-test" {
    elasticsearch {
      hosts => ["192.168.111.132:9200","192.168.111.133:9200","192.168.111.135:9200"]
      index => "rundo-test-%{+YYYY.MM.dd}"
    }
  }

  if [env] == "micrvice-test" {
    elasticsearch {
      hosts => ["192.168.111.132:9200","192.168.111.133:9200","192.168.111.135:9200"]
      index => "bmp-micro-test-%{+YYYY.MM.dd}"
    }
  }

  if [env] =~ "edgex" {
    elasticsearch {
      hosts => ["192.168.111.132:9200","192.168.111.133:9200","192.168.111.135:9200"]
      index => "edgex-%{env}-%{+YYYY.MM.dd}"
    }
  }

  if [type] in "k8s-app" and [env] != "runo-test" and [env] != "bop-test" and [env] != "bop-pre" and [env] != "micrvice-test" and [env] != "micrvice-pre" and [env] != "runo-cloud-pre" and [env] !~ "edgex" {
    elasticsearch {
      hosts => ["192.168.111.132:9200","192.168.111.133:9200","192.168.111.135:9200"]
      index => "k8s-app-%{+YYYY.MM.dd}"
    }
  }
}
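
One workaround I am considering (not sure it is the right approach) is to also guard the Logstash side and drop anything with an old @timestamp before it reaches the elasticsearch outputs. A rough sketch with a ruby filter, assuming it sits after whatever date filter sets @timestamp from the log line, and with 30 days as an arbitrary example cutoff:

filter {
  ruby {
    # cancel events whose @timestamp is more than 30 days old
    # (30 days is only an example cutoff)
    code => "event.cancel if (Time.now.to_f - event.get('@timestamp').to_f) > (86400 * 30)"
  }
}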

BTW, not all log files have their offset stored in the registry file (how I checked is sketched after the list below):

microservice-pre--stanok 8c425a2e2512,            registry offset does not match file size
...
edgex-test-env-softbidder1 6b85cb758b79,           registry offset matches file size
....
runo-cloud-agent-test--dc 9f609ec09cb2,               log not found in registry file
.....
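
For completeness, this is roughly how I compared the registry offset with the on-disk file size (using the container from the registry entry above as the example):

# container ID from the registry entry shown earlier
ID=de636319e82cbc190cd0cd290ff235ba8c39a83d2e8669c38351a6bc1c319979
# on-disk size of the docker json log file, in bytes
stat -c %s /var/lib/docker/containers/$ID/$ID-json.log
# matching registry entry (compare its "offset" field)
python -m json.tool /var/lib/filebeat-data/registry/filebeat/data.json | grep -A 5 -B 8 "${ID:0:12}"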
