Based on the logs, it seems that the filestream ID is not unique, and this could cause data duplication. However, my rationale for setting the ID to a combination of ${data.kubernetes.pod.name}
and ${data.kubernetes.container.id}
is that it should be unique for each container's logs and should preserve the offset for that pod's container.
Sample logs:
{"log.level":"error","@timestamp":"2023-06-19T14:20:01.402Z","log.logger":"input","log.origin":{"file.name":"input-logfile/manager.go","file.line":180},"message":"filestream input with ID 'fsid-kube-proxy-jq7kd-b723ebca5c230fc03f47ef25333d54b747165964468af853e9250d4f8c25a84b' already exists, this will lead to data duplication, please use a different ID","service.name":"filebeat","ecs.version":"1.6.0"}
Sample logs:
{"log.level":"info","@timestamp":"2023-06-19T14:20:01.402Z","log.logger":"input.filestream","log.origin":{"file.name":"compat/compat.go","file.line":132},"message":"Input 'filestream' stopped","service.name":"filebeat","id":"fsid-kube-proxy-jq7kd-b723ebca5c230fc03f47ef25333d54b747165964468af853e9250d4f8c25a84b","ecs.version":"1.6.0"}
{"log.level":"info","@timestamp":"2023-06-19T14:20:01.402Z","log.logger":"input.filestream","log.origin":{"file.name":"filestream/input.go","file.line":320},"message":"Reader was closed. Closing.","service.name":"filebeat","id":"fsid-kube-proxy-jq7kd-b723ebca5c230fc03f47ef25333d54b747165964468af853e9250d4f8c25a84b","source_file":"filestream::fsid-kube-proxy-jq7kd-b723ebca5c230fc03f47ef25333d54b747165964468af853e9250d4f8c25a84b::native::50331770-66305","path":"/var/log/containers/kube-proxy-jq7kd_kube-system_kube-proxy-b723ebca5c230fc03f47ef25333d54b747165964468af853e9250d4f8c25a84b.log","state-id":"native::50331770-66305","ecs.version":"1.6.0"}
{"log.level":"info","@timestamp":"2023-06-19T14:20:01.403Z","log.logger":"input.filestream","log.origin":{"file.name":"compat/compat.go","file.line":124},"message":"Input 'filestream' stopped","service.name":"filebeat","id":"fsid-kube-proxy-jq7kd-b723ebca5c230fc03f47ef25333d54b747165964468af853e9250d4f8c25a84b","ecs.version":"1.6.0"}
{"log.level":"info","@timestamp":"2023-06-19T14:20:01.404Z","log.logger":"input.filestream","log.origin":{"file.name":"compat/compat.go","file.line":111},"message":"Input filestream starting","service.name":"filebeat","id":"fsid-kube-proxy-jq7kd-b723ebca5c230fc03f47ef25333d54b747165964468af853e9250d4f8c25a84b","ecs.version":"1.6.0"}
{"log.level":"info","@timestamp":"2023-06-19T14:20:01.404Z","log.logger":"file_watcher","log.origin":{"file.name":"filestream/fswatch.go","file.line":138},"message":"Start next scan","service.name":"filebeat","ecs.version":"1.6.0"}
Config:
# Filebeat Kubernetes autodiscover: start a dedicated filestream input for each
# kube-proxy container found on this node.
filebeat.autodiscover:
  providers:
    - type: kubernetes
      node: ${NODE_NAME}
      labels.dedot: false
      annotations.dedot: false
      hints.enabled: true
      # Turn off the default hints-generated input configuration.
      hints.default_config.enabled: false
      templates:
        # Apply the input below only to containers named "kube-proxy".
        - condition:
            equals:
              kubernetes.container.name: kube-proxy
          config:
            - type: filestream
              # The ID combines pod name and container ID, so each container
              # gets its own input ID.
              # NOTE(review): the "already exists" error may come from
              # autodiscover re-creating this same input rather than from two
              # different containers sharing an ID -- confirm against the
              # Filebeat version in use.
              id: fsid-${data.kubernetes.pod.name}-${data.kubernetes.container.id}
              # filestream names this option `close.on_state_change.removed`;
              # `close_removed` is the legacy `log` input spelling.
              close.on_state_change.removed: false
              clean_removed: false
              paths:
                - /var/log/containers/${data.kubernetes.pod.name}*${data.kubernetes.container.id}.log
              parsers:
                # Decode the container runtime log format for all streams.
                - container:
                    stream: all
                    format: auto
                # Lines that do NOT start with a letter are appended to the
                # preceding line (negate: true + match: after).
                - multiline:
                    type: pattern
                    pattern: "^[a-z]|^[A-Z]"
                    negate: true
                    match: after
              # Input-level setting (not part of the multiline parser).
              prospector.scanner.symlinks: true