I'm using the file input to ingest Cloudflare logs into Elasticsearch. A cron job runs every 10 minutes and drops a gzip file, anywhere from a few hundred KB to a couple of MB, into the read path. Logstash randomly skips some of these files and I haven't been able to determine why: the skipped filenames never appear in the debug logs. I do occasionally see a "plugin unrecoverable error" that restarts the pipeline. My Logstash config is as follows:
input {
  file {
    path => "/var/spool/logstash/cloudflare*.gz"
    sincedb_path => "/var/spool/logstash/cloudflare_logs.sdb"
    sincedb_clean_after => "30m"
    mode => "read"
    file_completed_action => "log_and_delete"
    file_completed_log_path => "/var/log/logstash/cloudflare_processed.log"
    #file_sort_by => "path"
    tags => ["cloudflare"]
  }
}

filter {
  if "cloudflare" in [tags] {
    json {
      source => "message"
    }
    date {
      match => ["EdgeStartTimestamp", "ISO8601"]
      target => "@timestamp"
    }
    geoip {
      source => "ClientIP"
    }
  }
}

output {
  if "cloudflare" in [tags] {
    elasticsearch {
      hosts => ["obc-es1:9200", "obc-es2:9200", "obc-es3:9200"]
      index => "cloudflare-%{+YYYY.MM.dd}"
    }
  }
}
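
To make "skipped" concrete: since file_completed_action => "log_and_delete" removes every file Logstash finishes reading, anything still sitting in the spool directory well after its cron interval was never read. A minimal Python sketch of that check (the 20-minute threshold is my own assumption, i.e. two cron intervals):

#!/usr/bin/env python3
"""List cloudflare*.gz files that Logstash should have read and deleted by now."""
import glob
import os
import time

SPOOL_GLOB = "/var/spool/logstash/cloudflare*.gz"  # same glob as the file input above
MAX_AGE_SECS = 20 * 60  # two cron intervals; log_and_delete should have removed the file

now = time.time()
for path in sorted(glob.glob(SPOOL_GLOB)):
    age = now - os.path.getmtime(path)
    if age > MAX_AGE_SECS:
        print(f"possibly skipped: {path} (age {age / 60:.0f} min)")

Anything this prints has outlived the read-and-delete cycle, which is how I'm defining "skipped" above.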