I'm using the file input in read mode to ingest Cloudflare logs into Elasticsearch. A cron job runs every 10 minutes and drops a gzip file, anywhere from a few hundred KB to a couple of MB, into the read path. Logstash skips some of these files and I haven't been able to work out why; the skipped filenames never show up in the logs, even at debug level. I do see an occasional plugin unrecoverable error that restarts the pipeline. (Sketches of how files are dropped and how I spot the skips follow the config.) My Logstash config is as follows:
input {
    file {
        path => "/var/spool/logstash/cloudflare*.gz"
        sincedb_path => "/var/spool/logstash/cloudflare_logs.sdb"
        sincedb_clean_after => "30m"
        mode => "read"
        file_completed_action => "log_and_delete"
        file_completed_log_path => "/var/log/logstash/cloudflare_processed.log"
        #file_sort_by => "path"
        tags => ["cloudflare"]
    }
}
filter {
    if "cloudflare" in [tags] {
        json {
            source => "message"
        }
        date {
            match => ["EdgeStartTimestamp", "ISO8601"]
            target => "@timestamp"
        }
        geoip {
            source => "ClientIP"
        }
    }
}
output {
    if "cloudflare" in [tags] {
        elasticsearch {
            hosts => ["obc-es1:9200", "obc-es2:9200", "obc-es3:9200"]
            index => "cloudflare-%{+YYYY.MM.dd}"
        }
    }
}
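
For reference, the cron job's drop step amounts to the following (a simplified Python sketch, not the real script; fetch_window is a hypothetical stand-in for the actual Cloudflare log download). It writes to a dot-prefixed temp name that the glob can't match, then renames it into place, so the file input should never see a half-written gzip:

#!/usr/bin/env python3
import os
import tempfile

READ_PATH = "/var/spool/logstash"

def fetch_window() -> bytes:
    # Hypothetical stand-in for the real Cloudflare log download.
    raise NotImplementedError

def drop_archive(name: str, payload: bytes) -> None:
    # Write to a dot-prefixed temp file on the same filesystem so the
    # final rename is atomic; the "cloudflare*.gz" glob never matches it.
    fd, tmp = tempfile.mkstemp(prefix=".incoming-", dir=READ_PATH)
    try:
        with os.fdopen(fd, "wb") as f:
            f.write(payload)
            f.flush()
            os.fsync(f.fileno())
        os.rename(tmp, os.path.join(READ_PATH, name))
    except BaseException:
        # Clean up the temp file if anything failed before the rename.
        if os.path.exists(tmp):
            os.unlink(tmp)
        raise

# Example usage:
# drop_archive("cloudflare-20240101T0000.gz", fetch_window())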
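
And this is roughly how I detect the skips (the drop-side log and its path here are illustrative; file_completed_log_path records the full path of every file the input finished reading). Anything the cron job dropped that never made it into cloudflare_processed.log counts as skipped:

#!/usr/bin/env python3
import os

# Illustrative: one dropped filename per line, written by the cron job.
DROPPED = "/var/log/cloudflare_drops.log"
COMPLETED = "/var/log/logstash/cloudflare_processed.log"

def basenames(path):
    # Both logs may contain full paths, so compare basenames only.
    with open(path) as f:
        return {os.path.basename(line.strip()) for line in f if line.strip()}

for name in sorted(basenames(DROPPED) - basenames(COMPLETED)):
    print(name)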