Logstash is delayed by 20mins reading from an S3 bucket

I've set up a couple of Logstash instances running in ECS on AWS. They pull the ALB logs from the S3 bucket that the ALB dumps them into; one pushes them into ES and the other pushes them into Loki.
Both end up being around 20-30mins delayed, i.e. the logs take 23 mins (or so) to get from the ALB to ES (or Loki).
I have tried a test where I use a stdout output section and watch the logs scroll up a terminal and in that configuration the logs are around 2-3mins delayed, which is totally acceptable.
My conclusion is that the latency is within logstash, but the cpu utilisation on the ECS Tasks doesn't go over 20% and is mostly somewhere round 5% and there is no pressure on the memory, and no swap being used.
The input plugin by default polls every 60s, which is fine by me, so why is it taking 20mins for my data to hit the dashboards?
There are no helpful errors in any of the logs.

I'm using logstash 7.14.0 and ES 7.10.1

I guess I should upgrade both to the latest version and will do that soon, but if anyone has other suggestions I'd be grateful to read them.

The config for ES looks like:


# Read ALB access-log objects from S3 and tag every event so the filter
# and output stages can recognise where it came from.
input {
    s3 {
        region       => "us-east-2"
        bucket       => "jobpixel-test-alb"
        # File in which the plugin records how far it has read.
        sincedb_path => "/mnt/sincedb/jptestalb"
        # NOTE(review): no `prefix` is set, so presumably every object in the
        # bucket is considered on each poll; if the bucket keeps growing that
        # listing may be where the delay comes from. Consider `prefix`, or
        # `delete` / `backup_to_bucket` to keep the bucket small -- confirm
        # against the S3 input plugin documentation.
        add_field    => {
            "doctype"    => "aws-application-load-balancer"
            "log_format" => "aws-application-load-balancer"
            "source"     => "test-alb"
        }
    }
}

# Parse an ALB access-log line into structured fields, then enrich it with
# request, target-group, GeoIP and user-agent details.
filter {
    # Only touch events that the input tagged as ALB logs.
    if [doctype] == "aws-application-load-balancer" or [log_format] == "aws-application-load-balancer" {

        # Split the raw line into named fields; a trailing :float / :int
        # converts the captured text to a number.
        grok {
            match => {
                "message" => '%{NOTSPACE:request_type} %{TIMESTAMP_ISO8601:log_timestamp} %{NOTSPACE:alb-name} %{NOTSPACE:client} %{NOTSPACE:target} %{NOTSPACE:request_processing_time:float} %{NOTSPACE:target_processing_time:float} %{NOTSPACE:response_processing_time:float} %{NOTSPACE:elb_status_code} %{NOTSPACE:target_status_code:int} %{NOTSPACE:received_bytes:float} %{NOTSPACE:sent_bytes:float} %{QUOTEDSTRING:request} %{QUOTEDSTRING:user_agent} %{NOTSPACE:ssl_cipher} %{NOTSPACE:ssl_protocol} %{NOTSPACE:target_group_arn} %{QUOTEDSTRING:trace_id} "%{DATA:domain_name}" "%{DATA:chosen_cert_arn}" %{NOTSPACE:matched_rule_priority} %{TIMESTAMP_ISO8601:request_creation_time} "%{DATA:actions_executed}" "%{DATA:redirect_url}" "%{DATA:error_reason}" "%{DATA:target_port_list}" "%{DATA:target_status_code_list}"'
            }
        }

        # Use the timestamp written in the log line as the event time
        # (no `target` is given, so the date filter's default applies).
        date {
            match => [ "log_timestamp", "ISO8601" ]
        }

        # QUOTEDSTRING keeps the surrounding quotes; remove every double
        # quote from these three fields.
        mutate {
            gsub => [
                "request",    '"', "",
                "trace_id",   '"', "",
                "user_agent", '"', ""
            ]
        }

        # Request line -> method, URI, protocol version.
        if [request] {
            grok {
                match => { "request" => "(%{NOTSPACE:http_method})? (%{NOTSPACE:http_uri})? (%{NOTSPACE:http_version})?" }
            }
        }

        # URI -> scheme, host, port and the remainder; every part is optional.
        if [http_uri] {
            grok {
                match => { "http_uri" => "(%{WORD:protocol})?(://)?(%{IPORHOST:domain})?(:)?(%{INT:http_port})?(%{GREEDYDATA:request_uri})?" }
            }
        }

        # Pull the IP-or-hostname part of the client field into c_ip.
        if [client] {
            grok {
                match => { "client" => "(%{IPORHOST:c_ip})?" }
            }
        }

        # Break the target group ARN into its colon- and slash-separated parts.
        if [target_group_arn] {
            grok {
                match => { "target_group_arn" => "arn:aws:%{NOTSPACE:tg-arn_type}:%{NOTSPACE:tg-arn_region}:%{NOTSPACE:tg-arn_aws_account_id}:targetgroup\/%{NOTSPACE:tg-arn_target_group_name}\/%{NOTSPACE:tg-arn_target_group_id}" }
            }
        }

        # GeoIP lookup on the client address, stored under [geoip].
        if [c_ip] {
            geoip {
                target => "geoip"
                source => "c_ip"
            }
        }

        # Parse the user-agent string; resulting fields are prefixed "ua_".
        if [user_agent] {
            useragent {
                prefix => "ua_"
                source => "user_agent"
            }
        }
    }
}

# Send the parsed events to Elasticsearch. The index name is built from the
# event's "source" field plus the event date, giving one index per day.
output {
    elasticsearch {
        index => "%{source}-%{+YYYY.MM.dd}"
        hosts => ["elasticsearch.monlb:9200"]
    }
}

And the config for Loki looks like:

# Read ALB access-log objects from S3 for the Loki pipeline and tag each
# event with its source and region.
input {
    s3 {
        bucket => "jobpixel-test-alb"
        region => "us-east-2"
        # File in which the plugin records how far it has read.
        # NOTE(review): this is the same path the Elasticsearch pipeline uses
        # for the same bucket. If both tasks mount the same volume they would
        # share read state -- confirm each instance has its own file.
        sincedb_path => "/mnt/sincedb/jptestalb"
        # NOTE(review): no `prefix` is set, so presumably every object in the
        # bucket is considered on each poll; if the bucket keeps growing that
        # listing may be where the delay comes from -- confirm against the
        # S3 input plugin documentation.
        add_field => {
            "source" => "test-alb"
            "region" => "us-east-2"
        }
    }
    # The closing brace above was missing: the s3 block was never closed,
    # which left the config unbalanced and unparseable.
}

# Extract only the timestamp from each ALB log line and use it as the
# event time; the helper fields are dropped again afterwards.
filter {
    # Capture the timestamp. request_type and trash exist only to make the
    # pattern match and are removed once the match succeeds.
    grok {
        match        => { "message" => '%{NOTSPACE:request_type} %{TIMESTAMP_ISO8601:log_timestamp} %{GREEDYDATA:trash}' }
        remove_field => [ "request_type", "trash" ]
    }

    # Set the event time from log_timestamp, then discard that field
    # (removal happens only when the date parses successfully).
    date {
        match        => [ "log_timestamp", "ISO8601" ]
        remove_field => [ "log_timestamp" ]
    }
}

# Push every event to the Loki HTTP push endpoint.
output {
    loki {
        url => "http://loki.mon:3100/loki/api/v1/push"
    }
}

This topic was automatically closed 28 days after the last reply. New replies are no longer allowed.