We are using the s3 input plugin to process our ELB logs and analyze them in Elasticsearch. Frequently, but inconsistently, it stops processing the log part-way through the file. Running Logstash in debug mode hasn't revealed any additional information. Though the actual logs found in s3 are complete, the processed logs in Elasticsearch are not.
Logstash Version: 5.5.1
Java Version: OpenJDK 1.8.0_65
S3 Input Plugin version: 3.1.5
Elasticsearch Version: 5.2.0
Here is a redacted portion of our Logstash configuration:
# Pull ELB access logs from S3 every `interval` seconds and download them
# into `temporary_directory` before emitting one event per log line.
input {
s3 {
access_key_id => "XXXXX"
bucket => "XXXXX"
# Poll the bucket every 300 seconds (5 minutes).
interval => 300
prefix => "XXXXX"
region => "us-east-1"
secret_access_key => "XXXXX"
# Files are staged here while being processed; ensure this path has
# enough free space for the largest log object in the bucket.
temporary_directory => "/var/lib/logstash/data"
add_field => {
# Routed by the elasticsearch output's index pattern:
# "%{[@metadata][type]}-%{+YYYY.MM.dd}" -> "access-YYYY.MM.dd".
"[@metadata][type]" => "access"
}
# The "UTC" tag is removed later by the date filter once the
# timestamp has been parsed successfully.
tags => ["cloud", "s3", "UTC"]
}
}
# Parse ALB/ELB access-log lines, enrich with the originating S3 key,
# expand the user agent, and set @timestamp from the log's own timestamp.
filter {
  grok {
    # Only added when the pattern matches; records which S3 object the
    # event came from (useful for auditing partially-indexed files).
    add_field => {
      "s3_key" => "%{[@metadata][s3][key]}"
    }
    id => "s3_grok"
    match => {
      "message" => [
        # NOTE: the pattern is single-quoted. The original used a
        # double-quoted string containing unescaped literal double
        # quotes (around the request, user agent, and trace id),
        # which is a config-syntax error; single-quoting the whole
        # pattern avoids any escaping.
        '%{URIPROTO:http_protocol} %{TIMESTAMP_ISO8601:timestamp} %{DATA:elb} %{IP:clientip}:%{POSINT} (?:-|%{IP:target_ip}:%{POSINT}) %{NUMBER:request_processing_time:float} %{NUMBER:target_processing_time:float} %{NUMBER:response_processing_time:float} %{NUMBER:elb_status_code:int} (?:-|%{NUMBER:target_status_code:int}) %{NUMBER:received_bytes:int} %{NUMBER:sent_bytes:int} "%{NOTSPACE:http_request_type} (?:%{URI:http_request_uri}|%{NOTSPACE:http_request_uri}) (?:-|HTTP/%{NUMBER:http_version:float})" "(?:-|%{DATA:http_user_agent})" (?:-|%{DATA:ssl_cipher}) (?:-|%{DATA:ssl_protocol}) (?:-|%{DATA:target_group_arn}) "(?:-|%{DATA:amzn_trace_id})"'
      ]
    }
  }
  useragent {
    id => "useragent"
    # Drop the raw string once it has been parsed into [ua].
    remove_field => "http_user_agent"
    source => "http_user_agent"
    target => "ua"
  }
  date {
    id => "utc_date"
    locale => "en"
    match => [
      "timestamp",
      # ISO8601 date format
      "ISO8601"
    ]
    # On success: drop the now-redundant string timestamp and the
    # "UTC" tag that the s3 input attached.
    remove_field => ["timestamp"]
    remove_tag => ["UTC"]
    timezone => "UTC"
  }
}
# Ship events to Elasticsearch, one daily index per event type
# (e.g. "access-2017.08.21", from [@metadata][type] set in the input).
output {
elasticsearch {
# NOTE(review): the elasticsearch output serializes events to JSON
# itself; this codec setting looks redundant — confirm before removing.
codec => "json"
id => "elasticsearch"
index => "%{[@metadata][type]}-%{+YYYY.MM.dd}"
hosts => ["XXX"]
# Index template is managed externally; Logstash will not install one.
manage_template => false
}
}