I'm using a grok filter to parse syslog events received on a TCP input and write them to S3. Some of the resulting records are well-formed JSON, but others are malformed, and multiple events are being grouped together into a single record.
Logstash conf:
input {
  tcp {
    port => 5000
  }
}
filter {
  mutate {
    rename => { "host" => "origin_host" }
  }
  grok {
    match => {
      "message" => "%{TIMESTAMP_ISO8601:syslog_timestamp} %{HOSTNAME:syslog_hostname} %{NOTSPACE:log_type} %{GREEDYDATA:syslog_message}"
    }
  }
  mutate {
    remove_field => ["message"]
  }
}
output {
  s3 {
    id => "logoutput"
    bucket => "my-s3-bucket"
    prefix => "/s3_syslog"
    temporary_directory => "/app/logs_QUEUE"
    encoding => "gzip"
    canned_acl => "bucket-owner-full-control"
    rotation_strategy => "size_and_time"
    size_file => 13107200
    time_file => 5
    codec => json_lines
  }
}
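To rule out input framing, I plan to rerun with the tcp codec pinned explicitly and a local stdout output alongside s3, so I can see whether events are already merged before serialization. A minimal sketch (the newline delimiter is an assumption about how the senders terminate each event; line is already the tcp input's default codec, this just makes it explicit):
input {
  tcp {
    port => 5000
    # explicit framing: assumes each syslog event ends with \n
    codec => line { delimiter => "\n" }
  }
}
output {
  # temporary debug output; each event should print as exactly one JSON line
  stdout { codec => json_lines }
}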
Output:
Proper JSON:
{
  "@timestamp": "2022-08-04T15:25:00.144Z",
  "@version": "1",
  "etl_hostname": "myhost-1b-ps003.org.mydomain.net",
  "source_timestamp": "2022-08-04T15:25:00.244Z",
  "log_type": "NOTICE",
  "origin_host": "ip-192.92.99.00.ec2.internal",
  "port": 37675,
  "syslog_hostname": "s01.myhostname",
  "syslog_message": "2081 - - FLEX: syslog header message",
  "syslog_timestamp": "2022-08-04T15:25:00.127+0000"
}
Malformed/Invalid JSON (note the missing opening quote before myhostname.host in the first record):
{"syslog_hostname":myhostname.host","port":38630,"syslog_message":"1431 - - TID[139654444865280], [API-VPORT] loadAllMetadataApplicationProfiles() loadAllMetadataApplicationProfiles","log_type":"ugwd.NOTICE","origin_host":"my-ip.ec2.internal","syslog_timestamp":"2022-08-04T15:20:00.906+0000","source_timestamp":"2022-08-04T15:20:01.043Z","@timestamp":"2022-08-04T15:20:00.907Z","etl_hostname":"myhost-1c-ps001.org.mydomain.net","@version":"1"}
{"syslog_hostname":"s01.myhost.usa","port":58303,"syslog_message":"2268 - - loadAllGsGroups","log_type":"ugwd.NOTICE","origin_host":"ip-10.10.01.01.ec2.internal","syslog_timestamp":"2022-08-04T15:20:00.940+0000","source_timestamp":"2022-08-04T15:20:01.043Z","@timestamp":"2022-08-04T15:20:00.943Z","etl_hostname":"myhost-1c-ps001.org.mydomain.net","@version":"1"}