I have nginx logs with the following format:
192.168.0.1 - - [18/Jul/2022:11:20:28 +0000] "GET / HTTP/1.1" 200 15 "-" "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/103.0.0.0 Safari/537.36" "-"
192.168.128.1 - - [18/Jul/2022:13:22:15 +0000] "GET / HTTP/1.1" 200 615 "-" "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/103.0.0.0 Safari/537.36" "-"
I am using the following pipeline to parse them and store them into elasticsearch:
input {
beats {
port => 5044
}
}
filter {
grok {
match => [ "message" , "%{COMBINEDAPACHELOG}+%{GREEDYDATA:extra_fields}"]
}
mutate {
convert => ["response", "integer"]
convert => ["bytes", "integer"]
convert => ["responsetime", "float"]
}
geoip {
source => "clientip"
target => "geoip"
add_tag => [ "nginx-geoip" ]
}
date {
match => [ "timestamp" , "dd/MMM/YYYY:HH:mm:ss Z" ]
}
useragent {
source => "agent"
}
}
output {
elasticsearch {
hosts => ["http://elasticsearch:9200"]
index => "weblogs-%{+YYYY.MM.dd}"
document_type => "nginx_logs"
user => "elastic"
password => "changeme"
}
stdout { codec => rubydebug }
}
However, it seems that the part of useragent does not work, since I cannot see it:
{
"httpversion" => "1.1",
"clientip" => "192.168.0.1",
"ident" => "-",
"timestamp" => "18/Jul/2022:11:20:28 +0000",
"verb" => "GET",
"@timestamp" => 2022-07-18T11:20:28.000Z,
"@version" => "1",
"tags" => [
[0] "beats_input_codec_plain_applied",
[1] "_geoip_lookup_failure"
],
"host" => {
"name" => "9a852bd136fd"
},
"auth" => "-",
"bytes" => 15,
"referrer" => "\"-\"",
"geoip" => {},
"message" => "192.168.0.1 - - [18/Jul/2022:11:20:28 +0000] \"GET / HTTP/1.1\" 200 15 \"-\" \"Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/103.0.0.0 Safari/537.36\" \"-\"",
"response" => 200,
"agent" => {
"version" => "7.3.2",
"ephemeral_id" => "0c38336d-1e30-4aaa-9ba8-20bd7bd8fb48",
"type" => "filebeat",
"hostname" => "9a852bd136fd",
"id" => "8991142a-95df-4aed-a190-bda4649c04cd"
},
"input" => {
"type" => "log"
},
"request" => "/",
"extra_fields" => " \"-\"",
"log" => {
"file" => {
"path" => "/var/log/nginx/access.log"
},
"offset" => 11021
},
"ecs" => {
"version" => "1.0.1"
}
}
What I need is to have a field including the whole http_user_agent
content. Any idea of what is causing the error?