I'm using logstash to parse logs from a hosting provider (Acquia), and send them to an elastic.co hosted elasticsearch account.
We have a lot of apache records that don't have a client ip at the start of the log entry. For these records, I'm getting a _grokparsefailure
attached to the elasticsearch document.
Is there a way to get this to pass?
Raw apache record
- - - [20/Mar/2017:14:12:05 +0000] "GET /foo/bar HTTP/1.1" 301 237 "-" "Mozilla/5.0 (compatible; DotBot/1.1; http://www.opensiteexplorer.org/dotbot, help@moz.com)" vhost=abc123.devcloud.acquia-sites.com host=www.example.com hosting_site=s23p523 pid=4671 request_time=1218 forwarded_for="111.22.33.44, 10.11.12.13" request_id="v-325de3f6-0d77-11e7-97a7-22000a91abd9"
Logstash config
input {
file {
path => "/opt/logs/*_access.log"
start_position => "beginning"
type => "apache"
}
}
filter {
if [type] == "apache" {
mutate {
remove_field => [ "host" ]
}
grok {
match => [
"message", "%{COMBINEDAPACHELOG} vhost=%{IPORHOST:vhost} host=%{IPORHOST:host}"
]
}
geoip { source => "clientip" }
date {
locale => "en"
match => [ "timestamp", "dd/MMM/yyyy:HH:mm:ss Z" ]
}
}
}
output {
elasticsearch {
hosts => [ "https://elastic.example.com" ]
}
}
Elasticsearch record
{
"_index": "logstash-2017.03.20",
"_type": "apache",
"_id": "AVrsFuWrfNwGLfIgIsDP",
"_score": null,
"_source": {
"path": "/opt/logs/example-prod_access.log",
"@timestamp": "2017-03-20T14:20:07.640Z",
"geoip": {},
"@version": "1",
"message": "- - - [20/Mar/2017:14:12:05 +0000] "GET /foo/bar HTTP/1.1" 301 237 "-" "Mozilla/5.0 (compatible; DotBot/1.1; http://www.opensiteexplorer.org/dotbot, help@moz.com)" vhost=abc123.devcloud.acquia-sites.com host=www.example.com hosting_site=s23p523 pid=4671 request_time=1218 forwarded_for="216.244.66.239, 10.170.207.237" request_id="v-325de3f6-0d77-11e7-97a7-22000a91abd9" ",
"type": "apache",
"tags": [
"_grokparsefailure",
"_geoip_lookup_failure"
]
},
"fields": {
"@timestamp": [
1490019607640
]
},
"sort": [
1490019607640
]
}