Success! The following monolithic Logstash config file splits events properly (document_type is set to 'nginx-access' in the test host's Filebeat config):
# Wazuh - Logstash configuration file
## Remote Wazuh Manager - Filebeat input
input {
beats {
port => 5000
codec => "json_lines"
# ssl => true
# ssl_certificate => "/etc/logstash/logstash.crt"
# ssl_key => "/etc/logstash/logstash.key"
}
}
filter {
if [type] == "nginx-access" {
# NGINX access log processing block
grok {
match => {
"message" => '%{IPORHOST:remote_ip} - %{DATA:user_name} \[%{HTTPDATE:time}\] "%{WORD:request_action} %{DATA:request} HTTP/%{NUMBER:http_version}" %{NUMBER:response} %{NUMBER:bytes} "%{DATA:referrer}" "%{DATA:agent}"'
}
}
date {
match => [ "time", "dd/MMM/YYYY:HH:mm:ss Z" ]
locale => en
target => "@timestamp"
}
geoip {
source => "remote_ip"
target => "geoip"
}
useragent {
source => "agent"
target => "user_agent"
}
}
else
# WAZUH processing block
{
geoip {
source => "srcip"
target => "GeoLocation"
fields => ["city_name", "continent_code", "country_code2", "country_name", "region_name", "location"]
}
date {
match => ["timestamp", "ISO8601"]
target => "@timestamp"
}
mutate {
remove_field => [ "timestamp", "beat", "fields", "input_type", "tags", "count", "@version", "log", "offset", "type"]
}
}
}
output {
if [type] == 'nginx-access' {
elasticsearch {
hosts => ["elasticsearch:9200"]
index => "nginx-access-%{+YYYY.MM.dd}"
document_type => "nginx-access"
template => "/etc/logstash/nginx_template.json"
template_name => "nginx-template"
template_overwrite => true
}
}
else
{
elasticsearch {
hosts => ["elasticsearch:9200"]
index => "wazuh-alerts-%{+YYYY.MM.dd}"
document_type => "wazuh"
template => "/etc/logstash/wazuh-elastic5-template.json"
template_name => "wazuh"
template_overwrite => true
}
}
}
The generated events are as such:
{
"_index": "nginx-access-2017.06.19",
"_type": "nginx-access",
"_id": "AVy_QFslG0_ypPrxPPao",
"_score": null,
"_source": {
"request": "/admin/",
"request_action": "GET",
"agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.36",
"geoip": {
"timezone": "Europe/London",
"ip": "XX.XX.XX.XX",
"latitude": 51.48,
"continent_code": "EU",
"city_name": "Cardiff",
"country_code2": "GB",
"country_name": "United Kingdom",
"country_code3": "GB",
"region_name": "Cardiff",
"location": [
-3.18,
51.48
],
"postal_code": "XXXX",
"longitude": -3.18,
"region_code": "CRF"
},
"offset": 4050807,
"user_name": "-",
"input_type": "log",
"http_version": "1.1",
"source": "/webapp/logs/nginx_access.log",
"message": "XX.XX.XX.XX - - [19/Jun/2017:07:28:01 +0000] \"GET /admin/ HTTP/1.1\" 200 996 \"https://XXXX/admin/auth/group/\" \"Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.36\"",
"type": "nginx-access",
"tags": [
"_jsonparsefailure",
"beats_input_codec_json_applied"
],
"referrer": "https://XXXX/admin/auth/group/",
"@timestamp": "2017-06-19T07:28:01.000Z",
"remote_ip": "XX.XX.XX.XX",
"response": "200",
"bytes": "996",
"@version": "1",
"beat": {
"hostname": "XX",
"name": "XXX",
"version": "5.4.1"
},
"host": "XXX",
"time": "19/Jun/2017:07:28:01 +0000",
"user_agent": {
"patch": "3029",
"os": "XXXX",
"major": "58",
"minor": "0",
"name": "Chrome",
"os_name": "XXXX",
"device": "Other"
}
},
"fields": {
"@timestamp": [
1497857281000
]
},
"sort": [
1497857281000
]
}
In this case I'm getting the template from here: https://github.com/elastic/examples/tree/master/ElasticStack_NGINX - big thanks to you for all of the help, Magnus. I need to clean up the JSON parsing issues creating those tags, but this works!