All right, this is the input configuration, placed on server 1 (running redis and logstash):
# Shipper side: tail the log files on disk and label every event.
input {
  file {
    # Read files that are seen for the first time from their first line
    # instead of only picking up lines appended after startup.
    start_position => "beginning"
    path => ["/path/to/logs/*.log"]
    # Labels attached to each event; downstream filters branch on `type`.
    type => "logstash"
    tags => ["bluecoat"]
  }
}
Here is the configuration on server 2 (running Elasticsearch and Logstash):
# Indexer side: pull events from the Redis list on server1.
input {
  redis {
    host => "server1"
    # Name of the Redis list to read events from.
    key => "logstash"
    data_type => "list"
    # Optional setting (plugin default is 1): number of reader threads.
    threads => 4
    # NOTE(review): per the Logstash docs an input-level `type` does not
    # overwrite a type the event already carries - confirm that shipped
    # events still arrive with their original type.
    type => "redis-input"
  }
}
# Parse Blue Coat proxy access-log lines into structured fields, set the
# event timestamp from the log line, and enrich each event with destination
# geolocation, parsed User-Agent details and a category array.
filter {
  if [type] == "logstash" {
    # [path] is compared for exact equality, so the directory here has to be
    # the one the file input watches ("/path/to/logs/*.log"); the previous
    # "/path/to/log/..." value could never match a shipped event.
    if [path] == "/path/to/logs/file-name.log" {
      # drop comment lines
      if ([message] =~ /^#/) {
        drop {}
      }

      # Split the space-separated line into named columns. Double-quoted
      # values (the timestamp and the category list in the sample line) stay
      # single columns provided the csv quote character is left at '"'.
      csv {
        columns => ["localtime", "time_taken", "c_ip", "cs_x_forwarded_for", "sc_status", "s_action", "sc_bytes", "cs_bytes", "cs_method", "cs_uri_scheme", "cs_host", "cs_uri_port", "cs_uri_path", "cs_uri_query", "cs_username", "cs_auth_group", "s_hierarchy", "s_supplier_name", "rs_content_type", "cs_referer", "cs_user_agent", "sc_filter_result", "cs_categories", "x_virus_id", "s_ip"]
        separator => " "
        source => "message"
        # "@timestamp" is deliberately NOT removed: the date filter below
        # overwrites it when "localtime" parses, and an event whose
        # timestamp is missing or unparsable must still carry one.
        remove_field => ["message", "host", "path", "@version"]
      }

      # Use the time recorded in the log line as the event timestamp.
      if [localtime] {
        date {
          match => ["localtime", "[dd/MMM/YYYY:HH:mm:ss Z]"]
        }
      }

      # enrich log entry with destination geolocation info
      # The destination is the requested host (cs_host), not the client
      # address (c_ip). Copy it to a scratch field, let the dns filter replace
      # a hostname with its address, then look that address up with geoip.
      if ([cs_host] and [cs_host] != "-") {
        mutate {
          add_field => { "cs_host_ip" => "%{cs_host}" }
        }
        dns {
          resolve => ["cs_host_ip"]
          action => "replace"
        }
        geoip {
          source => "cs_host_ip"
        }
      }

      # parse User-Agent header
      if ([cs_user_agent] and [cs_user_agent] != "" and [cs_user_agent] != "-") {
        useragent {
          source => "cs_user_agent"
          prefix => "user_agent_"
        }
      }

      # split Blue web site categories into an array
      if ([cs_categories] and [cs_categories] != "" and [cs_categories] != "-") {
        mutate {
          split => { "cs_categories" => ";" }
        }
      }

      # type convert number fields
      # NOTE(review): "r_port", "s_port" and "duration" are not among the csv
      # columns above, so these entries presumably do nothing for this log
      # format - confirm and remove, or add the matching columns.
      mutate {
        convert => {
          "sc_bytes"   => "integer"
          "time_taken" => "integer"
          "r_port"     => "integer"
          "s_port"     => "integer"
          "cs_bytes"   => "integer"
          "duration"   => "integer"
        }
      }
    }
  }
}
I should mention that the Blue Coat logs are not the only ones processed by Logstash: I also have other logs coming in via syslog, and their parsing works fine.
And here is what a single log line looks like:
"[21/May/2014:17:12:44 +0100]" 18 10.20.2.33 - 200 TCP_HIT 5689 169 GET http www.google.fr 80 / - NavInternet - - 211.85.89.15 pub/sky - - OBSERVED "url_allowed;Search Engines/Portals" - 163.10.165.24
Thanks in advance.