Hey all,
OK, so Filebeat now uses YAML and not JSON. A while back I followed the advice of the Logstash book and formatted my Apache logs like this:
LogFormat "{ \
\"host\":\"ref.example.com.example.com\", \
\"path\":\"/var/log/httpd/jf_ref.example.com_access_log\", \
\"tags\":[\"example ref.example.com\"], \
\"message\": \"%h %l %u %t \\\"%r\\\" %>s %b\", \
\"timestamp\": \"%{%Y-%m-%dT%H:%M:%S%z}t\", \
\"clientip\": \"%a\", \
\"duration\": %D, \
\"status\": %>s, \
\"request\": \"%U%q\", \
\"urlpath\": \"%U\", \
\"urlquery\": \"%q\", \
\"method\": \"%m\", \
\"bytes\": %B, \
\"vhost\": \"%v\" \
}" ref.example.com_access_json
CustomLog logs/jf_ref.example.com_access_log ref.example.com_access_json
LogLevel error
ErrorLog logs/jf_ref.example.com_error_log
This had a beautiful and convenient result under lumberjack, which included such useful things as HTTP status, request, and so on. HTTP status was particularly useful for drawing graphs in Kibana, as well as for generating Nagios alerts when detecting a 404:
{
"_index": "logstash-2016.02.05",
"_type": "apache_ref_access",
"_id": "AVKwtt8oMSH3nI3K3Jjm",
"_score": null,
"_source": {
"host": "web1",
"path": "/var/log/httpd/jf_ref.example.com_access_log",
"tags": [
"example ref.example.com"
],
"message": "::1 - - [05/Feb/2016:03:14:42 -0500] \"GET /server-status?auto HTTP/1.1\" 200 437",
"timestamp": "2016-02-05T03:14:42-0500",
"clientip": "::1",
"duration": 1799,
"status": 200,
"request": "/server-status?auto",
"urlpath": "/server-status",
"urlquery": "?auto",
"method": "GET",
"bytes": 437,
"vhost": "ref.example.com",
"@version": "1",
"@timestamp": "2016-02-05T08:14:47.371Z",
"type": "apache_ref_access",
"file": "/var/log/httpd/jf_ref.example.com_access_log",
"offset": "15366916"
},
"fields": {
"@timestamp": [
1454660087371
]
},
"highlight": {
"host.raw": [
"@kibana-highlighted-field@web1@/kibana-highlighted-field@"
],
"host": [
"@kibana-highlighted-field@web1@/kibana-highlighted-field@"
]
},
"sort": [
1454660087371
]
}
But I can't get the same result when I use Filebeat. Below is the result I get when I'm using Filebeat:
{
"_index": "logstash-2016.02.09",
"_type": "apache_ref_access",
"_id": "AVLG7CFgMSH3nI3Kx5z-",
"_score": null,
"_source": {
"message": "{ \"host\":\"ref.example.com.example.com\", \"path\":\"/var/log/httpd/jf_ref.example.com_access_log\", \"tags\":[\"example ref.example.com\"], \"message\": \"83.142.160.6 - - [09/Feb/2016:10:41:20 -0500] \\\"POST /joke_details.php?joke_id=id12&v=0 HTTP/1.1\\\" 200 6076\", \"timestamp\": \"2016-02-09T10:41:20-0500\", \"clientip\": \"83.142.160.6\", \"duration\": 1197950, \"status\": 200, \"request\": \"/joke_details.php?joke_id=id12&v=0\", \"urlpath\": \"/joke_details.php\", \"urlquery\": \"?joke_id=id12&v=0\", \"method\": \"POST\", \"bytes\": 6076, \"vhost\": \"ref.example.com\" }",
"@version": "1",
"@timestamp": "2016-02-09T15:41:22.915Z",
"beat": {
"hostname": "web1",
"name": "filebeat"
},
"count": 1,
"fields": {
"service": "apache",
"type": "apache_ref_access"
},
"input_type": "log",
"offset": 4672153,
"source": "/var/log/httpd/jf_ref.example.com_access_log",
"tags": [
"example-dev",
"web-tier",
"beats",
"beats_input_codec_plain_applied"
],
"type": "apache_ref_access",
"host": "web1"
},
"fields": {
"@timestamp": [
1455032482915
]
},
"highlight": {
"fields.type": [
"@kibana-highlighted-field@apache_ref_access@/kibana-highlighted-field@"
],
"type.raw": [
"@kibana-highlighted-field@apache_ref_access@/kibana-highlighted-field@"
],
"fields.type.raw": [
"@kibana-highlighted-field@apache_ref_access@/kibana-highlighted-field@"
],
"type": [
"@kibana-highlighted-field@apache_ref_access@/kibana-highlighted-field@"
]
},
"sort": [
1455032482915
]
}
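And there's not nearly as much useful info there: the entire JSON log line ends up as a single escaped string in the "message" field instead of being split out into separate fields such as "status" and "request". I realize that my Apache logs are output as JSON and that Logstash was able to understand that and produce these beautiful results. How do I get the same useful info out of Filebeat that I did under lumberjack? Is there a way to recreate this effect under YAML?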
Thanks