Log formatting under lumberjack lost in filebeats

Hey all,

OK, so Filebeat now uses YAML and not JSON. A while back I followed the advice of the Logstash book and formatted my Apache logs like this:

LogFormat "{ \
      \"host\":\"ref.example.com.example.com\", \
      \"path\":\"/var/log/httpd/jf_ref.example.com_access_log\", \
      \"tags\":[\"example ref.example.com\"], \
      \"message\": \"%h %l %u %t \\\"%r\\\" %>s %b\", \
      \"timestamp\": \"%{%Y-%m-%dT%H:%M:%S%z}t\", \
      \"clientip\": \"%a\", \
      \"duration\": %D, \
      \"status\": %>s, \
      \"request\": \"%U%q\", \
      \"urlpath\": \"%U\", \
      \"urlquery\": \"%q\", \
      \"method\": \"%m\", \
      \"bytes\": %B, \
      \"vhost\": \"%v\" \
    }" ref.example.com_access_json
    CustomLog logs/jf_ref.example.com_access_log ref.example.com_access_json
    LogLevel error
    ErrorLog  logs/jf_ref.example.com_error_log

This had a beautiful and convenient result under lumberjack, which included such useful things as the HTTP status, the request, and so on. The HTTP status was particularly useful for drawing graphs in Kibana, as well as for generating Nagios alerts when a 404 was detected:

{
  "_index": "logstash-2016.02.05",
  "_type": "apache_ref_access",
  "_id": "AVKwtt8oMSH3nI3K3Jjm",
  "_score": null,
  "_source": {
    "host": "web1",
    "path": "/var/log/httpd/jf_ref.example.com_access_log",
    "tags": [
      "example ref.example.com"
    ],
    "message": "::1 - - [05/Feb/2016:03:14:42 -0500] \"GET /server-status?auto HTTP/1.1\" 200 437",
    "timestamp": "2016-02-05T03:14:42-0500",
    "clientip": "::1",
    "duration": 1799,
    "status": 200,
    "request": "/server-status?auto",
    "urlpath": "/server-status",
    "urlquery": "?auto",
    "method": "GET",
    "bytes": 437,
    "vhost": "ref.example.com",
    "@version": "1",
    "@timestamp": "2016-02-05T08:14:47.371Z",
    "type": "apache_ref_access",
    "file": "/var/log/httpd/jf_ref.example.com_access_log",
    "offset": "15366916"
  },
  "fields": {
    "@timestamp": [
      1454660087371
    ]
  },
  "highlight": {
    "host.raw": [
      "@kibana-highlighted-field@web1@/kibana-highlighted-field@"
    ],
    "host": [
      "@kibana-highlighted-field@web1@/kibana-highlighted-field@"
    ]
  },
  "sort": [
    1454660087371
  ]
}

But I can't get the same result when I use filebeats. Below is the result I get when I'm using filebeats:

    {
      "_index": "logstash-2016.02.09",
      "_type": "apache_ref_access",
      "_id": "AVLG7CFgMSH3nI3Kx5z-",
      "_score": null,
      "_source": {
        "message": "{       \"host\":\"ref.example.com.example.com\",       \"path\":\"/var/log/httpd/jf_ref.example.com_access_log\",       \"tags\":[\"example ref.example.com\"],       \"message\": \"83.142.160.6 - - [09/Feb/2016:10:41:20 -0500] \\\"POST /joke_details.php?joke_id=id12&v=0 HTTP/1.1\\\" 200 6076\",       \"timestamp\": \"2016-02-09T10:41:20-0500\",       \"clientip\": \"83.142.160.6\",       \"duration\": 1197950,       \"status\": 200,       \"request\": \"/joke_details.php?joke_id=id12&v=0\",       \"urlpath\": \"/joke_details.php\",       \"urlquery\": \"?joke_id=id12&v=0\",       \"method\": \"POST\",       \"bytes\": 6076,       \"vhost\": \"ref.example.com\"     }",
        "@version": "1",
        "@timestamp": "2016-02-09T15:41:22.915Z",
        "beat": {
          "hostname": "web1",
          "name": "filebeat"
        },
        "count": 1,
        "fields": {
          "service": "apache",
          "type": "apache_ref_access"
        },
        "input_type": "log",
        "offset": 4672153,
        "source": "/var/log/httpd/jf_ref.example.com_access_log",
        "tags": [
          "example-dev",
          "web-tier",
          "beats",
          "beats_input_codec_plain_applied"
        ],
        "type": "apache_ref_access",
        "host": "web1"
      },
      "fields": {
        "@timestamp": [
          1455032482915
        ]
      },
      "highlight": {
        "fields.type": [
          "@kibana-highlighted-field@apache_ref_access@/kibana-highlighted-field@"
        ],
        "type.raw": [
          "@kibana-highlighted-field@apache_ref_access@/kibana-highlighted-field@"
        ],
        "fields.type.raw": [
          "@kibana-highlighted-field@apache_ref_access@/kibana-highlighted-field@"
        ],
        "type": [
          "@kibana-highlighted-field@apache_ref_access@/kibana-highlighted-field@"
        ]
      },
      "sort": [
        1455032482915
      ]
    }

And there's not nearly as much useful info there. I realize that my apache logs are outputting in JSON and logstash is able to understand that and produce these beautiful results. How do I get the same useful info out of filebeats that I did under lumberjack? Is there a way to recreate this effect under yaml?

Thanks

Filebeat uses YAML only for its configuration file. The data it sends to Logstash is JSON. The line it reads from your files is put into the message field of the event sent to Logstash.

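Since your lines are JSON, you need to apply the JSON codec to the beats input in your Logstash configuration, so that each line is decoded into separate fields instead of being left as a single string in the message field. See the thread "Parse / ship JSON file with filebeat".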

Thanks man. I'll take a look. I'll work with the json filter and see where I get with this.

Thanks!! That did the trick!

{
  "_index": "logstash-2016.02.10",
  "_type": "apache_ref_access",
  "_id": "AVLJaQLdOPt5aurHXxKV",
  "_score": null,
  "_source": {
    "host": "ref.example.com",
    "path": "/var/log/httpd/jf_ref.example.com_access_log",
    "tags": [
      "jokefire-dev",
      "web-tier",
      "beats",
      "beats_input_codec_json_applied"
    ],
    "message": "213.238.179.240 - - [09/Feb/2016:22:54:32 -0500] \"GET /joke_details.php?joke_id=id20&v=0 HTTP/1.1\" 200 966",
    "timestamp": "2016-02-09T22:54:32-0500",
    "clientip": "213.238.179.240",
    "duration": 1047497,
    "status": 200,
    "request": "/joke_details.php?joke_id=id20&v=0",
    "urlpath": "/joke_details.php",
    "urlquery": "?joke_id=id20&v=0",
    "method": "GET",
    "bytes": 966,
    "vhost": "ref.example.com",
    "@version": "1",
    "@timestamp": "2016-02-10T03:54:35.497Z",
    "beat": {
      "hostname": "web1",
      "name": "filebeat"
    },
    "count": 1,
    "fields": {
      "service": "apache",
      "type": "apache_ref_access"
    },
    "input_type": "log",
    "offset": 5927887,
    "source": "/var/log/httpd/jf_ref.example.com_access_log",
    "type": "apache_ref_access"
  },
  "fields": {
    "@timestamp": [
      1455076475497
    ]
  },
  "highlight": {
    "fields.type": [
      "@kibana-highlighted-field@apache_ref_access@/kibana-highlighted-field@"
    ],
    "type.raw": [
      "@kibana-highlighted-field@apache_ref_access@/kibana-highlighted-field@"
    ],
    "type": [
      "@kibana-highlighted-field@apache_ref_access@/kibana-highlighted-field@"
    ],
    "fields.type.raw": [
      "@kibana-highlighted-field@apache_ref_access@/kibana-highlighted-field@"
    ]
  },
  "sort": [
    1455076475497
  ]
}

Appreciate the help!