Logstash sometimes incorrectly parses input documents with logstash-input-azureblob

Using:

  • logstash (tested with 6.0.1 and 6.4.0)
  • logstash-input-azureblob 0.9.12

I've got the following pipeline configured for pulling in Application Gateway logs from Azure:

input {
  azureblob {
    storage_account_name => "xxxxdevmichaelazlogs"
    storage_access_key => "plugh"
    container => "insights-logs-applicationgatewayaccesslog"
    type => "application-gateway"
    codec => "json"
  }
}

filter {
  if [type] == "application-gateway" {
    date {
      match  => [ "time", ISO8601 ]
    }
    kv {
      source => "[properties][requestQuery]"
      target => "[properties][requestQuery]"
      field_split => "&"
      transform_key => "lowercase"
    }
    mutate {
      remove_field => ["time"]
      add_field    => { "ip" => "%{[properties][clientIP]}" }
    }
  }
}

output {
  if [type] == "application-gateway" {
    elasticsearch {
      id                 => "application-gateway"
      hosts              => ["elasticsearch-1", "elasticsearch-2", "elasticsearch-3"]
      template           => "/usr/share/logstash/config/mappings/application-gateway.json"
      template_overwrite => true
      template_name      => "application-gateway"
      index              => "application-gateway-%{+YYYY.MM.dd}"
      document_type      => "application-gateway"
      document_id        => "%{[properties][requestQuery][x-azureapplicationgateway-log-id]}"
    }
  }
  stdout { codec => "rubydebug" }
}

For most documents everything works great. A typical source document looks like this:

                {
                         "resourceId": "/SUBSCRIPTIONS/…/RESOURCEGROUPS/XXXX-DEV-MICHAEL/PROVIDERS/MICROSOFT.NETWORK/APPLICATIONGATEWAYS/AWG-CLUSTERS",
                         "operationName": "ApplicationGatewayAccess",
                         "time": "2018-08-28T14:53:01Z",
                         "category": "ApplicationGatewayAccessLog",
                         "properties": {"instanceId":"ApplicationGatewayRole_IN_1","clientIP":"198.2.1.1","clientPort":27452,"httpMethod":"POST","requestUri":"/message-bus/163aa8c8a30b4c3f9ca3bfd4a43472e9/poll","requestQuery":"dlp=t&X-AzureApplicationGateway-CACHE-HIT=0&SERVER-ROUTED=10.0.11.7&X-AzureApplicationGateway-LOG-ID=5a3ec6e0-16b2-4d6f-9e77-7009fd50dc6a&SERVER-STATUS=200","userAgent":"Mozilla/5.0+(X11;+Linux+x86_64)+AppleWebKit/537.36+(KHTML,+like+Gecko)+Chrome/68.0.3440.106+Safari/537.36","httpStatus":200,"httpVersion":"HTTP/1.1","receivedBytes":1895,"sentBytes":674,"timeTaken":723,"sslEnabled":"on","host":"app-meta.xxxx-dev-michael.xxxx"}
                }

and the correct output is:

{
         "category" => "ApplicationGatewayAccessLog",
       "properties" => {
           "httpMethod" => "POST",
           "requestUri" => "/message-bus/163aa8c8a30b4c3f9ca3bfd4a43472e9/poll",
           "sslEnabled" => "on",
          "httpVersion" => "HTTP/1.1",
             "clientIP" => "198.2.1.1",
           "instanceId" => "ApplicationGatewayRole_IN_0",
                 "host" => "app-meta.xxxx-dev-michael.xxxx",
            "sentBytes" => 674,
         "requestQuery" => {
            "x-azureapplicationgateway-cache-hit" => "0",
               "x-azureapplicationgateway-log-id" => "5ecdfe5e-6310-466a-a3af-be96568682ed",
                                  "server-status" => "200",
                                            "dlp" => "t",
                                  "server-routed" => "10.0.11.7"
        },
           "httpStatus" => 200,
            "userAgent" => "Mozilla/5.0+(X11;+Linux+x86_64)+AppleWebKit/537.36+(KHTML,+like+Gecko)+Chrome/68.0.3440.106+Safari/537.36",
            "timeTaken" => 1788,
        "receivedBytes" => 1895,
           "clientPort" => 8479
    },
         "@version" => "1",
       "@timestamp" => 2018-08-28T15:42:16.000Z,
               "ip" => "198.2.1.1",
       "resourceId" => "/SUBSCRIPTIONS/…/RESOURCEGROUPS/XXXX-DEV-MICHAEL/PROVIDERS/MICROSOFT.NETWORK/APPLICATIONGATEWAYS/AWG-CLUSTERS",
    "operationName" => "ApplicationGatewayAccess",
             "type" => "application-gateway"
}

except that mystery documents occasionally appear. One exemplar source document is:

                {
                         "resourceId": "/SUBSCRIPTIONS/…/RESOURCEGROUPS/XXXX-DEV-MICHAEL/PROVIDERS/MICROSOFT.NETWORK/APPLICATIONGATEWAYS/AWG-CLUSTERS",
                         "operationName": "ApplicationGatewayAccess",
                         "time": "2018-08-28T15:41:16Z",
                         "category": "ApplicationGatewayAccessLog",
                         "properties": {"instanceId":"ApplicationGatewayRole_IN_0","clientIP":"198.2.1.1","clientPort":19014,"httpMethod":"POST","requestUri":"/message-bus/163aa8c8a30b4c3f9ca3bfd4a43472e9/poll","requestQuery":"dlp=t&X-AzureApplicationGateway-CACHE-HIT=0&SERVER-ROUTED=10.0.11.7&X-AzureApplicationGateway-LOG-ID=a6299a3a-cc8d-4394-87fe-23bf5e24e8df&SERVER-STATUS=200","userAgent":"Mozilla/5.0+(X11;+Linux+x86_64)+AppleWebKit/537.36+(KHTML,+like+Gecko)+Chrome/68.0.3440.106+Safari/537.36","httpStatus":200,"httpVersion":"HTTP/1.1","receivedBytes":1895,"sentBytes":674,"timeTaken":2438,"sslEnabled":"on","host":"app-meta.xxxx-dev-michael.xxxx"}
                }

but the output document is incorrect - it's as though the parsed document is rooted at properties instead of at the top level, for some reason:

{
       "httpMethod" => "POST",
       "requestUri" => "/message-bus/163aa8c8a30b4c3f9ca3bfd4a43472e9/poll",
       "sslEnabled" => "on",
      "httpVersion" => "HTTP/1.1",
         "clientIP" => "198.2.1.1",
       "instanceId" => "ApplicationGatewayRole_IN_0",
             "host" => "app-meta.xxxx-dev-michael.xxxx",
         "@version" => "1",
        "sentBytes" => 674,
     "requestQuery" => "dlp=t&X-AzureApplicationGateway-CACHE-HIT=0&SERVER-ROUTED=10.0.11.7&X-AzureApplicationGateway-LOG-ID=a6299a3a-cc8d-4394-87fe-23bf5e24e8df&SERVER-STATUS=200",
       "@timestamp" => 2018-08-28T15:46:38.490Z,
       "httpStatus" => 200,
        "userAgent" => "Mozilla/5.0+(X11;+Linux+x86_64)+AppleWebKit/537.36+(KHTML,+like+Gecko)+Chrome/68.0.3440.106+Safari/537.36",
        "timeTaken" => 2438,
    "receivedBytes" => 1895,
               "ip" => "%{[properties][clientIP]}",
       "clientPort" => 19014,
             "type" => "application-gateway"
}

How can I solve this problem?

If I resubmit the exact same problem document to the pipeline (via a jsonlines input), it gets parsed without a problem, so I don't think the fault lies with the source document itself or with the config:

{
             "port" => 35098,
         "category" => "ApplicationGatewayAccessLog",
       "@timestamp" => 2018-08-28T15:41:16.000Z,
         "@version" => "1",
             "host" => "gateway",
    "operationName" => "ApplicationGatewayAccess",
       "properties" => {
           "httpMethod" => "POST",
           "requestUri" => "/message-bus/163aa8c8a30b4c3f9ca3bfd4a43472e9/poll",
           "sslEnabled" => "on",
          "httpVersion" => "HTTP/1.1",
             "clientIP" => "198.2.1.1",
           "instanceId" => "ApplicationGatewayRole_IN_0",
                 "host" => "app-meta.xxxx-dev-michael.xxxx",
            "sentBytes" => 674,
         "requestQuery" => {
            "x-azureapplicationgateway-cache-hit" => "0",
               "x-azureapplicationgateway-log-id" => "a6299a3a-cc8d-4394-87fe-23bf5e24e8df",
                                  "server-status" => "200",
                                            "dlp" => "t",
                                  "server-routed" => "10.0.11.7"
        },
           "httpStatus" => 200,
            "userAgent" => "Mozilla/5.0+(X11;+Linux+x86_64)+AppleWebKit/537.36+(KHTML,+like+Gecko)+Chrome/68.0.3440.106+Safari/537.36",
            "timeTaken" => 2438,
        "receivedBytes" => 1895,
           "clientPort" => 19014
    },
               "ip" => "198.2.1.1",
       "resourceId" => "/SUBSCRIPTIONS/…/RESOURCEGROUPS/XXXX-DEV-MICHAEL/PROVIDERS/MICROSOFT.NETWORK/APPLICATIONGATEWAYS/AWG-CLUSTERS",
             "type" => "application-gateway"
}

This topic was automatically closed 28 days after the last reply. New replies are no longer allowed.