New fields resolve as expected in stdout output but not in elasticsearch output


(Vincent Tran) #1
input {
  # Read the sample file as events of type "json".
  file {
    type => "json"
    path => "/home/user/json/test.json"
    #codec => json
    # Start from the top of the file and keep the sincedb at /dev/null so
    # that no read position is persisted between runs.
    start_position => "beginning"
    sincedb_path => "/dev/null"
  }
}

filter {
  # Parse the JSON text held in "message" into the "raw" field, then copy the
  # nested values of the first content entry of the first array element up to
  # top-level fields.
  json {
    source => "message"
    target => "raw"
    add_field => {
      "Name" => "%{[raw][0][content][0][data][Name]}"
      "Issues" => "%{[raw][0][content][0][data][Issues]}"
      "Serial" => "%{[raw][0][content][0][data][Serial]}"
      "Model" => "%{[raw][0][content][0][data][Model]}"
      "Customer ID" => "%{[raw][0][content][0][data][Customer ID]}"
      "Version" => "%{[raw][0][content][0][data][Version]}"
      "Management IP" => "%{[raw][0][content][0][data][Management IP]}"
      "Timestamp" => "%{[raw][0][content][0][data][Timestamp]}"
      #"Summary" => "%{[raw][1][content][0][data]}"
    }
    #remove_field => [ "raw", "message" ]
  }
  # ...
  # Parse the copied "Timestamp" field as a date; the helper fields listed in
  # remove_field are dropped by this filter rather than by the json filter.
  date {
    match => [
      "Timestamp",
      "EEE, dd MMM YYYY HH:mm:ss zzz"
    ]
    remove_field => [ "Timestamp", "raw", "message" ]
  }
}

output {
  # Pretty-print every event to the console for inspection.
  stdout {
    codec => rubydebug
  }
  #stdout {codec => json}
  #elasticsearch {
  #  hosts => ["es1", "es2"]
  #  sniffing => true
  #  workers => 4
  #  codec => json
  # }
}

Test JSON:

 [{
 		"content" : [{
 				"data" : {
 					"Name" : "My system",
 					"Issues" : 20,
 					"Serial" : "999999",
 					"Model" : "4K-3D1",
 					"Customer ID" : "Not specified",
 					"Version" : "9.2.1-20151105-1459-441",
 					"Management IP" : "10.20.6.123",
 					"Timestamp" : "Wed, 11 Nov 2015 16:53:53 UTC"
 				},
 				"type" : "data"
 			}
 		],
 		"type" : "header"
 	}]

When I run LS with stdout output, the newly added fields resolve to the referenced nested fields as expected.

However, when I finally switch to the ES output, the referenced fields fail to resolve. This is the resulting document in ES:

 {
   "_index": "logstash-2015.11.21",
   "_type": "json",
   "_id": "AVEo2-iscCK8uQtW9SSo",
   "_score": null,
   "_source": {
     "message": "",
     "@version": "1",
     "@timestamp": "2015-11-21T07:04:37.951Z",
     "host": "myhost",
     "path": "/home/user/json/test.json",
     "type": "json",
     "raw": null,
     "Name": "%{[raw][0][content][0][data][Name]}",
     "Issues": "%{[raw][0][content][0][data][Issues]}",
     "Serial": "%{[raw][0][content][0][data][Serial]}",
     "Model": "%{[raw][0][content][0][data][Model]}",
     "Customer ID": "%{[raw][0][content][0][data][Customer ID]}",
     "Version": "%{[raw][0][content][0][data][Version]}",
     "Management IP": "%{[raw][0][content][0][data][Management IP]}",
     "Timestamp": "%{[raw][0][content][0][data][Timestamp]}",
     "tags": [
       "_rubyexception",
       "_dateparsefailure"
     ]
   },
   "fields": {
     "@timestamp": [
       1448089477951
     ]
   },
   "sort": [
     1448089477951
   ]
 }

I'm hoping for a fresher pair of eyes as I've been staring at this for a while...


Trying to access nested json in logstash mutate filter
(Glen R Smith) #2
input {
    # Read the JSON file from the beginning; sincedb is pointed at /dev/null
    # so that no read position is persisted between runs.
    file {
        type => "json"
        path => "FILEPATH"
        start_position => "beginning"
        sincedb_path => "/dev/null"
    }
}

filter {
    # Parse the JSON text in "message" into "raw". The document is a
    # top-level array whose first element holds a "content" array, so both
    # levels need an explicit [0] index before "data" can be reached.
    json {
        source => "message"
        target => "raw"
        add_field => {
            "Name" => "%{[raw][0][content][0][data][Name]}"
            "Issues" => "%{[raw][0][content][0][data][Issues]}"
            "Serial" => "%{[raw][0][content][0][data][Serial]}"
            "Model" => "%{[raw][0][content][0][data][Model]}"
            "Customer ID" => "%{[raw][0][content][0][data][Customer ID]}"
            "Version" => "%{[raw][0][content][0][data][Version]}"
            "Timestamp" => "%{[raw][0][content][0][data][Timestamp]}"
            # NOTE(review): "Management IP" from the sample data is not
            # copied here; add it if that field is needed downstream.
        }
    }
    # Parse the copied "Timestamp" field as the event date.
    date {
        match => [ "[Timestamp]", "EEE, dd MMM YYYY HH:mm:ss zzz"]
    }
    # Drop the helper fields once the values have been copied out.
    mutate {
        remove_field => [ "raw", "message", "Timestamp" ]
    }
}

output {
    # NOTE(review): "host" and "protocol" look like Logstash 1.x option
    # names; the config in post #1 uses "hosts" - confirm against the
    # installed version of the elasticsearch output plugin.
    elasticsearch {
        host => ["localhost"]
        index => "discuss-%{+YYYY.MM.dd}"
        protocol => http
        template_name => "discuss"
    }
}

(Vincent Tran) #3

Thanks,

I checked ES the next day and it turned out it was working after all. It sent both the raw event and the resolved event to ES. The raw event has the current date as its timestamp; the resolved event has the substituted timestamp. I was filtering on the wrong date range.


(system) #4