Issue parsing nested JSON


(Jared Kauppila) #1

I am having issues parsing the second nested json event message coming from NXLog using Logstash 1.5.3.

I have configured NXLog to ship my Log4Net app logs to a Logstash collector via TCP. From that collector, I ship the message to RabbitMQ. Then I have my Logstash indexer pulling messages off of that RabbitMQ Queue and run two json filters on it.

Any thoughts? Details below.

Message:

{"message":"{\"EventReceivedTime\":\"2015-07-22 10:20:29\",\"SourceModuleName\":\"DotNet_AppLogs\",\"SourceModuleType\":\"im_file\",\"filepath\":\"C:\\\\Temp\\\\SampleConsoleApp.log\",\"event_message\":\"{\\\"date\\\":\\\"2015-06-16 20:48:53,255\\\", \\\"thread\\\":\\\"8\\\", \\\"level\\\":\\\"INFO\\\", \\\"logger\\\":\\\"HowToSolveNow.Program\\\", \\\"hostname\\\":\\\"WORKSTATION\\\", \\\"username\\\":\\\"DOMAIN\\\\USERNAME\\\", \\\"location\\\":\\\"HowToSolveNow.Program.InitializeLogger(c:\\\\Users\\\\USERNAME\\\\Documents\\\\Visual Studio 2013\\\\Projects\\\\ConsoleApplication2\\\\ConsoleApplication2\\\\Program.cs:33)\\\", \\\"message\\\":\\\"Begin processing.\\\"}\",\"application_server\":\"WORKSTATION.DOMAIN.com\"}\r","@version":"1","@timestamp":"2015-07-22T15:20:29.842Z","host":"10.3.96.48","tags":["tcp-517"]}

{"message":"{"EventReceivedTime":"2015-07-22 10:20:29","SourceModuleName":"DotNet_AppLogs","SourceModuleType":"im_file","filepath":"C:\\Temp\\SampleConsoleApp.log","event_message":"{\"date\":\"2015-06-16 20:48:53,255\", \"thread\":\"8\", \"level\":\"INFO\", \"logger\":\"HowToSolveNow.Program\", \"hostname\":\"WORKSTATION\", \"username\":\"DOMAIN\\USERNAME\", \"location\":\"HowToSolveNow.Program.InitializeLogger(c:\\Users\\USERNAME\\Documents\\Visual Studio 2013\\Projects\\ConsoleApplication2\\ConsoleApplication2\\Program.cs:33)\", \"message\":\"Begin processing.\"}","application_server":"WORKSTATION.DOMAIN.com"}\r","@version":"1","@timestamp":"2015-07-22T15:20:29.842Z","host":"10.3.96.48","tags":["tcp-517"]}

Error:

←[33mTrouble parsing json {:source=>"event_message", :raw=>"{\"date\":\"2015-06-
16 20:48:53,255\", \"thread\":\"8\", \"level\":\"INFO\", \"logger\":\"HowToSolve
Now.Program\", \"hostname\":\"WORKSTATION\", \"username\":\"DOMAIN\\USERNAME\",
\"location\":\"HowToSolveNow.Program.InitializeLogger(c:\\Users\\USERNAME\\Docum
ents\\Visual Studio 2013\\Projects\\ConsoleApplication2\\ConsoleApplication2\\Pr
ogram.cs:33)\", \"message\":\"Begin processing.\"}", :exception=>#<LogStash::Jso
n::ParserError: Unrecognized character escape 'U' (code 85)
at [Source: [B@ae68b58; line: 1, column: 146]>**, :level=>:warn}←[0m
{
               "message" => "{\"EventReceivedTime\":\"2015-07-22 10:20:29\",\"So
urceModuleName\":\"DotNet_AppLogs\",\"SourceModuleType\":\"im_file\",\"filepath\
":\"C:\\\\Temp\\\\SampleConsoleApp.log\",\"event_message\":\"{\\\"date\\\":\\\"2
015-06-16 20:48:53,255\\\", \\\"thread\\\":\\\"8\\\", \\\"level\\\":\\\"INFO\\\"
, \\\"logger\\\":\\\"HowToSolveNow.Program\\\", \\\"hostname\\\":\\\"WORKSTATION
\\\", \\\"username\\\":\\\"DOMAIN\\\\USERNAME\\\", \\\"location\\\":\\\"HowToSol
veNow.Program.InitializeLogger(c:\\\\Users\\\\USERNAME\\\\Documents\\\\Visual St
udio 2013\\\\Projects\\\\ConsoleApplication2\\\\ConsoleApplication2\\\\Program.c
s:33)\\\", \\\"message\\\":\\\"Begin processing.\\\"}\",\"application_server\":\
"WORKSTATION.DOMAIN.com\"}\r",
              "@version" => "1",
            "@timestamp" => "2015-07-22T15:20:29.842Z",
                  "host" => "10.3.96.48",
                  "tags" => [
        [0] "tcp-517",
        [1] "software",
        [2] "dotnet",
        [3] "log4net",
        [4] "message_parsed",
        [5] "_jsonparsefailure"
    ],
                  "path" => "H:\\Working\\Indexing\\input.txt",
     "EventReceivedTime" => "2015-07-22 10:20:29",
      "SourceModuleName" => "DotNet_AppLogs",
      "SourceModuleType" => "im_file",
              "filepath" => "C:\\Temp\\SampleConsoleApp.log",
         "event_message" => "{\"date\":\"2015-06-16 20:48:53,255\", \"thread\":\
"8\", \"level\":\"INFO\", \"logger\":\"HowToSolveNow.Program\", \"hostname\":\"W
ORKSTATION\", \"username\":\"DOMAIN\\USERNAME\", \"location\":\"HowToSolveNow.Pr
ogram.InitializeLogger(c:\\Users\\USERNAME\\Documents\\Visual Studio 2013\\Proje
cts\\ConsoleApplication2\\ConsoleApplication2\\Program.cs:33)\", \"message\":\"B
egin processing.\"}",
    "application_server" => "WORKSTATION.DOMAIN.com",
                  "edoc" => {}
}

NXLog Configuration

## This is a sample configuration file. See the nxlog reference manual about the
## configuration options. It should be installed locally and is also available
## online at http://nxlog.org/nxlog-docs/en/nxlog-reference-manual.html

## Please set the ROOT to the folder your nxlog was installed into,
## otherwise it will not start.

#define ROOT C:\Program Files\nxlog
define ROOT C:\Program Files (x86)\nxlog

Moduledir %ROOT%\modules
CacheDir %ROOT%\data
Pidfile %ROOT%\data\nxlog.pid
SpoolDir %ROOT%\data
LogFile %ROOT%\data\nxlog.log

#<Extension _syslog>
#    Module      xm_syslog
#</Extension>

#<Input in>
#    Module      im_msvistalog
# For windows 2003 and earlier use the following:
#   Module      im_mseventlog
#</Input>

<Extension json>
    Module      xm_json
</Extension>

<Extension multiline>
    Module      xm_multiline
    HeaderLine /^({"date").*/
</Extension>

## Input Definitions
<Input DotNet_AppLogs>
	## Input Module
	Module	im_file
	
	## File Paths (Wild cards on directories is not supported)
	FILE "C:\Temp\SampleConsoleApp.log"

	InputType multiline
	
	## Save the position when nxlog exits
	SavePos FALSE
	
	## Read from the from the end of the file when nxlog started if a start position was not saved
	ReadFromLast FALSE
	
	## Search recursively under subdirectories
	Recursive TRUE
	
	## How frequently the module will check for new files/log entries
	PollInterval 1
	
	## Specifies in seconds how frequently the module will check the monitored directory for modifications to files and new files in case of a wildcarded File path
	#DirCheckInterval 2
	
	## Specifies how many files nxlog will actively monitor at most
	#ActiveFiles 10
	
	## Specifies whether input files should be monitored for possible file rotation via renaming in order to avoid rereading the file contents.
	#RenameCheck FALSE
	
	## Specifies whether open input files should be closed as soon as possible after there is no more data to read
	CloseWhenIdle FALSE
	
	## Drop commmented lines
	Exec if $raw_event =~ /^#/ drop();

	Exec $raw_event = replace($raw_event, "\r\n", ";");
	
	## Add the filepath to the message
	Exec	$filepath = file_name();
	
	## Add the raw event line as the message
	Exec	$event_message = $raw_event;
	
</Input>

<Output out>
    Module      om_tcp
    Host        COLLECTOR.DOMAIN.com
    Port        517
	#Exec $Hostname = 'hostname_fqdn()';
	Exec	$application_server = hostname_fqdn();
	Exec		to_json();
</Output>

<Route 1>
    Path        DotNet_AppLogs => out
</Route>

Logstash Collector Config

input {
   tcp {
       port => 517
       tags => ["tcp-517"]
   }
}

output {
    if "tcp-517" in [tags] {
		rabbitmq {
			host => "RABBITMQ.DOMAIN.com"
			port => "5671"
			ssl => "true"
			vhost => "logging"
			exchange => "direct-logs"
			exchange_type => "direct"
			key => "software.dotnet.log4net.basic.v1"
		}
	}	
}

Logstash Indexer Config

input {
  rabbitmq {
    codec => "json"
    host => "rabbitmq.domain.com"
    port => "5671"
    ssl => "true"
    vhost => "logging"
    queue => "software.dotnet.log4net.basic.v1"
    durable => "true"
    ack => "true"
    prefetch_count => "256"
    tags => ["software","dotnet","log4net"]
    threads => "1"
  }
}

filter {

  if "log4net" in [tags] {
  	
	json {
		source => "message"
		add_tag => [ "message_parsed" ]
	}
	
	json {
		source => "event_message"
		add_tag => [ "event_parsed" ]
	}
  }
}

output {
	stdout {
	  codec => rubydebug
  	}
	
	elasticsearch {
		host => "elasticsearch.domain.com"
		index => "log4net-%{+YYYY.MM.dd}"
  	}
}

(Magnus Bäck) #2

It looks like insufficient escaping in the input strings. The bad escape sequence it complains about is most likely from \\\"username\\\":\\\"DOMAIN\\\\USERNAME\\\". After removing two levels of JSON escaping from \\\\USERNAME we end up with \USERNAME. I suspect the input would need to be \\\\\\\\USERNAME.


(Jared Kauppila) #3

Thanks Magnus, I modified my NXLog config to convert the event_message to json prior to shipping so it appears to be parsed correctly now by the Logstash indexer.

The second json filter isn't tossing an error anymore, and is tagging the message as if it was successful, but the contents of that field are not being indexed. If I modify the target to "document", I can see that it appears to be parsing the initial message again rather than "event_message". Thoughts?


(Magnus Bäck) #4

What's your Logstash configuration at this point?


(Jared Kauppila) #5
input {
   rabbitmq {
    codec => "json"
    host => "RABBITMQ.DOMAIN.com"
    port => "5671"
    ssl => "true"
    vhost => "logging"
    queue => "software.dotnet.log4net.basic.v1"
    durable => "true"
    ack => "true"
    prefetch_count => "256"
    tags => ["software","dotnet","log4net"]
    threads => "1"
  }
  
}

filter {

  if "log4net" in [tags] {
  	
	json {
		source => "message"
		add_tag => [ "message_parsed" ]
	}
	
	json {
		source => "event_message"
		target => "document"
		add_tag => [ "event_parsed" ]
	}
  }
}

output {
	stdout {
	  codec => rubydebug
  	}
	
	elasticsearch {
		host => "ELASTICSEARCH.DOMAIN.com"
		index => "log4net-%{+YYYY.MM.dd}"
  	}
}

(Magnus Bäck) #6

Woah, you have three nested messages. I didn't see that at first. AFAICT, everything from the event_message field is being parsed extracted to fields, except the string in the document.event_message field. If you want that parsed as JSON you need a third json filter.

But really, it seems like you should look into the input side. This kind of nesting isn't reasonable.


(Jared Kauppila) #7

Good catch, looks like that was a result of me adding the to_json(); to $event_message within the NXLog config.

I think I figured it out and it's a much more simple configuration now:

  1. Modified the NXLog config to escape the backslashes in the raw event from the log files
  2. Parse that raw event as json (parse_json():wink: and then parse all the fields as json (to_json():wink: and send to the Logstash TCP input (codec => json_lines).
  3. Collector sends it to RabbitMQ
  4. Indexer reads from queue (codec => json)
  5. Nothing more needs to be done to the message since it's all json, so send it right to Elasticsearch.

Indexed in Elasticsearch

Payload on RabbitMQ

{"EventReceivedTime":"2015-07-22 17:33:30","SourceModuleName":"DotNet_AppLogs","SourceModuleType":"im_file","filepath":"C:\\Temp\\SampleConsoleApp.log","application_server":"WORKSTATION.DOMAIN.com","date":"2015-06-16 20:48:53,255","thread":"8","level":"INFO","logger":"HowToSolveNow.Program","hostname":"WORKSTATION","username":"DOMAIN\\USERNAME","location":"HowToSolveNow.Program.InitializeLogger(c:\\Users\\USERNAME\\Documents\\Visual Studio 2013\\Projects\\ConsoleApplication2\\ConsoleApplication2\\Program.cs:33)","message":"Begin processing.","@version":"1","@timestamp":"2015-07-22T22:33:30.789Z","host":"192.168.1.48","tags":["tcp-517"]}

{"EventReceivedTime":"2015-07-22 17:33:30","SourceModuleName":"DotNet_AppLogs","SourceModuleType":"im_file","filepath":"C:\Temp\SampleConsoleApp.log","application_server":"WORKSTATION.DOMAIN.com","date":"2015-06-16 20:48:53,255","thread":"8","level":"INFO","logger":"HowToSolveNow.Program","hostname":"WORKSTATION","username":"DOMAIN\USERNAME","location":"HowToSolveNow.Program.InitializeLogger(c:\Users\USERNAME\Documents\Visual Studio 2013\Projects\ConsoleApplication2\ConsoleApplication2\Program.cs:33)","message":"Begin processing.","@version":"1","@timestamp":"2015-07-22T22:33:30.789Z","host":"192.168.1.48","tags":["tcp-517"]}

NXLog Config

## This is a sample configuration file. See the nxlog reference manual about the
## configuration options. It should be installed locally and is also available
## online at http://nxlog.org/nxlog-docs/en/nxlog-reference-manual.html

## Please set the ROOT to the folder your nxlog was installed into,
## otherwise it will not start.

#define ROOT C:\Program Files\nxlog
define ROOT C:\Program Files (x86)\nxlog

Moduledir %ROOT%\modules
CacheDir %ROOT%\data
Pidfile %ROOT%\data\nxlog.pid
SpoolDir %ROOT%\data
LogFile %ROOT%\data\nxlog.log

<Extension json>
    Module      xm_json
</Extension>

<Extension multiline>
    Module      xm_multiline
    HeaderLine /^({"date").*/
</Extension>

## Input Definitions
<Input DotNet_AppLogs>
	## Input Module
	Module	im_file
	
	## File Paths (Wild cards on directories is not supported)
	FILE "C:\Temp\SampleConsoleApp.log"

	InputType multiline
	
	## Save the position when nxlog exits
	SavePos FALSE
	
	## Read from the from the end of the file when nxlog started if a start position was not saved
	ReadFromLast FALSE
	
	## Search recursively under subdirectories
	Recursive TRUE
	
	## How frequently the module will check for new files/log entries
	PollInterval 1
	
	## Specifies in seconds how frequently the module will check the monitored directory for modifications to files and new files in case of a wildcarded File path
	#DirCheckInterval 2
	
	## Specifies how many files nxlog will actively monitor at most
	#ActiveFiles 10
	
	## Specifies whether input files should be monitored for possible file rotation via renaming in order to avoid rereading the file contents.
	#RenameCheck FALSE
	
	## Specifies whether open input files should be closed as soon as possible after there is no more data to read
	CloseWhenIdle FALSE
		
	## Drop commmented lines
	Exec if $raw_event =~ /^#/ drop();

	## Replace the line breaks with semi-colon so we can capture multi-line stacktraces
	Exec $raw_event = replace($raw_event, "\r\n", ";");
	
	## Escape backslashes in the raw_event
	Exec $raw_event = replace($raw_event, "\\", "\\\");
	
	## Add the filepath to the message
	Exec	$filepath = file_name();
		
	## Add the fully qualified domain name as a field to the message	
	Exec	$application_server = hostname_fqdn();
	
	## Parse the raw_event to json
	Exec parse_json();
	
	## Parse all the fields as json
	Exec to_json();
	
</Input>

<Output out>
    Module      om_tcp
    Host        COLLECTOR.DOMAIN.com
    Port        517
</Output>

<Route 1>
    Path        DotNet_AppLogs => out
</Route>

Logstash Collector

input {
   tcp {
       port => 517
       tags => ["tcp-517"]
	   codec => "json_lines"
   }
}

output {
    if "tcp-517" in [tags] {
		stdout {
			codec => rubydebug
		}
		rabbitmq {
			host => "RABBITMQ.DOMAIN.com"
			port => "5671"
			ssl => "true"
			vhost => "logging"
			exchange => "direct-logs"
			exchange_type => "direct"
			key => "software.dotnet.log4net.basic.v1"
		}
	}	
}

Logstash Indexer

input {
   rabbitmq {
    codec => "json"
    host => "RABBITMQ.DOMAIN.com"
    port => "5671"
    ssl => "true"
    vhost => "logging"
    queue => "software.dotnet.log4net.basic.v1"
    durable => "true"
    ack => "true"
    prefetch_count => "256"
    tags => ["software","dotnet","log4net"]
    threads => "1"
  }
}

filter {

  if "log4net" in [tags] {
  	## TO-DO - Other manipulations of the data prior to sending it off
  }
}

output {
	stdout {
	  codec => rubydebug
  	}
	
	elasticsearch {
		host => "ELASTICSEARCH.DOMAIN.com"
		index => "log4net-%{+YYYY.MM.dd}"
  	}
}

(system) #8