Logstash - rename every occurrence of a multi-level nested JSON array element


(Amit Singh) #1

Hi,

I am trying to send some JSON data into elasticsearch using logstash:

My Logstash configuration (test.config) looks like this:

# test.config: read test.json, coerce a few field types, rename the nested
# file1/file2 keys under [student][users], then index into Elasticsearch.
input {
    file {
        path => "/Users/bob/Study/data/test.json"
        codec => json
        # Do not persist read positions, so the file is re-read from the
        # beginning on every run (handy while testing).
        sincedb_path => "/dev/null"
        start_position => "beginning"
    }
}

filter {
    # Parse the JSON text held in the student field and store the result
    # back in the same field.
    json {
        source => "student"
        target => "student"
    }

    mutate {
        convert => {
            "name" => "string"
            "score" => "float"
            "address" => "string"
        }
    }

    # Conditionals and plugins go directly inside the single filter section;
    # a filter { } section cannot be nested inside another one.
    if([student][users]){
        # NOTE: the ruby code below is the part that does not work yet.
        ruby {
            code => "
                b = []
                event.get('[student][users]').each { |k|
                    k[files]['file1'] = k[files]['File1']
                    k.delete('file1')

                    event.get('[student][users]').each { |k|
                    k[files]['file2'] = k[files]['File2']
                    k.delete('file2')


                    logger.info('for each k', 'value' => k)
                    b << k
                }
                event.set('[student][users][files]', b)
            "
        }
    }
}

output {
    elasticsearch {
        hosts => "localhost:9200"
        index => "test"
        document_type => "student"
        # Install the custom mapping template below, replacing any template
        # already stored under the same name.
        manage_template => true
        template => "/Users/bob/Study/data/index_templates/test_template.json"
        template_name => "test_template"
        template_overwrite => true
    }
    # Also print every event to the console for debugging.
    stdout {
        codec => rubydebug
    }
}

The mapping template used here (test_template.json) is:

{
    "index_patterns": "test",
    "settings": {
        "number_of_shards": 1,
        "number_of_replicas": 0,
        "index": {
            "query": {
                "default_field": "@words"
            }
        }
    },
    "mappings": {
        "student": {
            "_source": {
                "enabled": true
            },
            "dynamic_templates": [
                {
                    "string_template": {
                        "match": "*",
                        "mapping": {
                            "type": "keyword",
                            "index": true
                        },
                        "match_mapping_type": "string"
                    }
                }
            ],
            "properties": {
                "name": { "type": "keyword", "index": true },
                "score": { "type": "float" },
                "address": { "type": "keyword", "index": true },
                "lastUpdated": { "type": "date", "format": "epoch_millis" },
                "firstUpdated": { "type": "date", "format": "epoch_millis" },
                "official": {
                    "type": "nested",
                    "properties": {
                        "suid": { "type": "keyword", "index": true },
                        "uploader": {
                            "type": "nested",
                            "properties": {
                                "AGS": { "type": "date", "format": "epoch_millis" },
                                "AGM": { "type": "date", "format": "epoch_millis" }
                            }
                        },
                        "rank": { "type": "integer" }
                    }
                },
                "users": {
                    "type": "nested",
                    "properties": {
                        "id": { "type": "keyword", "index": true },
                        "files": {
                            "type": "nested",
                            "properties": {
                                "file1": {
                                    "type": "nested",
                                    "properties": {
                                        "name": { "type": "keyword", "index": true },
                                        "signed": { "type": "boolean" },
                                        "failureReason": { "type": "keyword", "index": true },
                                        "version": { "type": "keyword", "index": true },
                                        "checksum": { "type": "keyword", "index": true },
                                        "signerInfo": { "type": "keyword", "index": true }
                                    }
                                },
                                "file2": {
                                    "type": "nested",
                                    "properties": {
                                        "name": { "type": "keyword", "index": true },
                                        "signed": { "type": "boolean" },
                                        "failureReason": { "type": "keyword", "index": true },
                                        "version": { "type": "keyword", "index": true },
                                        "checksum": { "type": "keyword", "index": true },
                                        "signerInfo": { "type": "keyword", "index": true }
                                    }
                                }
                            }
                        }
                    }
                }
            }
        }
    }
}

Here, the fields ../bob/filename2 & ../bob/filename2 start with dots (..). This causes trouble in indexing my JSON data into elasticsearch. My JSON data is:

{
	"name": "Michelle Obama",
	"score": "9.0",
	"address": "California",
	"lastUpdated": "1545078074640",
	"firstUpdated": "86400",
	"official": [{
		"suid": "c0c85dc9-1d1e-4bf4-9506-d6c93d3d3dd0",
		"uploader": {
			"AGS": 1544817662070,
			"AGM": 1544817662070
		},
		"rank": 5
	}],


	"users": [
		{
			"id": "ABCD,
			"files": {
				"file1": {
					"name": "FileName1",
					"signed": true,
					"failureReason": null,
					"version": "13.0.0",
					"checksum": null,
					"signerInfo": "Signer1"
				},
				"file2": {
					"name": "FileName2",
					"signed": false,
					"failureReason": "InvalidCodeSignature(-67061)",
					"version": "6.0.0.75",
					"checksum": null,
					"signerInfo": "Signer2"
				}
			}
		},

		{
			"id": "WXYZ,
			"files": {
				"file1": {
					"name": "FileName1",
					"signed": true,
					"failureReason": null,
					"version": "13.0.0",
					"checksum": null,
					"signerInfo": "Signer1"
				},
				"file2": {
					"name": "FileName2",
					"signed": false,
					"failureReason": "None",
					"version": "6.0.0.75",
					"checksum": null,
					"signerInfo": "Signer2"
				}
			}
		}
	],
}

Here, I am trying to rename the nested fields file1 and file2 to File1 and File2, respectively, for every entry in the users array (in my sample JSON the array has 2 entries). These fields are nested inside the elements of a nested JSON array. I am looking for the correct Ruby code; I know the code I currently have in my Logstash config file is incorrect.
What should I change in this Ruby code to get this done?

if([student][users]){
			ruby {
				code => "
		            b = []
		            event.get('[student][users]').each { |k|
		                k[files]['file1'] = k[files]['File1']
	                    k.delete('file1')

                        event.get('[student][users]').each { |k|
		                k[files]['file2'] = k[files]['File2']
	                    k.delete('file2')
		                
		                logger.info('for each k', 'value' => k)
		                b << k
		            }
		            event.set('[student][users][files]', b)
		        "
			}
		}

Please ignore the confusing fields in my JSON. This is only a test JSON file. I am only trying to get an idea on how to rename the nested fields in a JSON array.


#2

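:smiley: You need to fix your JSON first: the "id" values are missing their closing quotes, and the trailing comma after the closing ] of the "users" array is not valid JSON either. After that, this ruby filter does the rename: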

ruby {
    code => "
        # Walk every entry of [student][users] and move the lowercase
        # file1/file2 keys of its files hash to File1/File2.
        renamed = event.get('[student][users]').map { |user|
            files = user['files']
            # Hash#delete returns the removed value (nil when the key is
            # absent), so each rename is a single assignment.
            files['File1'] = files.delete('file1')

            #  event.get('[student][users]').each { |k| <-- delete this
            files['File2'] = files.delete('file2')

            # logger.info('for each k', 'value' => k)
            user
        }
        # Write the whole array back; the original code targeted
        # [student][users][files], which is not where the array lives.
        event.set('[student][users]', renamed)
        "
}

(Amit Singh) #3

Thank you @Badger...it worked.
One more thing: what is the syntax to rename the fields only if they exist? Otherwise, a new field is created with the value nil.

I tried this:
if(['files']['file1'] != nil){
// rename the field
}

But I get this error:

syntax error, unexpected tLCURLY


#4

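The code inside the ruby filter is Ruby, not Logstash configuration: Logstash uses if { }, but Ruby uses if ... then ... end (the then is optional when the body starts on a new line). Ruby comments also start with #, not //, and the hash has to be reached through the loop variable. For example:

if k['files'].key?('file1')
    k['files']['File1'] = k['files'].delete('file1')
end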


(Amit Singh) #5

Thanks @Badger :grinning:, got it done