Custom field to process DOB field calculating 'age'

I have sample data like this:

Sr No,CNIC,Cell Phone,Date Of Birth,Gender
1,3710144892657,3215510968,06/06/1961,male
2,3710155658463,3238547712,6/17/1952,male
3,3710153121295,3015357676,01/01/1956,male
4,3710116643365,3215701975,01/01/1964,male

I want to use the date of birth field to calculate age. My logstash.conf looks like this:

input {
	file	{

  type => "csv"
   path => "/root/filebeat-7.1.1-linux-x86_64/CAPP.csv"
	start_position => "beginning"
	sincedb_path => "/dev/null"
	}
}

filter {

  csv {
  separator => ","

	columns => ["Sr No", "CNIC", "Cell Phone" , "Date Of Birth", "Gender" ] 

  
}


 if [message] =~ /^Sr/ {
        drop{}
    }
	
	mutate {
    add_field => { "realtimestamp" => "%{@timestamp}" }
        }

date {
    match => [ "logstash_processed_at", "M/dd/YYYY" ]
    target => "@timestamp"
    add_tag => "timestamp_changed"
    }
	
		 # Intended to subtract [Date Of Birth] from [timestamp_changed], store the
		 # result in [timediff], and tag the event "Age" on success.
		 # NOTE: `event.set('timediff') = diff` is not valid Ruby and is what triggers
		 # the SyntaxError shown below; event.set takes the field name and the value
		 # as two arguments.
		 # NOTE(review): in this config 'timestamp_changed' is only added as a tag by
		 # the preceding date filter, so event.get('timestamp_changed') presumably
		 # returns nil — confirm.
		 ruby {
      code => "
        diff = event.get('timestamp_changed') - event.get('Date Of Birth');
		event.set('timediff') = diff; "
      add_tag => [ "Age" ]
    }
		

 ruby {
    code => "
      wanted_fields = [ 'Sr No', 'CNIC' , 'Cell Phone', '@timestamp' , 'Date Of Birth', 'Gender']
      event.to_hash.keys.each { |k|
        event.remove(k) unless wanted_fields.include? k
      }
    "
  }
		  
  
}



output {
  elasticsearch {
    hosts => ["http://localhost:9200"]
    index => "capp_v3"
     document_type => "capp_v3" 
  }
stdout
 {
	codec => rubydebug
	}	
}

Error

SyntaxError: (ruby filter code):4: syntax error, unexpected '='
                event.set('timediff') = diff;  
                     eval at org/jruby/RubyKernel.java:1061
                 register at /root/logstash-7.1.1/vendor/bundle/jruby/2.5.0/gems/logstash-filter-ruby-3.1.5/lib/logstash/filters/ruby.rb:59
                 register at org/logstash/config/ir/compiler/AbstractFilterDelegatorExt.java:56
         register_plugins at /root/logstash-7.1.1/logstash-core/lib/logstash/java_pipeline.rb:191
                     each at org/jruby/RubyArray.java:1792
         register_plugins at /root/logstash-7.1.1/logstash-core/lib/logstash/java_pipeline.rb:190
  maybe_setup_out_plugins at /root/logstash-7.1.1/logstash-core/lib/logstash/java_pipeline.rb:446
            start_workers at /root/logstash-7.1.1/logstash-core/lib/logstash/java_pipeline.rb:203
                      run at /root/logstash-7.1.1/logstash-core/lib/logstash/java_pipeline.rb:145
                    start at /root/logstash-7.1.1/logstash-core/lib/logstash/java_pipeline.rb:104
[2019-08-06T13:06:22,117][ERROR][logstash.agent           ] Failed to execute action {:id=>:test, :action_type=>LogStash::ConvergeResult::FailedAction, :message=>"Could not execute action: PipelineAction::Create<test>, action_result: false", :backtrace=>nil}
[2019-08-06T13:06:22,196][INFO ][logstash.outputs.elasticsearch] Attempting to install template {:manage_template=>{"index_patterns"=>"logstash-*", "version"=>60001, "settings"=>{"index.refresh_interval"=>"5s", "number_of_shards"=>1}, "mappings"=>{"dynamic_templates"=>[{"message_field"=>{"path_match"=>"message", "match_mapping_type"=>"string", "mapping"=>{"type"=>"text", "norms"=>false}}}, {"string_fields"=>{"match"=>"*", "match_mapping_type"=>"string", "mapping"=>{"type"=>"text", "norms"=>false, "fields"=>{"keyword"=>{"type"=>"keyword", "ignore_above"=>256}}}}}], "properties"=>{"@timestamp"=>{"type"=>"date"}, "@version"=>{"type"=>"keyword"}, "geoip"=>{"dynamic"=>true, "properties"=>{"ip"=>{"type"=>"ip"}, "location"=>{"type"=>"geo_point"}, "latitude"=>{"type"=>"half_float"}, "longitude"=>{"type"=>"half_float"}}}}}}}
[2019-08-06T13:06:22,244][ERROR][org.logstash.Logstash    ] java.lang.IllegalStateException: Logstash stopped processing because of an error: (SyntaxError) (ruby filter code):4: syntax error, unexpected '='
                event.set('timediff') = diff;

Hi,

Could you try :

event.set('timediff',diff);

A cleaner version of the config is:

# Sample Logstash configuration for creating a simple
# Beats -> Logstash -> Elasticsearch pipeline.

input {
	file	{
#  beats { port => 5044 }
  type => "csv"
   path => "/root/filebeat-7.1.1-linux-x86_64/CAPP.csv"
	start_position => "beginning"
	sincedb_path => "/dev/null"
	}
}

filter {
#if[type] == "log"

  csv {
  separator => ","
#  match => [ 
	columns => ["Sr No", "CNIC", "Cell Phone" , "Date Of Birth", "Gender" ] 




  
}


 if [message] =~ /^Sr/ {
        drop{}
    }
	
	mutate {
    add_field => { "realtimestamp" => "%{@timestamp}" }
        }
		
	

# Parse the ISO8601 string copied into [realtimestamp] back into [@timestamp]
# and, when parsing succeeds, add [timestamp_changed] holding the event's
# timestamp rendered as M/dd/YYYY.
date {
    match => [ "realtimestamp", "ISO8601" ]
    target => "@timestamp"
    # The field name must be a complete quoted string; the closing quote was
    # missing, which left the add_field hash unparseable.
    add_field => { 'timestamp_changed' => '%{+M/dd/YYYY}' }
}
	

	
		 ruby {
      code => "
        diff = event.get('timestamp_changed') - event.get('Date Of Birth')
		event.set('timediff',diff); "
      add_field => [ "Age" ]
    }
		
		
        

	
 ruby {
    code => "
      wanted_fields = [ 'Sr No', 'CNIC' , 'Cell Phone', '@timestamp' , 'Date Of Birth', 'Gender', 'Age' ,'timestamp_changed']
      event.to_hash.keys.each { |k|
        event.remove(k) unless wanted_fields.include? k
      }
    "
  }
		  
  
}



output {
  elasticsearch {
    hosts => ["http://localhost:9200"]
    #manage_template => false
#    index => "%{[@metadata][beat]}-%{[@metadata][version]}-%{+YYYY.MM.dd}"
    index => "capp_v3"
#    user => "elastic"
#    password => "changeme"
     #document_type => "%{[@metadata][type]}"
     document_type => "capp_v3" 
  }
stdout
 {
	codec => rubydebug
	}	
}

output
{
"Cell Phone" => "3215510968",
"Gender" => "male",
"@timestamp" => 2019-08-06T13:58:11.555Z,
"Date Of Birth" => "06/06/61",
"Sr No" => "1",
"CNIC" => "3710144892657"
}
{
"Cell Phone" => "3238547712",
"Gender" => "male",
"@timestamp" => 2019-08-06T13:58:11.556Z,
"Date Of Birth" => "6/17/1952",
"Sr No" => "2",
"CNIC" => "3710155658463"
}
{
"Cell Phone" => "3015357676",
"Gender" => "male",
"@timestamp" => 2019-08-06T13:58:11.557Z,
"Date Of Birth" => "01/01/56",
"Sr No" => "3",
"CNIC" => "3710153121295"
}

I cannot find the 'age' field in the output.

Could you replace age with timediff?

I think add_field doesn't work with the ruby plugin.

Still the same; no change in the output.

Events are decorated by the ruby plugin, but add_field requires both a field name and a value.

1 Like

I have changed the approach as follows:

  • Extract just the year (yyyy) part from both dates: the timestamp and the CSV column "Date Of Birth".
  • Subtracting the two should give me the age.

The revised code is:

# Sample Logstash configuration for creating a simple
# Beats -> Logstash -> Elasticsearch pipeline.

input {
	file	{
#  beats { port => 5044 }
  type => "csv"
   path => "/root/filebeat-7.1.1-linux-x86_64/CAPP.csv"
	start_position => "beginning"
	sincedb_path => "/dev/null"
	}
}

filter {
#if[type] == "log"

  csv {
  separator => ","
#  match => [ 
	columns => ["Sr No", "CNIC", "Cell Phone" , "Date Of Birth", "Gender" , "timediff", "timestamp_changed"] 
  
}

 if [message] =~ /^Sr/ {
        drop{}
    }
	
	mutate {
    add_field => { "realtimestamp" => "%{@timestamp}" }
        }
		
	

date {
    match => [ "realtimestamp", "ISO8601" ]
	 target => "@timestamp"
    add_field => { 'timestamp_changed' => '%{+YYYY}' }

    
    }
	
	# Parses [Date Of Birth] and, on success, adds [year] rendered from the
	# parsed date. No `target` is set, so the parsed value is written to
	# [@timestamp], replacing the value set by the previous date filter.
	# NOTE(review): in the Joda-style pattern "DD" means day-of-year, not
	# day-of-month ("dd"); this matches the sample output below, where
	# 06/06/1961 becomes 1961-01-06. The year part is unaffected.
	date {
	
	match => [ "Date Of Birth", "MM/DD/YYYY" ]

	add_field => {"[year]" => "%{+YYY}"}
}
	

	
		 # Subtracts [year] from [timestamp_changed], stores the result in
		 # [timediff], and tags the event "timediff" on success.
		 # NOTE(review): both fields are strings here (the output below shows them
		 # quoted, e.g. "2019" and "1961"), so the `-` call fails and the event is
		 # tagged _rubyexception; converting each side with .to_i avoids that.
		 ruby {
		       init => "require 'time'"

      code => "
        diff = event.get('timestamp_changed') - event.get('year')
		event.set('timediff',diff); "
      add_tag => [ "timediff" ]

    }
		
	 	ruby {
    code => "
      wanted_fields = [ 'Sr No', 'CNIC' , 'Cell Phone', '@timestamp' , 'Date Of Birth', 'Gender', 'year' ,'timestamp_changed', 'tags']
      event.to_hash.keys.each { |k|
        event.remove(k) unless wanted_fields.include? k
      }
    "
  }
	
       

	
	  
  
}



output {
  elasticsearch {
    hosts => ["http://localhost:9200"]
    #manage_template => false
#    index => "%{[@metadata][beat]}-%{[@metadata][version]}-%{+YYYY.MM.dd}"
    index => "capp_v3"
#    user => "elastic"
#    password => "changeme"
     #document_type => "%{[@metadata][type]}"
     document_type => "capp_v3" 
  }
stdout
 {
	codec => rubydebug
	}	
}

output
"tags" => [
[0] "_rubyexception"
],
"timestamp_changed" => "2019",
"Date Of Birth" => "06/06/1961",
"Sr No" => "1",
"Gender" => "male",
"year" => "1961",
"Cell Phone" => "3215510968",
"@timestamp" => 1961-01-06T00:00:00.000Z,
"CNIC" => "3710144892657"
}
{
"tags" => [
[0] "_rubyexception"
],
"timestamp_changed" => "2019",
"Date Of Birth" => "6/17/1952",
"Sr No" => "2",
"Gender" => "male",
"year" => "1952",
"Cell Phone" => "3238547712",
"@timestamp" => 1952-01-17T00:00:00.000Z,
"CNIC" => "3710155658463"
}
{
"tags" => [
[0] "_rubyexception"
],
"timestamp_changed" => "2019",
"Date Of Birth" => "01/01/1956",
"Sr No" => "3",
"Gender" => "male",
"year" => "1956",
"Cell Phone" => "3015357676",
"@timestamp" => 1956-01-01T00:00:00.000Z,
"CNIC" => "3710153121295"
}

I know I'm close, but somehow the subtraction is not working.

You are getting ruby exceptions which you should read. You need to convert the years to integers

diff = event.get('timestamp_changed').to_i - event.get('year').to_i

and you need to add 'timediff' to wanted_fields.

Also, what are you trying to do by having two date filters that both write to [@timestamp]?

1 Like

Really nice, works like a charm. I failed to notice that the data types were wrong: both values were actually strings.

"Gender" => "male",
           "Cell Phone" => "3215510968",
        "Date Of Birth" => "06/06/1961",
                 "year" => "1961",
                 "tags" => [
        [0] "timediff"
    ],
           "@timestamp" => 1961-01-06T00:00:00.000Z,
    "timestamp_changed" => "2019",
             "timediff" => 58,
                 "CNIC" => "3710144892657",
                "Sr No" => "1"
}
{
               "Gender" => "male",
           "Cell Phone" => "3238547712",
        "Date Of Birth" => "6/17/1952",
                 "year" => "1952",
                 "tags" => [
        [0] "timediff"
    ],
           "@timestamp" => 1952-01-17T00:00:00.000Z,
    "timestamp_changed" => "2019",
             "timediff" => 67,
                 "CNIC" => "3710155658463",
                "Sr No" => "2"
}
{
               "Gender" => "male",
           "Cell Phone" => "3015357676",
        "Date Of Birth" => "01/01/1956",
                 "year" => "1956",
                 "tags" => [
        [0] "timediff"
    ],

Two date filters were used to extract the timestamp_changed and year values. I should remove those fields once the result has been saved in timediff.

Is there a way I can define nested matches inside a single date filter?

I do not know what you mean by that.

For example, I'm getting:

"@timestamp" => 1952-01-17T00:00:00.000Z,

whereas in fact it should be the current system time, not the one picked from the "Date Of Birth" field.

I have just realized what you are doing with those date filters. You are setting [@timestamp] and then using add_field to extract the year.

To get the current year you do not need a date filter, and instead of using a date filter to parse [Date Of Birth] use dissect

    mutate { add_field => { "timestamp_changed" => "%{+YYYY}" } }
    dissect { mapping => { "Date Of Birth" => "%{}/%{}/%{year}" } }

That way [@timestamp] never gets stepped on.

1 Like