TAB in grok filter and delete mismatched records

hi all, (sorry for my english)

Hello everyone, I have the following problem: the log I'm trying to read is tab-separated, and I can't get my grok filter to parse it correctly. I have tried several options, but none of them parses it 100% correctly.

This is an example; each tab character is written as "TAB":

07/17/2020 10:55:30"TAB"-"TAB"1.21.73.192"TAB"w00044b52270013005500530055CHV Noticias Tarde
07/17/2020 10:55:33"TAB"-"TAB"1.6.201.251"TAB"o0006ffa47b0083400600470047
07/17/2020 10:55:37"TAB"-"TAB"1.23.221.40"TAB"w0004ca3a650013405501010038CHV Noticias Tarde
07/17/2020 10:55:38"TAB"-"TAB"1.23.141.130"TAB"w000793ea500084400900870087Zootopia
07/17/2020 10:55:39"TAB"-"TAB"1.22.193.98"TAB"O000aca1a910133001000130002
07/17/2020 10:55:40"TAB"-"TAB"1.23.170.57"TAB"w000abe6513003140530144014424 Tarde
07/17/2020 10:55:42"TAB"-"TAB"1.6.10.242"TAB"O000abd00b40165005200130002
07/17/2020 10:55:44"TAB"-"TAB"1.7.98.202"TAB"W00070fec3f0272062100020002Oro bajo cero

    # Read every harvestData log found under the Region_* directories.
    input {
      file {
        # "type" is what the filter section tests to select these events.
        type           => "harvest"
        path           => "/home/harvest/Region_*/harvestData.log*"
        # The log files are decoded as ISO-8859-1.
        codec          => plain { charset => "ISO-8859-1" }
        start_position => "beginning"
      }
    }

    filter {

      # Only events produced by the "harvest" file input are parsed here.
      if [type] == "harvest" {

        # Split the raw line into a timestamp, an IP address and the
        # fixed-width payload fields that follow it.
        # NOTE(review): the separators in this pattern are literal spaces,
        # whereas the sample log lines are TAB-separated.
        grok {
          match => { "message" => "(?<TIMESTAMP_ISO8601:timestamp>%{MONTHNUM}/%{MONTHDAY}/%{YEAR} %{TIME}) - %{IP} (?<WATCH>[W-w].{1})(?<MAC>.{10})(?<Service_ID>.{5})(?<NCANAL>.{3})(?<TIME_HARVEST>.{4})(?<IDLE_HARVEST>.{4})%{GREEDYDATA:PROGRAMACION}" }
        }

        # Use the parsed "timestamp" field as the event time; the helper
        # field is removed once the date has been applied.
        date {
          target       => "@timestamp"
          match        => [ "timestamp", "ISO8601", "MM/dd/yyyy HH:mm:ss", "mm/dd/yyyy HH:mm:ss", "m/dd/yyyy HH:mm:ss", "M/dd/yyyy HH:mm:ss" ]
          remove_field => [ "timestamp" ]
        }

        # Channel number -> channel name, via the canal.yaml dictionary.
        translate {
          dictionary_path => "/home/logstash/harvest/dictionary/canal.yaml"
          field           => "[NCANAL]"
          destination     => "[CANAL_NOMBRE]"
          exact           => true
          regex           => false
          fallback        => "No Encontrado"
        }

        # Channel number -> channel type, via the canal_tipo.yaml dictionary.
        translate {
          dictionary_path => "/home/logstash/harvest/dictionary/canal_tipo.yaml"
          field           => "[NCANAL]"
          destination     => "[TIPO]"
          exact           => true
          regex           => false
          fallback        => "No Encontrado"
        }
      }
    }

    # Send events to Elasticsearch on localhost, using a date-stamped index name.
    output {
      elasticsearch {
        index => "harvest-%{+YYYY.MM.dd}"
        hosts => "localhost:9200"
      }
    }

My other question is: how can I remove the records that do not match the pattern?

Here is the solution:

# Read every harvestData log found under the Region_* directories.
input {
  file {
    # "type" is what the filter section tests to select these events.
    type           => "harvest"
    path           => "/home/harvest/Region_*/harvestData.log*"
    # The log files are decoded as ISO-8859-1.
    codec          => plain { charset => "ISO-8859-1" }
    start_position => "beginning"
  }
}

filter {

  # Only events produced by the "harvest" file input are parsed here.
  if [type] == "harvest" {

    # Split the raw line into a timestamp, an IP address and the fixed-width
    # payload fields that follow it.
    #   - ".*-.*" absorbs the TAB-delimited "-" column between timestamp and IP.
    #   - "\t" matches the TAB between the IP address and the payload.
    #   - WATCH accepts only "W" or "w"; lines whose payload starts with any
    #     other letter (e.g. "o"/"O") fail to match and are tagged
    #     _grokparsefailure.
    grok {
      match => { "message" => "(?<TIMESTAMP_ISO8601:timestamp>%{MONTHNUM}/%{MONTHDAY}/%{YEAR} %{TIME}).*-.*%{IP}\t(?<WATCH>[Ww])(?<MAC>.{10})(?<Service_ID>.{5})(?<NCANAL>.{3})(?<TIME_HARVEST>.{4})(?<IDLE_HARVEST>.{4})%{GREEDYDATA:PROGRAMACION}" }
    }

    # Use the parsed "timestamp" field as the event time; the helper field is
    # removed once the date has been applied.
    # In these format strings "MM"/"M" is the month; lowercase "mm" is
    # minute-of-hour and belongs only in the time part.
    date {
      match        => [ "timestamp", "ISO8601", "MM/dd/yyyy HH:mm:ss", "M/dd/yyyy HH:mm:ss" ]
      target       => "@timestamp"
      remove_field => [ "timestamp" ]
    }

    # Channel number -> channel name, via the canal.yaml dictionary.
    translate {
      field           => "[NCANAL]"
      destination     => "[CANAL_NOMBRE]"
      dictionary_path => "/home/logstash/harvest/dictionary/canal.yaml"
      fallback        => "No Encontrado"
      exact           => true
      regex           => false
    }

    # Channel number -> channel type, via the canal_tipo.yaml dictionary.
    translate {
      field           => "[NCANAL]"
      destination     => "[TIPO]"
      dictionary_path => "/home/logstash/harvest/dictionary/canal_tipo.yaml"
      fallback        => "No Encontrado"
      exact           => true
      regex           => false
    }
  }

  # Discard every record the grok pattern could not parse.
  if "_grokparsefailure" in [tags] { drop {} }
}

# Send events to Elasticsearch on localhost, using a date-stamped index name.
output {
  elasticsearch {
    index => "harvest-%{+YYYY.MM.dd}"
    hosts => "localhost:9200"
  }
}

Use gsub (in a mutate filter) inside the filter section:

mutate {
  # gsub takes (field, regex, replacement) triples.
  # "\t" is the regex for a TAB character; each one becomes a single space.
  gsub => [
    "message", "\t", " "
  ]
}

It will replace each TAB with " " (a space).

Regards,
Fadjar Tandabawana

This topic was automatically closed 28 days after the last reply. New replies are no longer allowed.