Duplicate documents

Hi,
I have the Logstash configuration below. I don't know why it shows two JSON messages (with the same offset) both in the debug output on the console and in Elasticsearch. I think two threads are doing this duplication. How can I debug Logstash thread information on Windows?

input {
  # Receive events from Filebeat on port 5000.
  beats {
    port => "5000"
  }
}
filter {
  # For anything other than access logs, just extract an ISO-8601 timestamp.
  if [type] != "tomcatAccessLog" {
    grok {
      match => { "message" => "%{TIMESTAMP_ISO8601:eventTime}" }
    }
  }
  # Parse the unstructured Tomcat access log into structured, queryable fields.
  if [type] == "tomcatAccessLog" {
    grok {
      # FIX: a space is required between %{WORD:method} and %{NOTSPACE:requestURL};
      # without it the request-line alternative can never match a real log line.
      match => { "message" => '%{IPORHOST:clientip} %{USER:ident} %{USER:auth} \[%{HTTPDATE:eventLogTime}\] "(?:%{WORD:method} %{NOTSPACE:requestURL}(?: HTTP/%{NUMBER:httpversion})?|%{DATA:rawrequest})" %{NUMBER:responseStatus} (?:%{NUMBER:responseSize}|-)' }
    }
  }
  # Runs for EVERY event (both branches above).
  # FIX: added "dd/MMM/yyyy:HH:mm:ss Z" — the format HTTPDATE captures — which was
  # missing, so access-log timestamps could never be parsed.
  date {
    match => [ "eventLogTime", "dd/MMM/yyyy:HH:mm:ss Z", "YYYY-MM-dd'T'HH:mm:ss", "YYYY-MM-dd HH:mm:ss", "HH:mm:ss MMM dd yyyy", "YYYY-MM-dd HH:mm:ss,SSS", "yyyy.MM.dd G 'at' HH:mm:ss z", "yyyyy.MMMMM.dd GGG hh:mm aaa", "EEE, d MMM yyyy HH:mm:ss Z", "yyyy-MM-dd'T'HH:mm:ss.SSSZ", "yyyy-MM-dd'T'HH:mm:ss.SSSZ+0300", "YYYY/MM/dd HH:mm:ss", "yyyy-MM-dd'T'HH:mm:ss.SSSSSZ+03:00", "YYYY-MM-dd HH:mm:ss.SSS", "YYYY-MM-dd HH:mm:ss.S", "yyyy-MM-dd'T'HH:mm:ss.SSSSSS+02:00", "YYYY-MM-dd HH:mm:ss.SSSS" ]
    target => "eventLogTime"
  }
  # Split key=value pairs into new fields for Kibana.
  # FIX: "imestamp" was a typo for "timestamp".
  kv {
    include_keys => [ "ident", "auth", "timestamp", "verb", "request", "httpversion", "rawrequest", "response", "bytes", "responseDuration" ]
  }
  # Drop noise fields, coerce responseTime to an integer, and mask
  # 6-digit/3-digit identifiers in the URL and raw message.
  mutate {
    remove_field => [ "ident", "httpversion", "auth", "[beat][name]", "[beat][hostname]", "input_type", "clientip" ]
    convert => { "responseTime" => "integer" }
    #add_field => { "testfield" => "Hello this is test field" }
    gsub => [
      "requestURL", "\d{6}[^0-9]\d{3}[0-9a-zA-Z.*]", "******-***",
      "message",    "\d{6}[^0-9]\d{3}[0-9a-zA-Z.*]", "******-***"
    ]
  }
}
output {
  # NOTE(review): if another file in conf.d also defines an elasticsearch output,
  # every event is emitted once per output — a common cause of "duplicate" documents.
  elasticsearch {
    hosts => ["localhost:9200"]
    index => "%{Application}-%{+YYYY.MM.dd}"
  }
  stdout { codec => rubydebug { metadata => true } }
}

Hi,
One more update: I am using Filebeat 1.2.1 with the configuration below.

filebeat:
  # List of prospectors to fetch data.
  prospectors:
    -
      paths:
        # Quoted so YAML keeps the backslash literal.
        # NOTE(review): a relative "\test.log" is drive-root-relative on Windows —
        # consider an absolute path such as C:\logs\test.log.
        - '\test.log'

      # Custom fields added to every event; with fields_under_root they are
      # placed at the top level of the document (e.g. %{Application} in the
      # Logstash elasticsearch index name).
      fields:
        Application: salesbackend
        Sub-System: Sales

      fields_under_root: true

      document_type: error

  # NOTE(review): Unix-style path, but this runs on Windows — point the
  # registry at a writable Windows location; verify against the deployment.
  registry_file: /var/lib/filebeat/registry

output:
  ### Logstash as output
  logstash:
    # The Logstash hosts (the beats input listening on port 5000).
    hosts: ["localhost:5000"]

    # Optional TLS. By default it is off.
    #tls:
      # List of root certificates for HTTPS server verification.
      #certificate_authorities: ["/opt/SSL/lij-issuing.cer.pem"]

logging:
  to_files: true

  # Settings for logging to files; to_files must be true for these to apply.
  files:
    # The directory the log files are written to.
    path: /

    # The base name of the files the logs are written to.
    name: filebeat

    # Rotate the log file once it reaches this size limit.
    rotateeverybytes: 10485760  # = 10MB

  level: debug

Perhaps you have two configuration files in /etc/logstash/conf.d that both contain the same output configuration?

1 Like