Occasional duplicate entries going into different indices

I have a syslog node that is outputting to logstash on TCP.
I seem to occasionally get duplicate log entries: one goes to the index I expect, while the other goes to a new index.
My logstash syslog config is this:

input {
  # Listen for syslog traffic on both transports. The type field is set
  # explicitly here because the tcp/udp inputs do NOT set [type] on their
  # own — without it, the `if [type] == "syslog"` guard in the filter
  # section never matches and every event passes through unparsed.
  tcp {
    port => 5140
    type => "syslog"
  }
  udp {
    port => 5140
    type => "syslog"
  }
}
filter {
  # Parse Sophos UTM log lines arriving over syslog. Requires the inputs
  # (or the sender) to set [type] to "syslog"; otherwise this whole block
  # is skipped.
  if [type] == "syslog" {

    # --- reverse proxy (WAF) access lines ---------------------------------
    if "reverseproxy" in [message] {
      grok {
        break_on_match => false
        # NOTE(review): two repairs relative to the pasted original:
        #  * literal double quotes inside a double-quoted config string must
        #    be escaped as \" or the config will not load;
        #  * Oniguruma named captures need a name — the bare (?...) form is a
        #    regex syntax error, so the timestamp capture is (?<timestamp>...).
        # Also beware: %{HOSTNAME:host} overwrites the [host] field that the
        # input set to the sender's IP — consider a different capture name.
        match => [
          "message", "<%{INT:priority}>(?<timestamp>%{YEAR}:%{MONTHNUM}:%{MONTHDAY}-%{HOUR}:%{MINUTE}:%{SECOND}) %{HOSTNAME:logsource} %{WORD:interface}: id=\"%{INT:id}\" srcip=\"%{IP:source_ip}\" localip=\"%{IP:local_ip}\" size=\"%{INT:size}\" user=\"%{USER:user}\" host=\"%{HOSTNAME:host}\" method=\"%{WORD:method}\" statuscode=\"%{INT:statuscode}\" reason=\"%{DATA:reason}\" extra=\"%{DATA:extra}\" exceptions=\"%{DATA:exceptions}\" time=\"%{INT:time}\" url=\"%{DATA:url}\" server=\"%{DATA:server}\" referer=\"%{DATA:referer}\" cookie=\"%{GREEDYDATA:cookie}\" set-cookie=\"%{DATA:set_cookie}\" ",
          "message", "<%{INT:priority}>(?<timestamp>%{YEAR}:%{MONTHNUM}:%{MONTHDAY}-%{HOUR}:%{MINUTE}:%{SECOND}) %{HOSTNAME:logsource} %{WORD:interface}: id=\"%{INT:id}\" srcip=\"%{IP:source_ip}\" localip=\"%{IP:local_ip}\" size=\"%{INT:size}\" user=\"%{USER:user}\" host=\"%{HOSTNAME:host}\" method=\"%{WORD:method}\" statuscode=\"%{INT:statuscode}\" reason=\"%{DATA:reason}\" extra=\"%{DATA:extra}\" exceptions=\"%{DATA:exceptions}\" time=\"%{INT:time}\" url=\"%{DATA:url}\" server=\"%{DATA:server}\" referer=\"%{DATA:referer}\"; cookie=\"%{GREEDYDATA:cookie}\" set-cookie=\"%{DATA:set_cookie}\" "
        ]
      }
      mutate {
        add_tag => "reverseproxy"
        # NOTE(review): this runs even when grok failed, which hides real
        # parse failures; consider only removing the tag when a pattern hit.
        remove_tag => "_grokparsefailure"
      }
      geoip {
        source => "source_ip"
      }
    }

    # --- ulogd firewall packet log -----------------------------------------
    if "ulogd" in [message] {
      grok {
        break_on_match => false
        # \[ and \] escape the literal brackets around the PID; unescaped,
        # grok would read [%{INT:pid}] as a regex character class.
        match => [
          "message", "<%{INT:fw_id}>(?<timestamp>%{YEAR}:%{MONTHNUM}:%{MONTHDAY}-%{HOUR}:%{MINUTE}:%{SECOND}) %{HOSTNAME:logsource} %{WORD:program}\[%{INT:pid}\]: id=\"%{INT:id}\" severity=\"%{WORD:severity}\" sys=\"%{WORD:sys}\" sub=\"%{WORD:sub}\" name=\"%{DATA:name}\" action=\"%{WORD:action}\" fwrule=\"%{INT:fwrule}\" outitf=\"%{NOTSPACE:outitf}\" mark=\"%{DATA:mark}\" app=\"%{DATA:app}\" srcmac=\"%{MAC:srcmac}\" srcip=\"%{IPV4:source_ip}\" dstip=\"%{IPV4:destination_ip}\" proto=\"%{WORD:protocol}\" length=\"%{INT:length}\" tos=\"%{DATA:tos}\" prec=\"%{DATA:prec}\" ttl=\"%{INT:ttl}\" srcport=\"%{INT:srcport}\" dstport=\"%{INT:dstport}\" "
        ]
      }
      mutate {
        remove_tag => "_grokparsefailure"
        add_tag => "ulogd"
      }
      geoip {
        source => "source_ip"
      }
    }

    # --- aua (authentication agent) ----------------------------------------
    if "aua" in [message] {
      grok {
        break_on_match => false
        match => [
          "message", "<%{INT:fw_id}>(?<timestamp>%{YEAR}:%{MONTHNUM}:%{MONTHDAY}-%{HOUR}:%{MINUTE}:%{SECOND}) %{HOSTNAME:logsource} %{WORD:program}\[%{INT:pid}\]: id=\"%{INT:id}\" severity=\"%{WORD:severity}\" sys=\"%{WORD:sys}\" sub=\"%{WORD:sub}\" name=\"%{DATA:name}\" srcip=\"%{IP:source_ip}\" host=\"%{DATA:external_host}\" user=\"%{DATA:user}\" caller=\"%{WORD:caller}\" reason=\"%{WORD:reason}\" ",
          "message", "<%{INT:fw_id}>(?<timestamp>%{YEAR}:%{MONTHNUM}:%{MONTHDAY}-%{HOUR}:%{MINUTE}:%{SECOND}) %{HOSTNAME:logsource} %{WORD:program}\[%{INT:pid}\]: id=\"%{INT:id}\" severity=\"%{WORD:severity}\" sys=\"%{WORD:sys}\" sub=\"%{WORD:sub}\" name=\"Trying %{IP:ip_intern} %{DATA}\" "
        ]
      }
      mutate {
        remove_tag => "_grokparsefailure"
        add_tag => "aua"
      }
      geoip {
        source => "source_ip"
      }
    }

    # --- confd (configuration daemon) --------------------------------------
    if "confd" in [message] {
      grok {
        break_on_match => false
        # NOTE(review): [user] is captured twice in this pattern (once as
        # WORD, later as DATA), which makes grok store an array — verify the
        # field layout against a real confd line before relying on [user].
        match => [
          "message", "(?<timestamp>%{YEAR}:%{MONTHNUM}:%{MONTHDAY}-%{HOUR}:%{MINUTE}:%{SECOND}) %{HOSTNAME:logsource} %{WORD:confd}\[%{INT:pid}\]: id=\"%{INT:id}\" severity=\"%{WORD:severity}\" sys=\"%{WORD:sys}\" sub=\"%{WORD:sub}\" name=\"%{DATA:name}\" user=\"%{WORD:user}\" host=\"%{DATA:external_host}\" user=\"%{DATA:user}\" caller=\"%{WORD:caller}\" reason=\"%{WORD:reason}\" "
        ]
      }
      mutate {
        remove_tag => "_grokparsefailure"
        add_tag => "confd"
      }
      geoip {
        source => "source_ip"
      }
    }

  }
}

output {
# NOTE(review): Logstash concatenates every .conf file in the pipeline
# directory into ONE pipeline, so events entering here also reach the
# outputs defined in the OTHER files. The duplicate documents landing in a
# literal "%{[@metadata][beat]}-YYYY.MM.dd" index point to a beats-style
# output in another file: syslog events carry no [@metadata][beat], so its
# sprintf index reference is left unresolved. Guard that other output with
# a conditional (or split the configs via pipelines.yml multiple pipelines)
# to stop the duplication.
# Only events whose message contains "heimdallr" are indexed by THIS file;
# everything else falls through to stdout below.
if "heimdallr" in [message] {
elasticsearch {
hosts => ["172.x.x.x:9200"]
index => "utm-%{+YYYY.MM.dd}"
sniffing => false
}
}

# Debug copy of every event; remove in production to cut log noise.
stdout { codec => rubydebug }

}

From that output the majority of my logs go into a utm date index and look like this:

{
"_index": "utm-2017.11.09",
"_type": "logs",
"_id": "AV-eFAFmFdBBEmEyn9Ro",
"_version": 1,
"_score": null,
"_source": {
"@version": "1",
"host": "172.16.x.x",
"@timestamp": "2017-11-09T00:00:32.092Z",
"message": "<30>2017:11:08-23:58:32 heimdallr httpproxy[4344]: id="0001" severity="info" sys="SecureWeb" sub="http" name="http access" action="pass" method="CONNECT" srcip="172.16.x.x" dstip="x.x.x.x" user="" group="" ad_domain="" statuscode="200" cached="0" profile="REF_DefaultHTTPProfile (Default Web Filter Profile)" filteraction="REF_DefaultHTTPCFFAction (Default content filter action)" size="21127" request="0xe23df000" url="https://ec2messages.eu-west-1.amazonaws.com/" referer="" error="" authtime="0" dnstime="2" cattime="107" avscantime="0" fullreqtime="167138144" device="0" auth="0" ua="" exceptions="" category="178" reputation="trusted" categoryname="Internet Services" country="Ireland"",
"port": 35837
},
"fields": {
"@timestamp": [
1510185632092
]
},
"sort": [
1510185632092
]
}

But I'm finding that there are some events that are duplicated and I can't seem to see why. The event above has a duplicate that was sent to the "%{[@metadata][beat]}-2017.11.09" index. The message field is identical, the only difference is the index.

  {

"_index": "%{[@metadata][beat]}-2017.11.09",
"_type": "%{[@metadata][type]}",
"_id": "AV-eFAFiFdBBEmEyn9Rn",
"_score": 1,
"_source": {
"@version": "1",
"host": "172.16.x.x",
"@timestamp": "2017-11-09T00:00:32.092Z",
"message": """<30>2017:11:08-23:58:32 heimdallr httpproxy[4344]: id="0001" severity="info" sys="SecureWeb" sub="http" name="http access" action="pass" method="CONNECT" srcip="172.16.x.x" dstip="x.x.x.x" user="" group="" ad_domain="" statuscode="200" cached="0" profile="REF_DefaultHTTPProfile (Default Web Filter Profile)" filteraction="REF_DefaultHTTPCFFAction (Default content filter action)" size="21127" request="0xe23df000" url="https://ec2messages.eu-west-1.amazonaws.com/" referer="" error="" authtime="0" dnstime="2" cattime="107" avscantime="0" fullreqtime="167138144" device="0" auth="0" ua="" exceptions="" category="178" reputation="trusted" categoryname="Internet Services" country="Ireland"""",
"port": 35837
}
}

The index name the duplicate is being written to is defined in a different output configuration file — one whose Elasticsearch destination this host can't reach.

This topic was automatically closed 28 days after the last reply. New replies are no longer allowed.