Dupilcate message on elasticsearch

Hello Guys,

Need help regarding duplicate messages on my elasticsearch. I have below config. As you can see i'm using fingerprint plugin to fix duplicates but to no avail.

input {
beats {
port => 5044
ssl => true
ssl_certificate => "/etc/pki/tls/certs/logstash-forwarder.crt"
ssl_key => "/etc/pki/tls/private/logstash-forwarder.key"
}

}
filter {
if [type] == "syslog" {
grok {
match => { "message" => "%{SYSLOGTIMESTAMP:syslog_timestamp} %{SYSLOGHOST:syslog_hostname} %{DATA:syslog_program}(?:[%{POSINT:syslog_pid}])?: %{GREEDYDATA:syslog_message}" }
add_field => [ "received_at", "%{@timestamp}" ]
add_field => [ "received_from", "%{host}" ]
}
syslog_pri { }
date {
match => [ "syslog_timestamp", "MMM d HH:mm:ss", "MMM dd HH:mm:ss", "HH:mm:ss.SSSZZZ" ]
}
}

else if [type] == "applog" {
grok {
match => {"message" => "%{DATA:timestampko} .*"}
add_field => [ "received_at", "%{@timestamp}" ]
add_field => [ "received_from", "%{host}" ]
}
if "UTC " in [message] {
if "23:59:5" not in [message] {
ruby {
code => "
event.set('dateko', Time.now.strftime('%Y-%m-%d'))
"
}
mutate {
add_field => {
"akkatimestamp" => "%{dateko}.%{timestampko}"
}
remove_field => ["timestampko", "newtimestamp"]
}
date {
match => [ "akkatimestamp" , "YYYY-MM-dd.HH:mm:ss.SSSZZZ", "yyyy-MM-dd.HH:mm:ss.SSSZZZ" ]
timezone => "UTC"
target => "@timestamp"
}
}
else {
ruby {
code => "
event.set('datemo', DateTime.yesterday.strftime('%Y-%m-%d'))
"
}
mutate {
add_field => {
"akkatimestamp" => "%{datemo}.%{timestampko}"
}
remove_field => ["timestampko", "newtimestamp"]
}
date {
match => [ "akkatimestamp" , "YYYY-MM-dd.HH:mm:ss.SSSZZZ", "yyyy-MM-dd.HH:mm:ss.SSSZZZ" ]
timezone => "UTC"
target => "@timestamp"
}
}
}

else {
grok {
match => {"message" => "[%{DATA:timestamp}] .*"}
}
ruby {
code => "
event.set('date', Time.now.strftime('%L'))
"
}
ruby {
code => "
event.set('zone', Time.now.strftime('%Z'))
"
}
mutate {
add_field => {
"newtimestamp" => "%{timestamp}.%{date}%{zone}"
}
remove_field => ["date", "timestamp", "zone"]
}
date {
match => [ "newtimestamp", "YYYY/MM/dd HH:mm:ss.SSSZZZ" ]
}
}
}
if "DEBUG" not in [message] and "WARN" not in [message] and "INFO" not in [message] and "ERROR" not in [message] {
drop { }
}
fingerprint {
source => "message"
target => "%{[@metadata][fingerprint]}"
method => "MURMUR3"
}
}

output {
if [host] == "ip-x" or [host] == "ip-x" {
if [type] == "applog" {
elasticsearch {
hosts => ["https://search-em-escluster-rwzyzlh63zokr7bqssxjgerfbe.eu-west-1.es.amazonaws.com:443"]
sniffing => false
manage_template => false
index => "applog-%{+YYYY.MM.dd}"
document_type => "%{[@metadata][type]}"
document_id => "%{[@metadata][fingerprint]}"
}
}
else if [type] == "syslog" {
elasticsearch {
hosts => ["https://search-em-escluster-rwzyzlh63zokr7bqssxjgerfbe.eu-west-1.es.amazonaws.com:443"]
sniffing => false
manage_template => false
index => "syslog-%{+YYYY.MM.dd}"
document_type => "%{[@metadata][type]}"
}
}
else {
file {
path => "/var/log/logstash/unknown_messages_hlr.log"
}
}
}

else {
file {
path => "/var/log/logstash/unknown_messages.log"
}
}
}

Hello Guys

i've also tried solution like below config. But i still encountered duplicate message on my ES. the only difference is the "_id" field. Any thing you've adjusted other that above solution?

fingerprint {
source => "message"
target => "%{[@metadata][fingerprint]}"
method => "MURMUR3"
}
}

output {
if [host] == "ip-10-100-x" or [host] == "ip-10-100-x" {
if [type] == "applog" {
elasticsearch {
hosts => ["host:443"]
sniffing => false
manage_template => false
index => "hlr-applog-%{+YYYY.MM.dd}"
document_type => "%{[@metadata][type]}"
document_id => "%{[@metadata][fingerprint]}"
}
}
else if [type] == "syslog" {
elasticsearch {
hosts => ["host:443"]
sniffing => false
manage_template => false
index => "hlr-syslog-%{+YYYY.MM.dd}"
document_type => "%{[@metadata][type]}"
document_id => "%{[@metadata][fingerprint]}"
}
}
else {
file {
path => "/var/log/logstash/unknown_messages_x.log"
}
}
}

Can you show us two events in Elasticsearch that are duplicates?

Hello

it was resolved after i've removed document_id => "%{[@metadata][fingerprint]}". Just one question. I've created an index template on ES and i want to use it on my rotated index-%{+YYYY.MM.dd}" every creation. how can i set this on logstash output?

i have this error

[2017-11-17T08:05:35,713][INFO ][logstash.outputs.elasticsearch] Using mapping template from {:path=>"/home/ubuntu/ELK/my-akkatemplate.json"}
[2017-11-17T08:05:35,717][INFO ][logstash.outputs.elasticsearch] Attempting to install template {:manage_template=>{"my_akkatemplate"=>{"order"=>0, "template"=>"[applog, em-hlr-applog-*]", "settings"=>{"index"=>{"analysis"=>{"index"=>{"number_of_shards"=>"3", "number_of_replicas"=>"1"}, "analyzer"=>{"domain_name_analyzer"=>{"filter"=>"lowercase", "type"=>"custom", "tokenizer"=>"domain_name_tokenizer"}}, "tokenizer"=>{"domain_name_tokenizer"=>{"reverse"=>"true", "type"=>"PathHierarchy", "delimiter"=>"."}}}}}, "mappings"=>{"applog"=>{"properties"=>{"message"=>{"type"=>"text", "fields"=>{"ws"=>{"analyzer"=>"whitespace", "type"=>"text"}}}}}}, "aliases"=>{}}}}
[2017-11-17T08:05:35,721][INFO ][logstash.outputs.elasticsearch] Installing elasticsearch template to _template/logstash
[2017-11-17T08:05:35,725][ERROR][logstash.outputs.elasticsearch] Failed to install template. {:message=>"Got response code '400' contacting Elasticsearch at URL 'privte-host:443/_template/logstash'", :class=>"LogStash::Outputs::ElasticSearch::HttpClient::Pool::BadResponseCodeError", :backtrace=>["/usr/share/logstash/vendor/bundle/jruby/1.9/gems/logstash-output-elasticsearch-7.4.0-java/lib/logstash/outputs/elasticsearch/http_client/manticore_adapter.rb:80:in perform_request'", "/usr/share/logstash/vendor/bundle/jruby/1.9/gems/logstash-output-elasticsearch-7.4.0-java/lib/logstash/outputs/elasticsearch/http_client/pool.rb:269:inperform_request_to_url'", "/usr/share/logstash/vendor/bundle/jruby/1.9/gems/logstash-output-elasticsearch-7.4.0-java/lib/logstash/outputs/elasticsearch/http_client/pool.rb:257:in perform_request'", "/usr/share/logstash/vendor/bundle/jruby/1.9/gems/logstash-output-elasticsearch-7.4.0-java/lib/logstash/outputs/elasticsearch/http_client/pool.rb:347:inwith_connection'", "/usr/share/logstash/vendor/bundle/jruby/1.9/gems/logstash-output-elasticsearch-7.4.0-java/lib/logstash/outputs/elasticsearch/http_client/pool.rb:256:in perform_request'", "/usr/share/logstash/vendor/bundle/jruby/1.9/gems/logstash-output-elasticsearch-7.4.0-java/lib/logstash/outputs/elasticsearch/http_client/pool.rb:264:input'", "/usr/share/logstash/vendor/bundle/jruby/1.9/gems/logstash-output-elasticsearch-7.4.0-java/lib/logstash/outputs/elasticsearch/http_client.rb:337:in template_put'", "/usr/share/logstash/vendor/bundle/jruby/1.9/gems/logstash-output-elasticsearch-7.4.0-java/lib/logstash/outputs/elasticsearch/http_client.rb:82:intemplate_install'", "/usr/share/logstash/vendor/bundle/jruby/1.9/gems/logstash-output-elasticsearch-7.4.0-java/lib/logstash/outputs/elasticsearch/template_manager.rb:29:in install'", "/usr/share/logstash/vendor/bundle/jruby/1.9/gems/logstash-output-elasticsearch-7.4.0-java/lib/logstash/outputs/elasticsearch/template_manager.rb:9:ininstall_template'", "/usr/share/logstash/vendor/bundle/jruby/1.9/gems/logstash-output-elasticsearch-7.4.0-java/lib/logstash/outputs/elasticsearch/common.rb:58:in install_template'", "/usr/share/logstash/vendor/bundle/jruby/1.9/gems/logstash-output-elasticsearch-7.4.0-java/lib/logstash/outputs/elasticsearch/common.rb:25:inregister'", "/usr/share/logstash/logstash-core/lib/logstash/output_delegator_strategies/shared.rb:9:in register'", "/usr/share/logstash/logstash-core/lib/logstash/output_delegator.rb:43:inregister'", "/usr/share/logstash/logstash-core/lib/logstash/pipeline.rb:290:in register_plugin'", "/usr/share/logstash/logstash-core/lib/logstash/pipeline.rb:301:inregister_plugins'", "org/jruby/RubyArray.java:1613:in each'", "/usr/share/logstash/logstash-core/lib/logstash/pipeline.rb:301:inregister_plugins'", "/usr/share/logstash/logstash-core/lib/logstash/pipeline.rb:310:in start_workers'", "/usr/share/logstash/logstash-core/lib/logstash/pipeline.rb:235:inrun'", "/usr/share/logstash/logstash-core/lib/logstash/agent.rb:398:in `start_pipeline'"]}

Hello Going back to Duplicate messages

I still encounter it, i do have below config

filter {
fingerprint {
source => "message"
target => "%{[@metadata][fingerprint]}"
method => "MURMUR3"
}

}

output {
if [host] == "localhost" {
if [type] == "router" {
elasticsearch {
hosts => ["hostko:443"]
document_id => "%{[@metadata][fingerprint]}"
index => "router-applog-%{+YYYY.MM.dd}"
template => "/home/ubuntu/ELK/my-applogtemplate-updated.json"
template_name => "applog-"
template_overwrite => true
document_type => "%{[@metadata][type]}"
}
}
else if [type] == "syslog" {
elasticsearch {
hosts => ["hostko:443"]
sniffing => false
index => "maprouter-syslog-%{+YYYY.MM.dd}"
template => "/home/ubuntu/ELK/my-syslogtemplate-updated.json"
template_name => "syslog-
"
template_overwrite => true
document_type => "%{[@metadata][type]}"
document_id => "%{[@metadata][fingerprint]}"
}
}
else {
file {
path => "/var/log/logstash2/unknown_messages.log"
}
}
}
}

on Kibana, i can see that there are duplicates, the only difference is the "_id". Please help

This topic was automatically closed 28 days after the last reply. New replies are no longer allowed.