Field _id is defined twice in [doc]

Hello there

While setting up Logstash and Elasticsearch I ran into the following issue when indexing the HAProxy logs:
[2018-05-04T13:42:29,144][WARN ][logstash.outputs.elasticsearch] Could not index event to Elasticsearch. {:status=>400, :action=>["index", {:_id=>nil, :_index=>"acc_app_haproxy-2018.05.04", :_type=>"doc", :_routing=>nil}, #<LogStash::Event:0x7e305898>], :response=>{"index"=>{"_index"=>"acc_app_haproxy-2018.05.04", "_type"=>"doc", "_id"=>"pcT1KmMByYtKIbIJ6CXz", "status"=>400, "error"=>{"type"=>"illegal_argument_exception", "reason"=>"Field [_id] is defined twice in [doc]"}}}}

In the Elasticsearch logs I keep seeing the following:
[2018-05-04T13:45:12,733][DEBUG][o.e.a.b.TransportShardBulkAction] [acc_app_haproxy-2018.05.04][0] failed to execute bulk item (index) BulkShardRequest [[acc_app_haproxy-2018.05.04][0]] containing [3] requests
java.lang.IllegalArgumentException: Field [_id] is defined twice in [doc]
at org.elasticsearch.index.mapper.MapperService.checkFieldUniqueness(MapperService.java:578) ~[elasticsearch-6.2.4.jar:6.2.4]
at org.elasticsearch.index.mapper.MapperService.internalMerge(MapperService.java:420) ~[elasticsearch-6.2.4.jar:6.2.4]
at org.elasticsearch.index.mapper.MapperService.internalMerge(MapperService.java:353) ~[elasticsearch-6.2.4.jar:6.2.4]
at org.elasticsearch.index.mapper.MapperService.merge(MapperService.java:285) ~[elasticsearch-6.2.4.jar:6.2.4]
at org.elasticsearch.cluster.metadata.MetaDataMappingService$PutMappingExecutor.applyRequest(MetaDataMappingService.java:313) ~[elasticsearch-6.2.4.jar:6.2.4]
at org.elasticsearch.cluster.metadata.MetaDataMappingService$PutMappingExecutor.execute(MetaDataMappingService.java:230) ~[elasticsearch-6.2.4.jar:6.2.4]
at org.elasticsearch.cluster.service.MasterService.executeTasks(MasterService.java:643) ~[elasticsearch-6.2.4.jar:6.2.4]
at org.elasticsearch.cluster.service.MasterService.calculateTaskOutputs(MasterService.java:273) ~[elasticsearch-6.2.4.jar:6.2.4]
at org.elasticsearch.cluster.service.MasterService.runTasks(MasterService.java:198) ~[elasticsearch-6.2.4.jar:6.2.4]
at org.elasticsearch.cluster.service.MasterService$Batcher.run(MasterService.java:133) ~[elasticsearch-6.2.4.jar:6.2.4]
at org.elasticsearch.cluster.service.TaskBatcher.runIfNotProcessed(TaskBatcher.java:150) ~[elasticsearch-6.2.4.jar:6.2.4]
at org.elasticsearch.cluster.service.TaskBatcher$BatchedTask.run(TaskBatcher.java:188) ~[elasticsearch-6.2.4.jar:6.2.4]
at org.elasticsearch.common.util.concurrent.ThreadContext$ContextPreservingRunnable.run(ThreadContext.java:573) ~[elasticsearch-6.2.4.jar:6.2.4]
at org.elasticsearch.common.util.concurrent.PrioritizedEsThreadPoolExecutor$TieBreakingPrioritizedRunnable.runAndClean(PrioritizedEsThreadPoolExecutor.java:244) ~[elasticsearch-6.2.4.jar:6.2.4]
at org.elasticsearch.common.util.concurrent.PrioritizedEsThreadPoolExecutor$TieBreakingPrioritizedRunnable.run(PrioritizedEsThreadPoolExecutor.java:207) ~[elasticsearch-6.2.4.jar:6.2.4]
at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149) [?:1.8.0_141]
at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624) [?:1.8.0_141]
at java.lang.Thread.run(Thread.java:748) [?:1.8.0_141]

My Logstash configuration for these logs is as follows:

HAProxy logs

else if ( [document_type] == "haproxy" ) {
  grok {
    patterns_dir => ["/etc/logstash/pipeline/patterns"]
    match => {
      "message" => [
        "^%{SYSLOGTIMESTAMP:timestamp} %{IPORHOST} %{PROG}(?:\[%{POSINT}\])?: %{IPORHOST:sourceIP}:%{POSINT:port}%{PIPE}\{%{DATA:tenant}\|%{DATA:userAgent}\}%{PIPE}%{NOTSPACE:frontend} %{NOTSPACE:backend}/%{NOTSPACE:server} %{INT:timeClientRequest}/%{INT:timeQueue}/%{INT:timeTCP}/%{INT:timeServer}/%{INT:timeTotal} %{INT:statusCode} %{INT:bytes} %{INT:concurrentConnectionsProcess}/%{INT:concurrentConnectionsFrontend}/%{INT:concurrentConnectionsBackend}/%{INT:concurrentConnectionsServer}/%{INT:retries} %{INT:connectionsServer}/%{INT:connectionsBackend} \"%{DATA:verb} %{DATA:request}\"",
        "^%{SYSLOGTIMESTAMP:timestamp} %{IPORHOST} %{PROG}(?:\[%{POSINT}\])?: %{IPORHOST:sourceIP}:%{POSINT:port} \[%{MONTHDAY}/%{MONTH}/%{YEAR}:%{TIME}\] %{NOTSPACE:frontend}: %{GREEDYDATA:haproxyError}",
        "^%{SYSLOGTIMESTAMP:timestamp} %{IPORHOST} %{PROG}(?:\[%{POSINT}\])?: %{IPORHOST:sourceIP}:%{POSINT:port}%{PIPE}\{%{DATA:tenant}\|%{DATA:userAgent}\}%{PIPE}%{NOTSPACE:frontend} %{NOTSPACE:backend}/%{NOTSPACE:server} %{INT:timeClientRequest}/%{INT:timeQueue}/%{INT:timeTCP}/%{INT:timeServer}/%{INT:timeTotal} %{INT:statusCode} %{INT:bytes} %{INT:concurrentConnectionsProcess}/%{INT:concurrentConnectionsFrontend}/%{INT:concurrentConnectionsBackend}/%{INT:concurrentConnectionsServer}/%{INT:retries} %{INT:connectionsServer}/%{INT:connectionsBackend} \"%{GREEDYDATA:failedRequest}\"",
        "^%{SYSLOGTIMESTAMP:timestamp} %{IPORHOST} %{PROG}(?:\[%{POSINT}\])?: %{GREEDYDATA:logEntry}"
      ]
    }
    overwrite => [ "message" ]
    break_on_match => true
  }
  date {
    match => [ "timestamp", "MMM  d HH:mm:ss", "MMM dd HH:mm:ss", "ISO8601" ]
    target => "@timestamp"
    timezone => "Europe/Brussels"
  }
  mutate {
    convert => {
      "bytes" => "integer"
      "concurrentConnectionsBackend" => "integer"
      "concurrentConnectionsFrontend" => "integer"
      "concurrentConnectionsProcess" => "integer"
      "concurrentConnectionsServer" => "integer"
      "connectionsBackend" => "integer"
      "connectionsServer" => "integer"
      "port" => "integer"
      "retries" => "integer"
      "statusCode" => "integer"
      "timeClientRequest" => "integer"
      "timeQueue" => "integer"
      "timeServer" => "integer"
      "timeTCP" => "integer"
      "timeTotal" => "integer"
    }
  }
}
mutate {
  rename => {
    "env" => "[@metadata][env]"
    "app" => "[@metadata][app]"
    "source" => "file"
  }
  remove_field => [ "@version", "offset", "document_type", "timestamp" ]
  remove_tag => [ "beats_input_codec_plain_applied" ]
}
mutate {
  rename => { "sourceIP" => "source" }
}

output {
  else if [@metadata][app] == "haproxy" {
    elasticsearch {
      hosts => "localhost:9200"
      index => "%{[@metadata][env]}_app_%{[@metadata][app]}-%{+YYYY.MM.dd}"
      user => "elastic"
      password => "changeme"
    }
  }
}
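
For reference, the first grok pattern is meant to match custom HAProxy HTTP log lines that look roughly like this one (host names, IPs, tenant and user agent are invented, and I'm showing the custom PIPE separators as literal | characters):

May  4 13:42:28 lb01 haproxy[2731]: 10.1.2.3:52014|{tenant1|Mozilla/5.0}|https-in app_backend/web01 0/0/1/12/13 200 1274 10/8/3/1/0 0/0 "GET /api/v1/status HTTP/1.1"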

I stopped Filebeat, removed the index, and tried again, but the errors keep occurring.
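
Concretely, this is roughly what I ran (assuming the systemd service name for Filebeat, and using the same host and credentials as in the output section above):

sudo systemctl stop filebeat
curl -u elastic:changeme -X DELETE 'http://localhost:9200/acc_app_haproxy-2018.05.04'
sudo systemctl start filebeat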

Is this a known issue, or is there something wrong with my configuration?
Please help me out.

Thank you
