Field _id is defined twice in [doc]


(Jens Van Deynse) #1

Hello there

While setting up Logstash and Elasticsearch, I ran into the following error when indexing HAProxy logs:
[2018-05-04T13:42:29,144][WARN ][logstash.outputs.elasticsearch] Could not index event to Elasticsearch. {:status=>400, :action=>["index", {:_id=>nil, :_index=>"acc_app_haproxy-2018.05.04", :_type=>"doc", :_routing=>nil}, #<LogStash::Event:0x7e305898>], :response=>{"index"=>{"_index"=>"acc_app_haproxy-2018.05.04", "_type"=>"doc", "_id"=>"pcT1KmMByYtKIbIJ6CXz", "status"=>400, "error"=>{"type"=>"illegal_argument_exception", "reason"=>"Field [_id] is defined twice in [doc]"}}}}

In the elasticsearch logs I see the following all the time:
[2018-05-04T13:45:12,733][DEBUG][o.e.a.b.TransportShardBulkAction] [acc_app_haproxy-2018.05.04][0] failed to execute bulk item (index) BulkShardRequest [[acc_app_haproxy-2018.05.04][0]] containing [3] requests
java.lang.IllegalArgumentException: Field [_id] is defined twice in [doc]
at org.elasticsearch.index.mapper.MapperService.checkFieldUniqueness(MapperService.java:578) ~[elasticsearch-6.2.4.jar:6.2.4]
at org.elasticsearch.index.mapper.MapperService.internalMerge(MapperService.java:420) ~[elasticsearch-6.2.4.jar:6.2.4]
at org.elasticsearch.index.mapper.MapperService.internalMerge(MapperService.java:353) ~[elasticsearch-6.2.4.jar:6.2.4]
at org.elasticsearch.index.mapper.MapperService.merge(MapperService.java:285) ~[elasticsearch-6.2.4.jar:6.2.4]
at org.elasticsearch.cluster.metadata.MetaDataMappingService$PutMappingExecutor.applyRequest(MetaDataMappingService.java:313) ~[elasticsearch-6.2.4.jar:6.2.4]
at org.elasticsearch.cluster.metadata.MetaDataMappingService$PutMappingExecutor.execute(MetaDataMappingService.java:230) ~[elasticsearch-6.2.4.jar:6.2.4]
at org.elasticsearch.cluster.service.MasterService.executeTasks(MasterService.java:643) ~[elasticsearch-6.2.4.jar:6.2.4]
at org.elasticsearch.cluster.service.MasterService.calculateTaskOutputs(MasterService.java:273) ~[elasticsearch-6.2.4.jar:6.2.4]
at org.elasticsearch.cluster.service.MasterService.runTasks(MasterService.java:198) ~[elasticsearch-6.2.4.jar:6.2.4]
at org.elasticsearch.cluster.service.MasterService$Batcher.run(MasterService.java:133) ~[elasticsearch-6.2.4.jar:6.2.4]
at org.elasticsearch.cluster.service.TaskBatcher.runIfNotProcessed(TaskBatcher.java:150) ~[elasticsearch-6.2.4.jar:6.2.4]
at org.elasticsearch.cluster.service.TaskBatcher$BatchedTask.run(TaskBatcher.java:188) ~[elasticsearch-6.2.4.jar:6.2.4]
at org.elasticsearch.common.util.concurrent.ThreadContext$ContextPreservingRunnable.run(ThreadContext.java:573) ~[elasticsearch-6.2.4.jar:6.2.4]
at org.elasticsearch.common.util.concurrent.PrioritizedEsThreadPoolExecutor$TieBreakingPrioritizedRunnable.runAndClean(PrioritizedEsThreadPoolExecutor.java:244) ~[elasticsearch-6.2.4.jar:6.2.4]
at org.elasticsearch.common.util.concurrent.PrioritizedEsThreadPoolExecutor$TieBreakingPrioritizedRunnable.run(PrioritizedEsThreadPoolExecutor.java:207) ~[elasticsearch-6.2.4.jar:6.2.4]
at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149) [?:1.8.0_141]
at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624) [?:1.8.0_141]
at java.lang.Thread.run(Thread.java:748) [?:1.8.0_141]

My logstash configuration for these logs is as follows:

# Haproxy Logs

else if ( [document_type] == "haproxy" ) {
grok {
patterns_dir => ["/etc/logstash/pipeline/patterns"]
match => {
"message" => [
"^%{SYSLOGTIMESTAMP:timestamp} %{IPORHOST} %{PROG}(?:[%{POSINT}])?: %{IPORHOST:sourceIP}:%{POSINT:port}%{PIPE}{%{DATA:tenant}|%{DATA:userAgent}}%{PIPE}%{NOTSPACE:frontend} %{NOTSPACE:backend}/%{NOTSPACE:server} %{INT:timeClientRequest}/%{INT:timeQueue}/%{INT:timeTCP}/%{INT:timeServer}/%{INT:timeTotal} %{INT:statusCode} %{INT:bytes} %{INT:concurrentConnectionsProcess}/%{INT:concurrentConnectionsFrontend}/%{INT:concurrentConnectionsBackend}/%{INT:concurrentConnectionsServer}/%{INT:retries} %{INT:connectionsServer}/%{INT:connectionsBackend} "%{DATA:verb} %{DATA:request}"",
"^%{SYSLOGTIMESTAMP:timestamp} %{IPORHOST} %{PROG}(?:[%{POSINT}])?: %{IPORHOST:sourceIP}:%{POSINT:port} [%{MONTHDAY}/%{MONTH}/%{YEAR}:%{TIME}] %{NOTSPACE:frontend}: %{GREEDYDATA:haproxyError}",
"^%{SYSLOGTIMESTAMP:timestamp} %{IPORHOST} %{PROG}(?:[%{POSINT}])?: %{IPORHOST:sourceIP}:%{POSINT:port}%{PIPE}{%{DATA:tenant}|%{DATA:userAgent}}%{PIPE}%{NOTSPACE:frontend} %{NOTSPACE:backend}/%{NOTSPACE:server} %{INT:timeClientRequest}/%{INT:timeQueue}/%{INT:timeTCP}/%{INT:timeServer}/%{INT:timeTotal} %{INT:statusCode} %{INT:bytes} %{INT:concurrentConnectionsProcess}/%{INT:concurrentConnectionsFrontend}/%{INT:concurrentConnectionsBackend}/%{INT:concurrentConnectionsServer}/%{INT:retries} %{INT:connectionsServer}/%{INT:connectionsBackend} "%{GREEDYDATA:failedRequest}"",
"^%{SYSLOGTIMESTAMP:timestamp} %{IPORHOST} %{PROG}(?:[%{POSINT}])?: %{GREEDYDATA:logEntry}"
]
overwrite => [ "message" ]
}
break_on_match => true
}
date {
match => [ "timestamp", "MMM d HH:mm:ss", "MMM dd HH:mm:ss", "ISO8601" ]
target => "@timestamp"
timezone => "Europe/Brussels"
}
mutate {
convert => { "bytes" => "integer" }
convert => { "concurrentConnectionsBackend" => "integer" }
convert => { "concurrentConnectionsFrontend" => "integer" }
convert => { "concurrentConnectionsProcess" => "integer" }
convert => { "concurrentConnectionsServer" => "integer" }
convert => { "connectionsBackend" => "integer" }
convert => { "connectionsServer" => "integer" }
convert => { "port" => "integer" }
convert => { "retries" => "integer" }
convert => { "statusCode" => "integer" }
convert => { "timeClientRequest" => "integer" }
convert => { "timeQueue" => "integer" }
convert => { "timeServer" => "integer" }
convert => { "timeTCP" => "integer" }
convert => { "timeTotal" => "integer" }
}
}
mutate {
rename => { "env" => "[@metadata][env]" }
rename => { "app" => "[@metadata][app]" }
rename => { "source" => "file" }
remove_field => [ "@version", "offset", "document_type", "timestamp" ]
remove_tag => [ "beats_input_codec_plain_applied" ]
}
mutate {
rename => { "sourceIP" => "source" }
}

output {
else if [@metadata][app] == "haproxy" {
elasticsearch {
hosts => "localhost:9200"
index => "%{[@metadata][env]}_app_%{[@metadata][app]}-%{+YYYY.MM.dd}"
user => "elastic"
password => "changeme"
}
}

I stopped Filebeat, deleted the index, and tried again, but the errors keep occurring.

Is this a known issue or is there something wrong with my configuration?
Please help me

Thank you


(system) #2

This topic was automatically closed 28 days after the last reply. New replies are no longer allowed.