I'm afraid this is still happening, even after upgrading both elasticsearch and logstash to the latest versions:
elasticsearch: 0.90.2
logstash: 1.1.13
Here is the most recent stack trace from the elasticsearch logs:
[2013-07-12 14:20:57,078][WARN ][index.shard.service ] [chiana] [logstash-2013.07.12][3] Failed to perform scheduled engine refresh
org.elasticsearch.index.engine.RefreshFailedEngineException: [logstash-2013.07.12][3] Refresh failed
        at org.elasticsearch.index.engine.robin.RobinEngine.refresh(RobinEngine.java:796)
        at org.elasticsearch.index.shard.service.InternalIndexShard.refresh(InternalIndexShard.java:412)
        at org.elasticsearch.index.shard.service.InternalIndexShard$EngineRefresher$1.run(InternalIndexShard.java:755)
        at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1145)
        at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:615)
        at java.lang.Thread.run(Thread.java:722)
Caused by: java.io.EOFException: read past EOF: NIOFSIndexInput(path="/var/lib/elasticsearch/logstash/nodes/0/indices/logstash-2013.07.12/3/index/_14b.fdx")
        at org.apache.lucene.store.BufferedIndexInput.refill(BufferedIndexInput.java:266)
        at org.apache.lucene.store.BufferedIndexInput.readByte(BufferedIndexInput.java:51)
        at org.apache.lucene.store.DataInput.readInt(DataInput.java:84)
        at org.apache.lucene.store.BufferedIndexInput.readInt(BufferedIndexInput.java:181)
        at org.apache.lucene.codecs.CodecUtil.checkHeader(CodecUtil.java:126)
        at org.apache.lucene.codecs.compressing.CompressingStoredFieldsReader.<init>(CompressingStoredFieldsReader.java:102)
        at org.apache.lucene.codecs.compressing.CompressingStoredFieldsFormat.fieldsReader(CompressingStoredFieldsFormat.java:113)
        at org.apache.lucene.index.SegmentCoreReaders.<init>(SegmentCoreReaders.java:147)
        at org.apache.lucene.index.SegmentReader.<init>(SegmentReader.java:56)
        at org.apache.lucene.index.ReadersAndLiveDocs.getReader(ReadersAndLiveDocs.java:121)
        at org.apache.lucene.index.ReadersAndLiveDocs.getReadOnlyClone(ReadersAndLiveDocs.java:218)
        at org.apache.lucene.index.StandardDirectoryReader.open(StandardDirectoryReader.java:100)
        at org.apache.lucene.index.IndexWriter.getReader(IndexWriter.java:377)
        at org.apache.lucene.index.StandardDirectoryReader.doOpenFromWriter(StandardDirectoryReader.java:275)
        at org.apache.lucene.index.StandardDirectoryReader.doOpenIfChanged(StandardDirectoryReader.java:250)
        at org.apache.lucene.index.StandardDirectoryReader.doOpenIfChanged(StandardDirectoryReader.java:240)
        at org.apache.lucene.index.DirectoryReader.openIfChanged(DirectoryReader.java:170)
        at org.apache.lucene.search.SearcherManager.refreshIfNeeded(SearcherManager.java:118)
        at org.apache.lucene.search.SearcherManager.refreshIfNeeded(SearcherManager.java:58)
        at org.apache.lucene.search.ReferenceManager.doMaybeRefresh(ReferenceManager.java:155)
        at org.apache.lucene.search.ReferenceManager.maybeRefresh(ReferenceManager.java:204)
        at org.elasticsearch.index.engine.robin.RobinEngine.refresh(RobinEngine.java:777)
        ... 5 more
[2013-07-12 14:20:58,144][WARN ][index.shard.service ] [chiana] [logstash-2013.07.12][3] Failed to perform scheduled engine refresh
org.elasticsearch.index.engine.RefreshFailedEngineException: [logstash-2013.07.12][3] Refresh failed
        at org.elasticsearch.index.engine.robin.RobinEngine.refresh(RobinEngine.java:796)
        at org.elasticsearch.index.shard.service.InternalIndexShard.refresh(InternalIndexShard.java:412)
        at org.elasticsearch.index.shard.service.InternalIndexShard$EngineRefresher$1.run(InternalIndexShard.java:755)
        at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1145)
        at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:615)
        at java.lang.Thread.run(Thread.java:722)
Caused by: java.io.EOFException: read past EOF: NIOFSIndexInput(path="/var/lib/elasticsearch/logstash/nodes/0/indices/logstash-2013.07.12/3/index/_14b.fdx")
        at org.apache.lucene.store.BufferedIndexInput.refill(BufferedIndexInput.java:266)
        at org.apache.lucene.store.BufferedIndexInput.readByte(BufferedIndexInput.java:51)
        at org.apache.lucene.store.DataInput.readInt(DataInput.java:84)
        at org.apache.lucene.store.BufferedIndexInput.readInt(BufferedIndexInput.java:181)
        at org.apache.lucene.codecs.CodecUtil.checkHeader(CodecUtil.java:126)
        at org.apache.lucene.codecs.compressing.CompressingStoredFieldsReader.<init>(CompressingStoredFieldsReader.java:102)
        at org.apache.lucene.codecs.compressing.CompressingStoredFieldsFormat.fieldsReader(CompressingStoredFieldsFormat.java:113)
        at org.apache.lucene.index.SegmentCoreReaders.<init>(SegmentCoreReaders.java:147)
        at org.apache.lucene.index.SegmentReader.<init>(SegmentReader.java:56)
        at org.apache.lucene.index.ReadersAndLiveDocs.getReader(ReadersAndLiveDocs.java:121)
        at org.apache.lucene.index.ReadersAndLiveDocs.getReadOnlyClone(ReadersAndLiveDocs.java:218)
        at org.apache.lucene.index.StandardDirectoryReader.open(StandardDirectoryReader.java:100)
        at org.apache.lucene.index.IndexWriter.getReader(IndexWriter.java:377)
        at org.apache.lucene.index.StandardDirectoryReader.doOpenFromWriter(StandardDirectoryReader.java:275)
        at org.apache.lucene.index.StandardDirectoryReader.doOpenIfChanged(StandardDirectoryReader.java:250)
        at org.apache.lucene.index.StandardDirectoryReader.doOpenIfChanged(StandardDirectoryReader.java:240)
        at org.apache.lucene.index.DirectoryReader.openIfChanged(DirectoryReader.java:170)
        at org.apache.lucene.search.SearcherManager.refreshIfNeeded(SearcherManager.java:118)
        at org.apache.lucene.search.SearcherManager.refreshIfNeeded(SearcherManager.java:58)
        at org.apache.lucene.search.ReferenceManager.doMaybeRefresh(ReferenceManager.java:155)
        at org.apache.lucene.search.ReferenceManager.maybeRefresh(ReferenceManager.java:204)
        at org.elasticsearch.index.engine.robin.RobinEngine.refresh(RobinEngine.java:777)
        ... 5 more
[2013-07-12 14:21:00,155][WARN ][index.shard.service ] [chiana] [logstash-2013.07.12][3] Failed to perform scheduled engine refresh
org.elasticsearch.index.engine.RefreshFailedEngineException: [logstash-2013.07.12][3] Refresh failed
        at org.elasticsearch.index.engine.robin.RobinEngine.refresh(RobinEngine.java:796)
        at org.elasticsearch.index.shard.service.InternalIndexShard.refresh(InternalIndexShard.java:412)
        at org.elasticsearch.index.shard.service.InternalIndexShard$EngineRefresher$1.run(InternalIndexShard.java:755)
        at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1145)
        at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:615)
        at java.lang.Thread.run(Thread.java:722)
Caused by: java.io.EOFException: read past EOF: NIOFSIndexInput(path="/var/lib/elasticsearch/logstash/nodes/0/indices/logstash-2013.07.12/3/index/_14b.fdx")
        at org.apache.lucene.store.BufferedIndexInput.refill(BufferedIndexInput.java:266)
        at org.apache.lucene.store.BufferedIndexInput.readByte(BufferedIndexInput.java:51)
        at org.apache.lucene.store.DataInput.readInt(DataInput.java:84)
        at org.apache.lucene.store.BufferedIndexInput.readInt(BufferedIndexInput.java:181)
        at org.apache.lucene.codecs.CodecUtil.checkHeader(CodecUtil.java:126)
        at org.apache.lucene.codecs.compressing.CompressingStoredFieldsReader.<init>(CompressingStoredFieldsReader.java:102)
        at org.apache.lucene.codecs.compressing.CompressingStoredFieldsFormat.fieldsReader(CompressingStoredFieldsFormat.java:113)
        at org.apache.lucene.index.SegmentCoreReaders.<init>(SegmentCoreReaders.java:147)
        at org.apache.lucene.index.SegmentReader.<init>(SegmentReader.java:56)
        at org.apache.lucene.index.ReadersAndLiveDocs.getReader(ReadersAndLiveDocs.java:121)
        at org.apache.lucene.index.ReadersAndLiveDocs.getReadOnlyClone(ReadersAndLiveDocs.java:218)
        at org.apache.lucene.index.StandardDirectoryReader.open(StandardDirectoryReader.java:100)
        at org.apache.lucene.index.IndexWriter.getReader(IndexWriter.java:377)
        at org.apache.lucene.index.StandardDirectoryReader.doOpenFromWriter(StandardDirectoryReader.java:275)
        at org.apache.lucene.index.StandardDirectoryReader.doOpenIfChanged(StandardDirectoryReader.java:250)
        at org.apache.lucene.index.StandardDirectoryReader.doOpenIfChanged(StandardDirectoryReader.java:240)
        at org.apache.lucene.index.DirectoryReader.openIfChanged(DirectoryReader.java:170)
        at org.apache.lucene.search.SearcherManager.refreshIfNeeded(SearcherManager.java:118)
        at org.apache.lucene.search.SearcherManager.refreshIfNeeded(SearcherManager.java:58)
        at org.apache.lucene.search.ReferenceManager.doMaybeRefresh(ReferenceManager.java:155)
        at org.apache.lucene.search.ReferenceManager.maybeRefresh(ReferenceManager.java:204)
        at org.elasticsearch.index.engine.robin.RobinEngine.refresh(RobinEngine.java:777)
        ... 5 more
[2013-07-12 14:21:09,925][WARN ][index.merge.scheduler ] [chiana] [logstash-2013.07.12][3] failed to merge
java.io.EOFException: read past EOF: NIOFSIndexInput(path="/var/lib/elasticsearch/logstash/nodes/0/indices/logstash-2013.07.12/3/index/_14b.fdx")
        at org.apache.lucene.store.BufferedIndexInput.refill(BufferedIndexInput.java:266)
        at org.apache.lucene.store.BufferedIndexInput.readByte(BufferedIndexInput.java:51)
        at org.apache.lucene.store.DataInput.readInt(DataInput.java:84)
        at org.apache.lucene.store.BufferedIndexInput.readInt(BufferedIndexInput.java:181)
        at org.apache.lucene.codecs.CodecUtil.checkHeader(CodecUtil.java:126)
        at org.apache.lucene.codecs.compressing.CompressingStoredFieldsReader.<init>(CompressingStoredFieldsReader.java:102)
        at org.apache.lucene.codecs.compressing.CompressingStoredFieldsFormat.fieldsReader(CompressingStoredFieldsFormat.java:113)
        at org.apache.lucene.index.SegmentCoreReaders.<init>(SegmentCoreReaders.java:147)
        at org.apache.lucene.index.SegmentReader.<init>(SegmentReader.java:56)
        at org.apache.lucene.index.ReadersAndLiveDocs.getMergeReader(ReadersAndLiveDocs.java:153)
        at org.apache.lucene.index.IndexWriter.mergeMiddle(IndexWriter.java:3700)
        at org.apache.lucene.index.IndexWriter.merge(IndexWriter.java:3370)
        at org.apache.lucene.index.ConcurrentMergeScheduler.doMerge(ConcurrentMergeScheduler.java:401)
        at org.apache.lucene.index.TrackingConcurrentMergeScheduler.doMerge(TrackingConcurrentMergeScheduler.java:91)
        at org.apache.lucene.index.ConcurrentMergeScheduler$MergeThread.run(ConcurrentMergeScheduler.java:478)
[2013-07-12 14:21:09,926][WARN ][index.engine.robin ] [chiana] [logstash-2013.07.12][3] failed engine
org.apache.lucene.index.MergePolicy$MergeException: java.io.EOFException: read past EOF: NIOFSIndexInput(path="/var/lib/elasticsearch/logstash/nodes/0/indices/logstash-2013.07.12/3/index/_14b.fdx")
        at org.elasticsearch.index.merge.scheduler.ConcurrentMergeSchedulerProvider$CustomConcurrentMergeScheduler.handleMergeException(ConcurrentMergeSchedulerProvider.java:100)
        at org.apache.lucene.index.ConcurrentMergeScheduler$MergeThread.run(ConcurrentMergeScheduler.java:514)
Caused by: java.io.EOFException: read past EOF: NIOFSIndexInput(path="/var/lib/elasticsearch/logstash/nodes/0/indices/logstash-2013.07.12/3/index/_14b.fdx")
        at org.apache.lucene.store.BufferedIndexInput.refill(BufferedIndexInput.java:266)
        at org.apache.lucene.store.BufferedIndexInput.readByte(BufferedIndexInput.java:51)
        at org.apache.lucene.store.DataInput.readInt(DataInput.java:84)
        at org.apache.lucene.store.BufferedIndexInput.readInt(BufferedIndexInput.java:181)
        at org.apache.lucene.codecs.CodecUtil.checkHeader(CodecUtil.java:126)
        at org.apache.lucene.codecs.compressing.CompressingStoredFieldsReader.<init>(CompressingStoredFieldsReader.java:102)
        at org.apache.lucene.codecs.compressing.CompressingStoredFieldsFormat.fieldsReader(CompressingStoredFieldsFormat.java:113)
        at org.apache.lucene.index.SegmentCoreReaders.<init>(SegmentCoreReaders.java:147)
        at org.apache.lucene.index.SegmentReader.<init>(SegmentReader.java:56)
        at org.apache.lucene.index.ReadersAndLiveDocs.getMergeReader(ReadersAndLiveDocs.java:153)
        at org.apache.lucene.index.IndexWriter.mergeMiddle(IndexWriter.java:3700)
        at org.apache.lucene.index.IndexWriter.merge(IndexWriter.java:3370)
        at org.apache.lucene.index.ConcurrentMergeScheduler.doMerge(ConcurrentMergeScheduler.java:401)
        at org.apache.lucene.index.TrackingConcurrentMergeScheduler.doMerge(TrackingConcurrentMergeScheduler.java:91)
        at org.apache.lucene.index.ConcurrentMergeScheduler$MergeThread.run(ConcurrentMergeScheduler.java:478)
[2013-07-12 14:21:10,015][WARN ][cluster.action.shard ] [chiana] sending failed shard for [logstash-2013.07.12][3], node[NLPrKhOdQ1GIoNMCN1IyHg], [P], s[STARTED], reason [engine failure, message [MergeException[java.io.EOFException: read past EOF: NIOFSIndexInput(path="/var/lib/elasticsearch/logstash/nodes/0/indices/logstash-2013.07.12/3/index/_14b.fdx")]; nested: EOFException[read past EOF: NIOFSIndexInput(path="/var/lib/elasticsearch/logstash/nodes/0/indices/logstash-2013.07.12/3/index/_14b.fdx")]; ]]
[2013-07-12 14:21:10,016][WARN ][cluster.action.shard ] [chiana] received shard failed for [logstash-2013.07.12][3], node[NLPrKhOdQ1GIoNMCN1IyHg], [P], s[STARTED], reason [engine failure, message [MergeException[java.io.EOFException: read past EOF: NIOFSIndexInput(path="/var/lib/elasticsearch/logstash/nodes/0/indices/logstash-2013.07.12/3/index/_14b.fdx")]; nested: EOFException[read past EOF: NIOFSIndexInput(path="/var/lib/elasticsearch/logstash/nodes/0/indices/logstash-2013.07.12/3/index/_14b.fdx")]; ]]
Not sure what else to look at here ...
After this happens, logs stop being fed into elasticsearch via logstash, and the cluster health check shows:
{
"cluster_name" : "logstash",
"status" : "red",
"timed_out" : false,
"number_of_nodes" : 1,
"number_of_data_nodes" : 1,
"active_primary_shards" : 4,
"active_shards" : 4,
"relocating_shards" : 0,
"initializing_shards" : 0,
"unassigned_shards" : 1
}
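For reference, that output is from the cluster health API, queried with something along these lines (host and port are the assumed defaults):
# query cluster health on the assumed default localhost:9200
curl -XGET 'http://localhost:9200/_cluster/health?pretty'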
Any suggestions/help would be greatly appreciated.
Cheers,
Andrew
On Thu, Jul 11, 2013 at 8:16 AM, Andrew Stangl <andrewstangl@gmail.com> wrote:
Awesome, thanks very much - I'll attempt the upgrade and report back here.
I've been consistently fixing the indices, but it's a maintenance overhead at the moment and not sustainable ... let's hope the upgrade resolves the issue.
Cheers!
Andrew
On Thursday, July 11, 2013 8:09:08 AM UTC+1, Alexander Reelsen wrote:
Hey,
upgrading is always worth a try. Please keep the google group informed if this solves your issue. As a side note, I hope you removed all the indices which showed that exception (or at least stopped trying to write into them), as these are most likely corrupted from a lucene point of view.
Thanks!
--Alex
On Thu, Jul 11, 2013 at 9:01 AM, Andrew Stangl <andrew...@gmail.com> wrote:
Hi,
I enabled index.shard.check_on_startup: true, and the log shows
that it's now checking the indices/shards on startup ... but the problem
persists.
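For reference, this is roughly the line I added to elasticsearch.yml before restarting the node (the config file path is the assumed package default):
# file: /etc/elasticsearch/elasticsearch.yml (assumed default location)
# have ES run a Lucene-level integrity check of each shard when it is opened
index.shard.check_on_startup: true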
Strangely, although there are "Failed to perform scheduled engine
refresh" messages constantly in the ES log, the logstash implementation
still appears to be functioning this morning, after running throughout the
night, and the logs continue to be indexed and are viewable in the kibana
interface.
I'm going to upgrade the ES and logstash packages, since we're on 0.20.6
and 1.1.9 respectively, and perhaps the newer 0.90 implementation will
resolve this issue.
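For reference, a quick way to confirm which version a node ends up running after the upgrade is to hit the root endpoint (again assuming the default localhost:9200); the JSON response includes a version.number field:
# the response should report e.g. "number" : "0.90.2"
curl -XGET 'http://localhost:9200/'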
Thanks,
Andrew
On Wednesday, July 10, 2013 9:38:32 PM UTC+1, Andrew Stangl wrote:
Hi Jörg,
The server currently has more than 90% free space on the partition with
the elasticsearch data store; this is from a completely fresh index created
automatically by logstash. We did originally experience disk space issues,
but subsequently added a very large volume, and started from fresh.
I'm now going to attempt to start the node with index.shard.check_on_startup: true, will let you know how it goes.
Thanks,
Andrew
On Wed, Jul 10, 2013 at 5:46 PM, Jörg Prante <joerg...@gmail.com> wrote:
It looks like there was temporarily not enough disk space while the Lucene index was written. If so, the index is corrupt and must be checked/repaired with the index.shard.check_on_startup setting on node startup.
Jörg
Am 10.07.13 17:55, schrieb Alexander Reelsen:
Hey,
is it possible that there is an exception in your logfiles before
this happens, which can shed some more light on this issue? Maybe you are
running out of file descriptors (wildly speculating here) or an OutOfMemoryException happened or something...
--Alex
On Wed, Jul 10, 2013 at 5:29 PM, Andrew Stangl <andrew...@gmail.com> wrote:
Hi all,
I hope someone will be able to shed some light on this issue:
we're experiencing a problem affecting a single elasticsearch server, which is being used to store and index tomcat and syslog data pushed into ES via logstash.
The following entries are coming up in the elasticsearch log on
the server:
[2013-07-10 06:50:31,699][WARN ][index.shard.service ] [chiana] [logstash-2013.07.10][3] Failed to perform scheduled engine refresh
org.elasticsearch.index.engine.RefreshFailedEngineException: [logstash-2013.07.10][3] Refresh failed
and
[2013-07-10 06:50:34,376][WARN ][index.merge.scheduler ] [chiana] [logstash-2013.07.10][2] failed to merge
java.io.EOFException: read past EOF: NIOFSIndexInput(path="/var/lib/elasticsearch/logstash/nodes/0/indices/logstash-2013.07.10/2/index/_egi.fnm")
The files in the "failed to merge" paths indicated all appear to be zero length; not sure whether this is significant.
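For reference, they can be spotted with something like the following (using the shard path from the log entry above):
# list zero-length files under the affected shard's index directory
find /var/lib/elasticsearch/logstash/nodes/0/indices/logstash-2013.07.10/2/index -type f -size 0 -exec ls -l {} \;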
The logstash server will continue to feed logs into Elasticsearch,
in spite of these messages appearing, but eventually it falls
over, after an indeterminate length of time.
When the logstash server is unable to index into ES, it appears as though the ES server is rejecting connections, and logstash shows "unable to index event" messages in its logs... then the indexes appear to be corrupt, and at this point I've needed to stop the ES daemon and run the lucene index fix described here:
http://elasticsearch-users.115913.n3.nabble.com/Shard-index-gone-bad-anyone-know-how-to-fix-this-java-io-EOFException-read-past-EOF-NIOFSIndexInput-tp4027683p4028934.html
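For reference, that fix is essentially a run of Lucene's CheckIndex tool against the broken shard while the ES daemon is stopped; roughly the following (the jar location and shard path here are assumptions, adjust for your install, and back the shard up first since -fix drops any segments it cannot read):
# run Lucene's CheckIndex against the affected shard directory; -fix removes unreadable segments
java -cp /usr/share/elasticsearch/lib/lucene-core-*.jar \
  org.apache.lucene.index.CheckIndex \
  /var/lib/elasticsearch/logstash/nodes/0/indices/logstash-2013.07.10/2/index -fix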
Once I restart the ES daemon, all seems okay for a while .. then
the problem starts happening again :-/
Is it possible that we're reaching some sort of limitation on the
size of the document that is being pushed into ES by logstash? Is
there any other reason that we would be seeing the log entries
described above?
Thanks in advance!
Andrew