Futex hanging issue with elasticsearch + rabbitmq river + large dataset (>100M records)

On Tue, Apr 17, 2012 at 2:40 AM, arimus wrote:

Everything was chugging along, processing about 2k messages/sec, and then
just hung. Running strace on the process shows the trace below, which
persists even across restarts of elasticsearch. It seems that the issue
occurs right after a clone system call. Ideas?


open("/usr/share/java/elasticsearch/lib/sigar/",
O_RDONLY|O_NONBLOCK|O_DIRECTORY|O_CLOEXEC) = 3
getdents(3, /* 4 entries */, 32768) = 136
getdents(3, /* 0 entries */, 32768) = 0
close(3) = 0
mmap(NULL, 135168, PROT_READ|PROT_WRITE,
MAP_PRIVATE|MAP_ANONYMOUS|MAP_STACK, -1, 0) = 0x7f4c05f79000
mprotect(0x7f4c05f79000, 4096, PROT_NONE) = 0
clone(child_stack=0x7f4c05f98ff0,
flags=CLONE_VM|CLONE_FS|CLONE_FILES|CLONE_SIGHAND|CLONE_THREAD|CLONE_SYSVSEM|CLONE_SETTLS|CLONE_PARENT_SETTID|CLONE_CHILD_CLEARTID,
parent_tidptr=0x7f4c05f999d0, tls=0x7f4c05f99700,
child_tidptr=0x7f4c05f999d0) = 32755
futex(0x7f4c05f999d0, FUTEX_WAIT, 32755, NULL) = ? ERESTARTSYS (To be
restarted)
futex(0x7f4c05f999d0, FUTEX_WAIT, 32755, NULL) = ? ERESTARTSYS (To be
restarted)
futex(0x7f4c05f999d0, FUTEX_WAIT, 32755, NULL) = ? ERESTARTSYS (To be
restarted)
--- SIGINT (Interrupt) @ 0 (0) ---
futex(0x7f4c06c23440, FUTEX_WAKE_PRIVATE, 1) = 1
rt_sigreturn(0x7f4c06c23440) = 202
futex(0x7f4c05f999d0, FUTEX_WAIT, 32755, NULL) = ? ERESTARTSYS (To be
restarted)
--- SIGINT (Interrupt) @ 0 (0) ---
futex(0x7f4c06c23440, FUTEX_WAKE_PRIVATE, 1) = 1
rt_sigreturn(0x7f4c06c23440) = 202
futex(0x7f4c05f999d0, FUTEX_WAIT, 32755, NULL) = ? ERESTARTSYS (To be
restarted)
--- SIGINT (Interrupt) @ 0 (0) ---
futex(0x7f4c06c23440, FUTEX_WAKE_PRIVATE, 1) = 1
rt_sigreturn(0x7f4c06c23440) = 202
futex(0x7f4c05f999d0, FUTEX_WAIT, 32755, NULL) = ? ERESTARTSYS (To be
restarted)
--- SIGINT (Interrupt) @ 0 (0) ---
futex(0x7f4c06c23440, FUTEX_WAKE_PRIVATE, 1) = 1
rt_sigreturn(0x7f4c06c23440) = 202
futex(0x7f4c05f999d0, FUTEX_WAIT, 32755, NULL) = ? ERESTARTSYS (To be
restarted)
--- SIGINT (Interrupt) @ 0 (0) ---
futex(0x7f4c06c23440, FUTEX_WAKE_PRIVATE, 1) = 1
rt_sigreturn(0x7f4c06c23440) = 202
futex(0x7f4c05f999d0, FUTEX_WAIT, 32755, NULL) = ? ERESTARTSYS (To be
restarted)
--- SIGINT (Interrupt) @ 0 (0) ---
futex(0x7f4c06c23440, FUTEX_WAKE_PRIVATE, 1) = 1
rt_sigreturn(0x7f4c06c23440) = 202
futex(0x7f4c05f999d0, FUTEX_WAIT, 32755, NULL) = ? ERESTARTSYS (To be
restarted)
--- SIGINT (Interrupt) @ 0 (0) ---
futex(0x7f4c06c23440, FUTEX_WAKE_PRIVATE, 1) = 1
rt_sigreturn(0x7f4c06c23440) = 202
futex(0x7f4c05f999d0, FUTEX_WAIT, 32755, NULL

On Tuesday, April 17, 2012 6:20:10 AM UTC-7, kimchy wrote:

Can you issue a jstack and gist the stack trace you get?
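
For reference, a thread dump can typically be captured with the JDK's jstack
tool and then pasted into a gist. A minimal sketch, assuming the 0.19-era
bootstrap class name and an "elasticsearch" service user (both are
assumptions; adjust the PID lookup and user for your install):

  # hypothetical PID lookup; verify against ps or jps output
  ES_PID=$(pgrep -f org.elasticsearch.bootstrap.ElasticSearch | head -1)
  # -l adds lock/synchronizer details, which helps with futex-style hangs
  sudo -u elasticsearch jstack -l "$ES_PID" > es.jstack
  # if the JVM no longer responds, -F forces a (less detailed) dump:
  # sudo -u elasticsearch jstack -F "$ES_PID" > es.jstack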

On Wednesday, April 18, 2012 5:43:23 PM UTC-7, arimus wrote:

I ended up doing the following:

  • upgrading from rabbitmq-server-2.6.1-1.el6 to rabbitmq-server-2.8.1-1
  • disabling a couple of other elasticsearch instances that were trying to
    use multicast discovery on the same subnet (see the config sketch below)
  • restarting everything

So far, another 100M+ messages have been processed without issues, but I
will grab the jstack if I see the issue again. It looks like there was a
post yesterday that was also futex-related, though on Debian and without
the river. Perhaps I just haven't run into it again yet. Thanks, will keep
you posted.
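
The multicast cross-talk mentioned in the second item can also be handled in
configuration rather than by shutting instances down. A sketch of the
relevant 0.19-era settings in elasticsearch.yml; the cluster name and host
names below are placeholders:

  # give each cluster its own name so strays cannot join by accident
  cluster.name: my-cluster
  # turn off multicast discovery and list the intended peers explicitly
  discovery.zen.ping.multicast.enabled: false
  discovery.zen.ping.unicast.hosts: ["es-node1", "es-node2"]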

On Thursday, April 19, 2012 12:27:18 PM UTC-7, arimus wrote:

Alright, it finally died again, and here is the jstack:

es.jstack · GitHub

I'm keeping it hung until we get to the bottom of this, so I'm willing to
jump on IRC and pow-wow on finding a solution to get things running again.
Just let me know.

On Thursday, April 19, 2012 4:38:40 PM UTC-7, arimus wrote:

Also, strace -f on the elasticsearch process shows lots of futex connection
timeouts:

https://raw.github.com/gist/2424865/a31d666c2629963f2f274ecfdf4b7f5a3881fbe6/gistfile1.txt
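
For anyone trying to reproduce a trace like the one in that gist, something
along these lines should work; the PID variable and output path are
placeholders:

  # follow all JVM threads (-f) and record only futex calls
  strace -f -e trace=futex -p "$ES_PID" -o /tmp/es-futex.strace
  # futex calls that time out show up as "Connection timed out" (ETIMEDOUT)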

On Thursday, April 19, 2012 8:31:23 PM UTC-7, arimus wrote:

Well, I decided to profile elasticsearch with YourKit to see what I could
see, looking at threads that appear blocked.

Lots of org.apache.log4j.spi.RootLogger and
org.elasticsearch.common.jackson.sym.BytesToNameCanonicalizer blocking on
the elasticsearch[bulk] thread. So is it blocking in the elasticsearch
river plugin, or does it only look that way because that's 99% of the work
my instance is doing, with an underlying issue blocking things in the
middle of regular processing? The latter seems likely.

Interestingly, there are also a bunch of threads that appear blocked in
java.security.SecureRandom. I have seen random-device entropy cited as a
root cause for some Java futex-wait hangs
(http://www.springone2gx.com/blog/pratik_patel/2010/01/solution_futex_wait_hangs_java_on_linux__ubuntu_in_vmware_or_virtual_box).
It could be another red herring, but I figured I would mention it. Perhaps
the UUID generation in elasticsearch is depleting the SecureRandom entropy.

I'm going to give /dev/urandom a go for now and see if the issue goes away
(e.g. -Djava.security.egd=file:/dev/urandom).
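
Two quick checks around that theory, sketched under the assumption that the
install sources something like /etc/sysconfig/elasticsearch or
bin/elasticsearch.in.sh into the startup environment (paths vary by
package):

  # if this hovers near zero, SecureRandom reads backed by /dev/random block
  cat /proc/sys/kernel/random/entropy_avail

  # pass the property through ES_JAVA_OPTS; the /dev/./urandom spelling is a
  # common workaround for JDK builds that special-case the exact string
  # file:/dev/urandom and silently keep using /dev/random
  export ES_JAVA_OPTS="$ES_JAVA_OPTS -Djava.security.egd=file:/dev/./urandom"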

So far, making this change and upping the elasticsearch min/max heap size
seems to be helping. I'm going to reset everything and let it soak over the
weekend to see if I can get it to run solid for a few days without any
hiccups. I'm still seeing some index-related issues on restart, though;
I've logged that as a bug (Index issues on restarts with defaults · Issue
#1871 · elastic/elasticsearch · GitHub) and will see if it crops up again
after resetting everything.
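
For reference, the heap bump mentioned above is normally done through the
environment read by the stock elasticsearch.in.sh; the values here are
placeholders, not a recommendation:

  # ES_HEAP_SIZE sets -Xms and -Xmx to the same value
  export ES_HEAP_SIZE=4g
  # or set the bounds separately:
  # export ES_MIN_MEM=2g
  # export ES_MAX_MEM=4g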

Things did indeed seem happier with more memory and the /dev/urandom tweak,
from what I could tell. I deleted some old indexes and am letting it
continue to soak over the weekend. I will let folks know if I see the same
issue again (I may just be delaying the problem) or if this resolves things
more permanently.
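
For completeness, old indexes in that era are usually dropped over the REST
API; a sketch with a placeholder index name and the default HTTP port:

  # permanently removes the index and its data
  curl -XDELETE 'http://localhost:9200/old-index-2012.03'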
