I have an ELK cluster with 3 master nodes, 4 hot data nodes, and 4 cold data nodes; all disks are SSDs. Recently, we noticed that the CPU load on the Elasticsearch cluster is high even though CPU utilization is not. The following is the jstack output from one of the nodes:
"elasticsearch[es-hotdata167][transport_worker][T#4]" #39 daemon prio=5 os_prio=0 cpu=1168289483.13ms elapsed=7337110.74s tid=0x00007f2d00000d80 nid=0x67d runnable [0x00007f2d1f1f3000]
java.lang.Thread.State: RUNNABLE
at sun.nio.ch.EPoll.wait(java.base@15.0.1/Native Method)
at sun.nio.ch.EPollSelectorImpl.doSelect(java.base@15.0.1/EPollSelectorImpl.java:120)
at sun.nio.ch.SelectorImpl.lockAndDoSelect(java.base@15.0.1/SelectorImpl.java:129)
- locked <0x00000006158c4388> (a sun.nio.ch.Util$2)
- locked <0x00000006158c4320> (a sun.nio.ch.EPollSelectorImpl)
at sun.nio.ch.SelectorImpl.select(java.base@15.0.1/SelectorImpl.java:146)
at io.netty.channel.nio.NioEventLoop.select(NioEventLoop.java:803)
at io.netty.channel.nio.NioEventLoop.run(NioEventLoop.java:457)
at io.netty.util.concurrent.SingleThreadEventExecutor$4.run(SingleThreadEventExecutor.java:989)
at io.netty.util.internal.ThreadExecutorMap$2.run(ThreadExecutorMap.java:74)
at java.lang.Thread.run(java.base@15.0.1/Thread.java:832)
"elasticsearch[es-hotdata167][transport_worker][T#6]" #42 daemon prio=5 os_prio=0 cpu=238901715.61ms elapsed=7337110.50s tid=0x00007f2d0c01e730 nid=0x67f runnable [0x00007f2d1edf1000]
java.lang.Thread.State: RUNNABLE
at sun.nio.ch.EPoll.wait(java.base@15.0.1/Native Method)
at sun.nio.ch.EPollSelectorImpl.doSelect(java.base@15.0.1/EPollSelectorImpl.java:120)
at sun.nio.ch.SelectorImpl.lockAndDoSelect(java.base@15.0.1/SelectorImpl.java:129)
- locked <0x0000000615e2df48> (a sun.nio.ch.Util$2)
- locked <0x0000000615e2df58> (a sun.nio.ch.EPollSelectorImpl)
at sun.nio.ch.SelectorImpl.select(java.base@15.0.1/SelectorImpl.java:146)
at io.netty.channel.nio.NioEventLoop.select(NioEventLoop.java:803)
at io.netty.channel.nio.NioEventLoop.run(NioEventLoop.java:457)
at io.netty.util.concurrent.SingleThreadEventExecutor$4.run(SingleThreadEventExecutor.java:989)
at io.netty.util.internal.ThreadExecutorMap$2.run(ThreadExecutorMap.java:74)
at java.lang.Thread.run(java.base@15.0.1/Thread.java:832)
"elasticsearch[es-hotdata167][transport_worker][T#5]" #43 daemon prio=5 os_prio=0 cpu=659336811.28ms elapsed=7337110.50s tid=0x00007f2cf4011440 nid=0x680 runnable [0x00007f2d1ecf0000]
java.lang.Thread.State: RUNNABLE
at sun.nio.ch.EPoll.wait(java.base@15.0.1/Native Method)
at sun.nio.ch.EPollSelectorImpl.doSelect(java.base@15.0.1/EPollSelectorImpl.java:120)
at sun.nio.ch.SelectorImpl.lockAndDoSelect(java.base@15.0.1/SelectorImpl.java:129)
- locked <0x0000000615e2e1e0> (a sun.nio.ch.Util$2)
- locked <0x0000000615e2e1f0> (a sun.nio.ch.EPollSelectorImpl)
at sun.nio.ch.SelectorImpl.select(java.base@15.0.1/SelectorImpl.java:146)
at io.netty.channel.nio.NioEventLoop.select(NioEventLoop.java:803)
at io.netty.channel.nio.NioEventLoop.run(NioEventLoop.java:457)
at io.netty.util.concurrent.SingleThreadEventExecutor$4.run(SingleThreadEventExecutor.java:989)
at io.netty.util.internal.ThreadExecutorMap$2.run(ThreadExecutorMap.java:74)
at java.lang.Thread.run(java.base@15.0.1/Thread.java:832)
"elasticsearch[es-hotdata167][transport_worker][T#7]" #44 daemon prio=5 os_prio=0 cpu=303700693.72ms elapsed=7337110.50s tid=0x00007f2d0c01ff00 nid=0x681 runnable [0x00007f2d1ebef000]
java.lang.Thread.State: RUNNABLE
at sun.nio.ch.EPoll.wait(java.base@15.0.1/Native Method)
at sun.nio.ch.EPollSelectorImpl.doSelect(java.base@15.0.1/EPollSelectorImpl.java:120)
at sun.nio.ch.SelectorImpl.lockAndDoSelect(java.base@15.0.1/SelectorImpl.java:129)
- locked <0x0000000615f89598> (a sun.nio.ch.Util$2)
- locked <0x0000000615f895a8> (a sun.nio.ch.EPollSelectorImpl)
at sun.nio.ch.SelectorImpl.select(java.base@15.0.1/SelectorImpl.java:146)
at io.netty.channel.nio.NioEventLoop.select(NioEventLoop.java:803)
at io.netty.channel.nio.NioEventLoop.run(NioEventLoop.java:457)
at io.netty.util.concurrent.SingleThreadEventExecutor$4.run(SingleThreadEventExecutor.java:989)
at io.netty.util.internal.ThreadExecutorMap$2.run(ThreadExecutorMap.java:74)
at java.lang.Thread.run(java.base@15.0.1/Thread.java:832)
"elasticsearch[es-hotdata167][transport_worker][T#8]" #45 daemon prio=5 os_prio=0 cpu=316127176.25ms elapsed=7337110.49s tid=0x00007f2cf4012820 nid=0x683 runnable [0x00007f2d1e9ed000]
java.lang.Thread.State: RUNNABLE
at sun.nio.ch.EPoll.wait(java.base@15.0.1/Native Method)
at sun.nio.ch.EPollSelectorImpl.doSelect(java.base@15.0.1/EPollSelectorImpl.java:120)
at sun.nio.ch.SelectorImpl.lockAndDoSelect(java.base@15.0.1/SelectorImpl.java:129)
"elasticsearch[es-hotdata167][transport_worker][T#3]" #38 daemon prio=5 os_prio=0 cpu=706222935.10ms elapsed=7337142.39s tid=0x00007f2cfc002ed0 nid=0x67c runnable [0x00007f2d1f2f4000]
java.lang.Thread.State: RUNNABLE
at sun.nio.ch.EPoll.wait(java.base@15.0.1/Native Method)
at sun.nio.ch.EPollSelectorImpl.doSelect(java.base@15.0.1/EPollSelectorImpl.java:120)
at sun.nio.ch.SelectorImpl.lockAndDoSelect(java.base@15.0.1/SelectorImpl.java:129)
- locked <0x00000006158c3388> (a sun.nio.ch.Util$2)
- locked <0x00000006158c3398> (a sun.nio.ch.EPollSelectorImpl)
at sun.nio.ch.SelectorImpl.select(java.base@15.0.1/SelectorImpl.java:146)
at io.netty.channel.nio.NioEventLoop.select(NioEventLoop.java:803)
at io.netty.channel.nio.NioEventLoop.run(NioEventLoop.java:457)
at io.netty.util.concurrent.SingleThreadEventExecutor$4.run(SingleThreadEventExecutor.java:989)
at io.netty.util.internal.ThreadExecutorMap$2.run(ThreadExecutorMap.java:74)
at java.lang.Thread.run(java.base@15.0.1/Thread.java:832)
"elasticsearch[es-hotdata167][transport_worker][T#4]" #39 daemon prio=5 os_prio=0 cpu=1168295666.08ms elapsed=7337142.39s tid=0x00007f2d00000d80 nid=0x67d runnable [0x00007f2d1f1f3000]
java.lang.Thread.State: RUNNABLE
at sun.nio.ch.EPoll.wait(java.base@15.0.1/Native Method)
at sun.nio.ch.EPollSelectorImpl.doSelect(java.base@15.0.1/EPollSelectorImpl.java:120)
at sun.nio.ch.SelectorImpl.lockAndDoSelect(java.base@15.0.1/SelectorImpl.java:129)
- locked <0x00000006158c4388> (a sun.nio.ch.Util$2)
- locked <0x00000006158c4320> (a sun.nio.ch.EPollSelectorImpl)
at sun.nio.ch.SelectorImpl.select(java.base@15.0.1/SelectorImpl.java:146)
at io.netty.channel.nio.NioEventLoop.select(NioEventLoop.java:803)
at io.netty.channel.nio.NioEventLoop.run(NioEventLoop.java:457)
at io.netty.util.concurrent.SingleThreadEventExecutor$4.run(SingleThreadEventExecutor.java:989)
at io.netty.util.internal.ThreadExecutorMap$2.run(ThreadExecutorMap.java:74)
at java.lang.Thread.run(java.base@15.0.1/Thread.java:832)
"elasticsearch[es-hotdata167][transport_worker][T#6]" #42 daemon prio=5 os_prio=0 cpu=238903228.17ms elapsed=7337142.15s tid=0x00007f2d0c01e730 nid=0x67f runnable [0x00007f2d1edf1000]
java.lang.Thread.State: RUNNABLE
at sun.nio.ch.EPoll.wait(java.base@15.0.1/Native Method)
at sun.nio.ch.EPollSelectorImpl.doSelect(java.base@15.0.1/EPollSelectorImpl.java:120)
at sun.nio.ch.SelectorImpl.lockAndDoSelect(java.base@15.0.1/SelectorImpl.java:129)
- locked <0x0000000615e2df48> (a sun.nio.ch.Util$2)
- locked <0x0000000615e2df58> (a sun.nio.ch.EPollSelectorImpl)
at sun.nio.ch.SelectorImpl.select(java.base@15.0.1/SelectorImpl.java:146)
at io.netty.channel.nio.NioEventLoop.select(NioEventLoop.java:803)
at io.netty.channel.nio.NioEventLoop.run(NioEventLoop.java:457)
at io.netty.util.concurrent.SingleThreadEventExecutor$4.run(SingleThreadEventExecutor.java:989)
at io.netty.util.internal.ThreadExecutorMap$2.run(ThreadExecutorMap.java:74)
at java.lang.Thread.run(java.base@15.0.1/Thread.java:832)
"elasticsearch[es-hotdata167][transport_worker][T#5]" #43 daemon prio=5 os_prio=0 cpu=659339994.60ms elapsed=7337142.15s tid=0x00007f2cf4011440 nid=0x680 runnable [0x00007f2d1ecf0000]
java.lang.Thread.State: RUNNABLE
at sun.nio.ch.EPoll.wait(java.base@15.0.1/Native Method)
at sun.nio.ch.EPollSelectorImpl.doSelect(java.base@15.0.1/EPollSelectorImpl.java:120)
at sun.nio.ch.SelectorImpl.lockAndDoSelect(java.base@15.0.1/SelectorImpl.java:129)
- locked <0x0000000615e2e1e0> (a sun.nio.ch.Util$2)
- locked <0x0000000615e2e1f0> (a sun.nio.ch.EPollSelectorImpl)
at sun.nio.ch.SelectorImpl.select(java.base@15.0.1/SelectorImpl.java:146)
at io.netty.channel.nio.NioEventLoop.select(NioEventLoop.java:803)
at io.netty.channel.nio.NioEventLoop.run(NioEventLoop.java:457)
at io.netty.util.concurrent.SingleThreadEventExecutor$4.run(SingleThreadEventExecutor.java:989)
at io.netty.util.internal.ThreadExecutorMap$2.run(ThreadExecutorMap.java:74)
at java.lang.Thread.run(java.base@15.0.1/Thread.java:832)
"elasticsearch[es-hotdata167][transport_worker][T#7]" #44 daemon prio=5 os_prio=0 cpu=303702518.53ms elapsed=7337142.15s tid=0x00007f2d0c01ff00 nid=0x681 runnable [0x00007f2d1ebef000]
java.lang.Thread.State: RUNNABLE
at sun.nio.ch.EPoll.wait(java.base@15.0.1/Native Method)
at sun.nio.ch.EPollSelectorImpl.doSelect(java.base@15.0.1/EPollSelectorImpl.java:120)
at sun.nio.ch.SelectorImpl.lockAndDoSelect(java.base@15.0.1/SelectorImpl.java:129)
Disk I/O is not a bottleneck; the utilization rate is only about 50%.