Hi,
I cannot convert my standalone instance to a cluster.
Initially, I tried simply copying the Elasticsearch data directory to the new nodes, but after some googling I realized that won't work.
Then I just brought up the Elasticsearch daemon on the new nodes and let it sync on its own; however, this resulted in tons of errors and the indices went read-only.
During the next attempt, I restarted the master node on a non-standard port in order to exclude any new data being written before all nodes are in sync. This attempt was also unsuccessful.
This is what I see in master log:
[2023-06-15T04:34:04,252][WARN ][r.suppressed ] [sd-4531-6c55.my.domain] path: /app-system-metrics-2023.06.15/doc, params: {index=app-system-metrics-2023.06.15, type=doc}
org.elasticsearch.action.UnavailableShardsException: [app-system-metrics-2023.06.15][3] primary shard is not active Timeout: [1m], request: [BulkShardRequest [[app-system-metrics-2023.06.15][3]] containing [index {[app-system-metrics-2023.06.15][doc][R2gvvogBZBBt2Z8_zh_1], source[{"application":"cloud-bridge-dh","timestamp":"2023-06-15T08:33:04.227763Z","sysCpuUsg":29.798766294867633,"appCpuUsg":0.026412955554699576,"jvmMemUsed":4677,"hostName":"isgswpid6n1.my.domain","jvmMemFree":3515,"jvmMemTotal":8192,"pid":"32906@isgswpid6n1.nam.nsroot.net","mode":"jms","dataType":"pmm","processingMode":"default,datahighway","threadsNum":287}]}]]
at org.elasticsearch.action.support.replication.TransportReplicationAction$ReroutePhase.retryBecauseUnavailable(TransportReplicationAction.java:985) [elasticsearch-6.8.8.jar:6.8.8]
at org.elasticsearch.action.support.replication.TransportReplicationAction$ReroutePhase.retryIfUnavailable(TransportReplicationAction.java:862) [elasticsearch-6.8.8.jar:6.8.8]
at org.elasticsearch.action.support.replication.TransportReplicationAction$ReroutePhase.doRun(TransportReplicationAction.java:814) [elasticsearch-6.8.8.jar:6.8.8]
at org.elasticsearch.common.util.concurrent.AbstractRunnable.run(AbstractRunnable.java:37) [elasticsearch-6.8.8.jar:6.8.8]
at org.elasticsearch.action.support.replication.TransportReplicationAction$ReroutePhase$2.onTimeout(TransportReplicationAction.java:945) [elasticsearch-6.8.8.jar:6.8.8]
at org.elasticsearch.cluster.ClusterStateObserver$ContextPreservingListener.onTimeout(ClusterStateObserver.java:322) [elasticsearch-6.8.8.jar:6.8.8]
at org.elasticsearch.cluster.ClusterStateObserver$ObserverClusterStateListener.onTimeout(ClusterStateObserver.java:249) [elasticsearch-6.8.8.jar:6.8.8]
at org.elasticsearch.cluster.service.ClusterApplierService$NotifyTimeout.run(ClusterApplierService.java:564) [elasticsearch-6.8.8.jar:6.8.8]
at org.elasticsearch.common.util.concurrent.ThreadContext$ContextPreservingRunnable.run(ThreadContext.java:681) [elasticsearch-6.8.8.jar:6.8.8]
at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149) [?:1.8.0_321]
at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624) [?:1.8.0_321]
at java.lang.Thread.run(Thread.java:750) [?:1.8.0_321]
[2023-06-15T04:44:43,446][WARN ][r.suppressed ] [sd-4531-6c55.my.domain] path: /app-system-metrics-2023.06.15/doc, params: {index=app-system-metrics-2023.06.15, type=doc}
org.elasticsearch.cluster.metadata.ProcessClusterEventTimeoutException: failed to process cluster event (put-mapping) within 30s
at org.elasticsearch.cluster.service.MasterService$Batcher.lambda$onTimeout$0(MasterService.java:127) ~[elasticsearch-6.8.8.jar:6.8.8]
at java.util.ArrayList.forEach(ArrayList.java:1259) ~[?:1.8.0_321]
at org.elasticsearch.cluster.service.MasterService$Batcher.lambda$onTimeout$1(MasterService.java:126) ~[elasticsearch-6.8.8.jar:6.8.8]
at org.elasticsearch.common.util.concurrent.ThreadContext$ContextPreservingRunnable.run(ThreadContext.java:681) ~[elasticsearch-6.8.8.jar:6.8.8]
at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149) ~[?:1.8.0_321]
at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624) ~[?:1.8.0_321]
at java.lang.Thread.run(Thread.java:750) [?:1.8.0_321]
Logs of new node 1
[2023-06-15T04:17:37,947][WARN ][o.e.c.NodeConnectionsService] [sd-c3d2-b7ca.my.domain] failed to connect to node {sd-4531-6c55.my.domain}{SJy6ieIMQ5C41td7c3t1Hw}{IF55uJx9Sxa7w8xn953irg}{sd-4531-6c55.my.domain}{xxx.xxx.189.103:9300}{ml.machine_memory=135028703232, ml.max_open_jobs=20, xpack.installed=true, ml.enabled=true} (tried [7] times)
org.elasticsearch.transport.ConnectTransportException: [sd-4531-6c55.my.domain][xxx.xxx.189.103:9300] handshake failed. unexpected remote node {sd-4531-6c55.my.domain}{SJy6ieIMQ5C41td7c3t1Hw}{vhQiTmAGTF68awUB3kVnLQ}{sd-4531-6c55.my.domain}{xxx.xxx.189.103:9300}{ml.machine_memory=135028703232, ml.max_open_jobs=20, xpack.installed=true, ml.enabled=true}
at org.elasticsearch.transport.TransportService.lambda$connectionValidator$4(TransportService.java:350) ~[elasticsearch-6.8.8.jar:6.8.8]
at org.elasticsearch.transport.ConnectionManager.connectToNode(ConnectionManager.java:105) ~[elasticsearch-6.8.8.jar:6.8.8]
at org.elasticsearch.transport.TransportService.connectToNode(TransportService.java:342) ~[elasticsearch-6.8.8.jar:6.8.8]
at org.elasticsearch.transport.TransportService.connectToNode(TransportService.java:329) ~[elasticsearch-6.8.8.jar:6.8.8]
at org.elasticsearch.cluster.NodeConnectionsService.validateAndConnectIfNeeded(NodeConnectionsService.java:154) [elasticsearch-6.8.8.jar:6.8.8]
at org.elasticsearch.cluster.NodeConnectionsService$ConnectionChecker.doRun(NodeConnectionsService.java:181) [elasticsearch-6.8.8.jar:6.8.8]
at org.elasticsearch.common.util.concurrent.ThreadContext$ContextPreservingAbstractRunnable.doRun(ThreadContext.java:751) [elasticsearch-6.8.8.jar:6.8.8]
at org.elasticsearch.common.util.concurrent.AbstractRunnable.run(AbstractRunnable.java:37) [elasticsearch-6.8.8.jar:6.8.8]
at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149) [?:1.8.0_361]
at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624) [?:1.8.0_361]
at java.lang.Thread.run(Thread.java:750) [?:1.8.0_361]
[2023-06-15T04:17:38,898][WARN ][o.e.c.s.ClusterApplierService] [sd-c3d2-b7ca.my.domain] cluster state applier task [apply cluster state (from master [master {sd-07a1-ae2b.my.domain}{0a4UMLAMR56HqeuQzZLn_w}{WbKhroR5QaygUZUPDyAmsQ}{sd-07a1-ae2b.my.domain}{xxx.xxx.89:9300}{ml.machine_memory=67387813888, ml.max_open_jobs=20, xpack.installed=true, ml.enabled=true} committed version [8970]])] took [52.9s] above the warn threshold of 30s
Logs of new node 2
[2023-06-15T05:12:07,281][DEBUG][o.e.a.b.TransportShardBulkAction] [sd-07a1-ae2b.my.domain] [ss-rest-2023.06.15][0] failed to execute bulk item (index) index {[ss-rest-2023.06.15][doc][U4BSvogBZBBt2Z8_pMH0], source[{"statusCode":404,"timestamp":"2023-06-15T09:11:07.233491Z","url":"/matchingcriteriastream/products/25810505528337/smcp1/criteria","host":"isgswapd6n2.my.domain:9000","msgCount":0,"subscriptionName":"","subjectId":"","requestCount":3,"avgMongoQueryLatency":21.333333333333332,"pid":"28528@isgswapd6n2.nam.nsroot.net","avgMsgSize":0.0,"requestMethod":"PUT","avgResponseLatency":21.333333333333332,"subscriptionType":"","destination":"rest"}]}
org.elasticsearch.cluster.metadata.ProcessClusterEventTimeoutException: failed to process cluster event (put-mapping) within 30s
at org.elasticsearch.cluster.service.MasterService$Batcher.lambda$onTimeout$0(MasterService.java:127) ~[elasticsearch-6.8.8.jar:6.8.8]
at java.util.ArrayList.forEach(ArrayList.java:1259) ~[?:1.8.0_361]
at org.elasticsearch.cluster.service.MasterService$Batcher.lambda$onTimeout$1(MasterService.java:126) ~[elasticsearch-6.8.8.jar:6.8.8]
at org.elasticsearch.common.util.concurrent.ThreadContext$ContextPreservingRunnable.run(ThreadContext.java:681) ~[elasticsearch-6.8.8.jar:6.8.8]
at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149) [?:1.8.0_361]
at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624) [?:1.8.0_361]
at java.lang.Thread.run(Thread.java:750) [?:1.8.0_361]
[2023-06-15T05:13:18,587][DEBUG][o.e.a.a.i.m.p.TransportPutMappingAction] [sd-07a1-ae2b.my.domain] failed to put mappings on indices [[[cloud-bridge-generic-2023.06.15/EjjeqpF9TXiBwkO1NziWfA]]], type [doc]
org.elasticsearch.cluster.metadata.ProcessClusterEventTimeoutException: failed to process cluster event (put-mapping) within 30s
at org.elasticsearch.cluster.service.MasterService$Batcher.lambda$onTimeout$0(MasterService.java:127) ~[elasticsearch-6.8.8.jar:6.8.8]
at java.util.ArrayList.forEach(ArrayList.java:1259) ~[?:1.8.0_361]
at org.elasticsearch.cluster.service.MasterService$Batcher.lambda$onTimeout$1(MasterService.java:126) ~[elasticsearch-6.8.8.jar:6.8.8]
at org.elasticsearch.common.util.concurrent.ThreadContext$ContextPreservingRunnable.run(ThreadContext.java:681) [elasticsearch-6.8.8.jar:6.8.8]
at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149) [?:1.8.0_361]
at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624) [?:1.8.0_361]
at java.lang.Thread.run(Thread.java:750) [?:1.8.0_361]
ES version: 6.8.8
elasticsearch.yml
cluster.name: ELK_SIT
path.data: /opt/elasticdata
network.host: ${HOSTNAME}
discovery.zen.ping.unicast.hosts: sd-4531-6c55,sd-c3d2-b7ca,sd-07a1-ae2b
node.name: ${HOSTNAME}
discovery.zen.minimum_master_nodes: 1
xpack.security.transport.ssl.enabled: true
xpack.security.enabled: true
bootstrap.system_call_filter: false
xpack.ssl.cipher_suites: TLS_ECDHE_ECDSA_WITH_AES_256_GCM_SHA384, TLS_ECDHE_ECDSA_WITH_AES_128_GCM_SHA256, TLS_ECDHE_RSA_WITH_AES_256_GCM_SHA384, TLS_RSA_WITH_AES_256_GCM_SHA384, TLS_DHE_RSA_WITH_AES_256_GCM_SHA384, TLS_DHE_DSS_WITH_AES_256_GCM_SHA384, TLS_ECDHE_RSA_WITH_AES_128_GCM_SHA256, TLS_RSA_WITH_AES_128_GCM_SHA256, TLS_DHE_RSA_WITH_AES_128_GCM_SHA256, TLS_DHE_DSS_WITH_AES_128_GCM_SHA256
xpack.security.transport.ssl.verification_mode: none
xpack.security.transport.ssl.keystore.path: certs/elastic-certificates.p12
xpack.security.transport.ssl.truststore.path: certs/elastic-certificates.p12