ES 7.2.0 Master fails to join cluster

I am running an ES 7.2.0 cluster with 3 master nodes and 2 data nodes. I ran into the following exception on one of the master nodes:

{"type": "server", "timestamp": "2019-07-22T08:20:00,042+0000", "level": "WARN", "component": "o.e.c.c.Coordinator", "cluster.name": "elasticsearch", "node.name": "elasticsearch-master-0", "cluster.uuid": "YG8BgKRWR1ePWXB4w1EFyw", "node.id": "GSXQtoK4TLm8C0HiQWlqcA", "message": "received cluster state from {elasticsearch-master-1}{tN08_9qVSvqIT_6Gt2Qbgg}{XHEFhNeLQ8-f1Nj_bQfNIw}{10.1.1.4}{10.1.1.4:9300}{ml.machine_memory=24907440128, ml.max_open_jobs=20, xpack.installed=true} with a different cluster uuid IuClnUZARBW6nT4NxboQyw than local cluster uuid YG8BgKRWR1ePWXB4w1EFyw, rejecting" }
{"type": "server", "timestamp": "2019-07-22T08:20:00,056+0000", "level": "INFO", "component": "o.e.c.c.JoinHelper", "cluster.name": "elasticsearch", "node.name": "elasticsearch-master-0", "cluster.uuid": "YG8BgKRWR1ePWXB4w1EFyw", "node.id": "GSXQtoK4TLm8C0HiQWlqcA", "message": "failed to join {elasticsearch-master-1}{tN08_9qVSvqIT_6Gt2Qbgg}{XHEFhNeLQ8-f1Nj_bQfNIw}{10.1.1.4}{10.1.1.4:9300}{ml.machine_memory=24907440128, ml.max_open_jobs=20, xpack.installed=true} with JoinRequest{sourceNode={elasticsearch-master-0}{GSXQtoK4TLm8C0HiQWlqcA}{0cQCIRUNQYmu-G6OJ_c5Mg}{10.1.1.3}{10.1.1.3:9300}{ml.machine_memory=24907440128, xpack.installed=true, ml.max_open_jobs=20}, optionalJoin=Optional[Join{term=129, lastAcceptedTerm=3, lastAcceptedVersion=18, sourceNode={elasticsearch-master-0}{GSXQtoK4TLm8C0HiQWlqcA}{0cQCIRUNQYmu-G6OJ_c5Mg}{10.1.1.3}{10.1.1.3:9300}{ml.machine_memory=24907440128, xpack.installed=true, ml.max_open_jobs=20}, targetNode={elasticsearch-master-1}{tN08_9qVSvqIT_6Gt2Qbgg}{XHEFhNeLQ8-f1Nj_bQfNIw}{10.1.1.4}{10.1.1.4:9300}{ml.machine_memory=24907440128, ml.max_open_jobs=20, xpack.installed=true}}]}" ,
"stacktrace": ["org.elasticsearch.transport.RemoteTransportException: [elasticsearch-master-1][10.1.1.4:9300][internal:cluster/coordination/join]",
"Caused by: org.elasticsearch.cluster.coordination.FailedToCommitClusterStateException: publication failed",
"at org.elasticsearch.cluster.coordination.Coordinator$CoordinatorPublication$3.onFailure(Coordinator.java:1353) ~[elasticsearch-7.2.0.jar:7.2.0]",
"at org.elasticsearch.common.util.concurrent.ListenableFuture$1.run(ListenableFuture.java:101) ~[elasticsearch-7.2.0.jar:7.2.0]",
"at org.elasticsearch.common.util.concurrent.EsExecutors$DirectExecutorService.execute(EsExecutors.java:193) ~[elasticsearch-7.2.0.jar:7.2.0]",
"at org.elasticsearch.common.util.concurrent.ListenableFuture.notifyListener(ListenableFuture.java:92) ~[elasticsearch-7.2.0.jar:7.2.0]",
"at org.elasticsearch.common.util.concurrent.ListenableFuture.addListener(ListenableFuture.java:54) ~[elasticsearch-7.2.0.jar:7.2.0]",
"at org.elasticsearch.cluster.coordination.Coordinator$CoordinatorPublication.onCompletion(Coordinator.java:1293) ~[elasticsearch-7.2.0.jar:7.2.0]",
"at org.elasticsearch.cluster.coordination.Publication.onPossibleCompletion(Publication.java:124) ~[elasticsearch-7.2.0.jar:7.2.0]",
"at org.elasticsearch.cluster.coordination.Publication.onPossibleCommitFailure(Publication.java:172) ~[elasticsearch-7.2.0.jar:7.2.0]",
"at org.elasticsearch.cluster.coordination.Publication.access$600(Publication.java:41) ~[elasticsearch-7.2.0.jar:7.2.0]",
"at org.elasticsearch.cluster.coordination.Publication$PublicationTarget$PublishResponseHandler.onFailure(Publication.java:348) ~[elasticsearch-7.2.0.jar:7.2.0]",
"at org.elasticsearch.cluster.coordination.Coordinator$6.onFailure(Coordinator.java:1080) ~[elasticsearch-7.2.0.jar:7.2.0]",
"at org.elasticsearch.cluster.coordination.PublicationTransportHandler.lambda$sendClusterStateToNode$6(PublicationTransportHandler.java:267) ~[elasticsearch-7.2.0.jar:7.2.0]",
"at org.elasticsearch.cluster.coordination.PublicationTransportHandler$3.handleException(PublicationTransportHandler.java:285) ~[elasticsearch-7.2.0.jar:7.2.0]",
"at org.elasticsearch.transport.TransportService$ContextRestoreResponseHandler.handleException(TransportService.java:1111) ~[elasticsearch-7.2.0.jar:7.2.0]",
"at org.elasticsearch.transport.InboundHandler.lambda$handleException$2(InboundHandler.java:246) ~[elasticsearch-7.2.0.jar:7.2.0]",
"at org.elasticsearch.common.util.concurrent.ThreadContext$ContextPreservingRunnable.run(ThreadContext.java:688) ~[elasticsearch-7.2.0.jar:7.2.0]",
"at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1128) ~[?:?]",
"at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:628) ~[?:?]",
"at java.lang.Thread.run(Thread.java:835) [?:?]",
"Caused by: org.elasticsearch.cluster.coordination.FailedToCommitClusterStateException: non-failed nodes do not form a quorum",
"at org.elasticsearch.cluster.coordination.Publication.onPossibleCommitFailure(Publication.java:170) ~[elasticsearch-7.2.0.jar:7.2.0]",
"at org.elasticsearch.cluster.coordination.Publication.access$600(Publication.java:41) ~[elasticsearch-7.2.0.jar:7.2.0]",
"at org.elasticsearch.cluster.coordination.Publication$PublicationTarget$PublishResponseHandler.onFailure(Publication.java:348) ~[elasticsearch-7.2.0.jar:7.2.0]",
"at org.elasticsearch.cluster.coordination.Coordinator$6.onFailure(Coordinator.java:1080) ~[elasticsearch-7.2.0.jar:7.2.0]",
"at org.elasticsearch.cluster.coordination.PublicationTransportHandler.lambda$sendClusterStateToNode$6(PublicationTransportHandler.java:267) ~[elasticsearch-7.2.0.jar:7.2.0]",
"at org.elasticsearch.cluster.coordination.PublicationTransportHandler$3.handleException(PublicationTransportHandler.java:285) ~[elasticsearch-7.2.0.jar:7.2.0]",
"at org.elasticsearch.transport.TransportService$ContextRestoreResponseHandler.handleException(TransportService.java:1111) ~[elasticsearch-7.2.0.jar:7.2.0]",
"at org.elasticsearch.transport.InboundHandler.lambda$handleException$2(InboundHandler.java:246) ~[elasticsearch-7.2.0.jar:7.2.0]",
"at org.elasticsearch.common.util.concurrent.ThreadContext$ContextPreservingRunnable.run(ThreadContext.java:688) ~[elasticsearch-7.2.0.jar:7.2.0]",
"at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1128) ~[?:?]",
"at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:628) ~[?:?]",
"at java.lang.Thread.run(Thread.java:835) ~[?:?]"] }

Hi @Kok_How_Teh, this message here ...

... means that you have two separate clusters - note the different cluster UUIDs. Elasticsearch has no way to merge these clusters together, hence the error. If you do not expect to have more than one cluster, you have probably not configured cluster bootstrapping correctly. See this note in the docs for more information and a solution.

I did. This is my config, which runs on my local Kubernetes cluster.

# ConfigMap carrying two Elasticsearch settings files (one for the
# master-eligible nodes, one for the data nodes) plus a JVM options string.
apiVersion: v1
kind: ConfigMap
metadata:
  # NOTE(review): creationTimestamp, resourceVersion, selfLink and uid are
  # fields populated by the Kubernetes API server; presumably this manifest
  # is an export of the live object - confirm before re-applying it.
  creationTimestamp: "2018-12-28T04:09:38Z"
  name: elasticsearch-config
  namespace: default
  resourceVersion: "789954"
  selfLink: /api/v1/namespaces/default/configmaps/elasticsearch-config
  uid: 6494f746-0a56-11e9-a34a-482ae31e6a94
data:
  # Settings file for master-eligible, non-data nodes
  # (node.master: true, node.data: false). The node name is taken from the
  # HOSTNAME environment variable, so the entries under
  # cluster.initial_master_nodes must match those hostnames.
  elasticsearch-master.yml: |+
    cluster.name: elasticsearch
    cluster.initial_master_nodes:
      - elasticsearch-master-0
      - elasticsearch-master-1
      - elasticsearch-master-2
    discovery.seed_hosts:
      - elasticsearch-master-0.svc-elasticsearch-discovery.default.svc.cluster.local
      - elasticsearch-master-1.svc-elasticsearch-discovery.default.svc.cluster.local
      - elasticsearch-master-2.svc-elasticsearch-discovery.default.svc.cluster.local
    script.painless.regex.enabled: true
    http.cors.enabled: true
    http.cors.allow-origin: "*"
    network.host: _site_
    node.name: ${HOSTNAME}
    node.master: true
    node.data: false
    bootstrap.memory_lock: true

  # Settings file for data-only nodes (node.master: false, node.data: true).
  # It lists the same three master names and the same discovery seed hosts as
  # the master settings file.
  elasticsearch-data.yml: |+
    cluster.name: elasticsearch
    cluster.initial_master_nodes:
      - elasticsearch-master-0
      - elasticsearch-master-1
      - elasticsearch-master-2
    discovery.seed_hosts:
      - elasticsearch-master-0.svc-elasticsearch-discovery.default.svc.cluster.local
      - elasticsearch-master-1.svc-elasticsearch-discovery.default.svc.cluster.local
      - elasticsearch-master-2.svc-elasticsearch-discovery.default.svc.cluster.local
    script.painless.regex.enabled: true
    http.cors.enabled: true
    http.cors.allow-origin: "*"
    network.host: _site_
    node.name: ${HOSTNAME}
    node.master: false
    node.data: true
    bootstrap.memory_lock: true

  # JVM options string: 512 MiB initial and maximum heap, JNA temp dir in /tmp.
  ES_JAVA_OPTS: -Xms512m -Xmx512m -Djna.tmpdir=/tmp

This topic was automatically closed 28 days after the last reply. New replies are no longer allowed.