Docker Swarm - Internode Communication Issue

Hi,

My environment for ELK is a Docker Swarm setup. Three containers on three individual hosts to utilize distributed storage. I'm using Swarmpit as my orchestrator to compose the stack as follows:

# Docker Swarm stack: three Elasticsearch 7.16.2 nodes (one per swarm host,
# pinned via placement constraints) plus Kibana. Transport traffic uses
# host-mapped port 9300 with network.publish_host set to each host's IP so
# node addresses stay static across container restarts.
version: '3.3'
services:
  elasticsearch:
    image: elasticsearch:7.16.2
    environment:
      # Quoted: an unquoted [password] parses as a YAML flow sequence, not a string.
      ELASTIC_PASSWORD: '[password]'
      # Bootstrap list — must match the node.name of the bootstrap master.
      # Only consulted on first cluster formation; stale data dirs on the other
      # nodes can make them bootstrap independently (see thread resolution).
      cluster.initial_master_nodes: elasticsearch
      cluster.max_shards_per_node: '1500'
      cluster.name: cisd-prod
      discovery.seed_hosts: 172.16.14.10,172.16.14.11,172.16.14.12
      network.host: 0.0.0.0
      network.publish_host: 172.16.14.10
      node.name: elasticsearch
      transport.port: '9300'
      xpack.monitoring.collection.enabled: 'true'
      xpack.security.enabled: 'true'
      xpack.security.http.ssl.certificate: wildcard.crt
      xpack.security.http.ssl.client_authentication: optional
      xpack.security.http.ssl.enabled: 'true'
      xpack.security.http.ssl.key: wildcard.key
      xpack.security.transport.ssl.client_authentication: required
      xpack.security.transport.ssl.enabled: 'true'
      xpack.security.transport.ssl.keystore.path: elastic-certificates.p12
      xpack.security.transport.ssl.truststore.path: elastic-certificates.p12
      xpack.security.transport.ssl.verification_mode: certificate
    # Port mappings are quoted — unquoted digit:digit scalars are a known
    # YAML implicit-typing trap in Compose files.
    ports:
      - '9200:9200'
      - '9300:9300'
    volumes:
      - elastic-swarm:/usr/share/elasticsearch/data
      - elastic-swarm-config:/usr/share/elasticsearch/config
    networks:
      - swarmpit_net
    logging:
      driver: json-file
    deploy:
      mode: global
      placement:
        constraints:
          - node.labels.elastic_host == true
          - node.hostname == docker-swarm1
  elasticsearch2:
    image: elasticsearch:7.16.2
    environment:
      ELASTIC_PASSWORD: '[password]'
      cluster.initial_master_nodes: elasticsearch
      cluster.max_shards_per_node: '1500'
      cluster.name: cisd-prod
      discovery.seed_hosts: 172.16.14.10,172.16.14.11,172.16.14.12
      network.host: 0.0.0.0
      network.publish_host: 172.16.14.11
      node.name: elasticsearch2
      xpack.monitoring.collection.enabled: 'true'
      xpack.security.enabled: 'true'
      xpack.security.http.ssl.certificate: wildcard.crt
      xpack.security.http.ssl.client_authentication: optional
      xpack.security.http.ssl.enabled: 'true'
      xpack.security.http.ssl.key: wildcard.key
      xpack.security.transport.ssl.client_authentication: required
      xpack.security.transport.ssl.enabled: 'true'
      xpack.security.transport.ssl.keystore.path: elastic-certificates.p12
      xpack.security.transport.ssl.truststore.path: elastic-certificates.p12
      xpack.security.transport.ssl.verification_mode: certificate
    ports:
      - '9200:9200'
      - '9300:9300'
    volumes:
      - elastic-swarm:/usr/share/elasticsearch/data
      - elastic-swarm-config:/usr/share/elasticsearch/config
    networks:
      - swarmpit_net
    logging:
      driver: json-file
    deploy:
      mode: global
      placement:
        constraints:
          - node.labels.elastic_host == true
          - node.hostname == docker-swarm2
  elasticsearch3:
    image: elasticsearch:7.16.2
    environment:
      ELASTIC_PASSWORD: '[password]'
      cluster.initial_master_nodes: elasticsearch
      cluster.max_shards_per_node: '1500'
      cluster.name: cisd-prod
      discovery.seed_hosts: 172.16.14.10,172.16.14.11,172.16.14.12
      network.host: 0.0.0.0
      network.publish_host: 172.16.14.12
      node.name: elasticsearch3
      xpack.monitoring.collection.enabled: 'true'
      xpack.security.enabled: 'true'
      xpack.security.http.ssl.certificate: wildcard.crt
      xpack.security.http.ssl.client_authentication: optional
      xpack.security.http.ssl.enabled: 'true'
      xpack.security.http.ssl.key: wildcard.key
      xpack.security.transport.ssl.client_authentication: required
      xpack.security.transport.ssl.enabled: 'true'
      xpack.security.transport.ssl.keystore.path: elastic-certificates.p12
      xpack.security.transport.ssl.truststore.path: elastic-certificates.p12
      xpack.security.transport.ssl.verification_mode: certificate
    ports:
      - '9200:9200'
      - '9300:9300'
    volumes:
      - elastic-swarm:/usr/share/elasticsearch/data
      - elastic-swarm-config:/usr/share/elasticsearch/config
    networks:
      - swarmpit_net
    logging:
      driver: json-file
    deploy:
      mode: global
      placement:
        constraints:
          - node.labels.elastic_host == true
          - node.hostname == docker-swarm3
  kibana-swarm:
    image: kibana:7.16.2
    environment:
      ELASTICSEARCH_HOSTS: https://elasticsearch:9200
      ELASTICSEARCH_PASSWORD: '[password]'
      ELASTICSEARCH_SSL_CERTIFICATE: /usr/share/kibana/config/wildcard.crt
      ELASTICSEARCH_SSL_KEY: /usr/share/kibana/config/wildcard.key
      ELASTICSEARCH_SSL_VERIFICATIONMODE: none
      ELASTICSEARCH_USERNAME: kibana_system
      SERVER_MAXPAYLOADBYTES: '20971520'
      SERVER_SSL_CERTIFICATE: /usr/share/kibana/config/wildcard.crt
      SERVER_SSL_ENABLED: 'true'
      SERVER_SSL_KEY: /usr/share/kibana/config/wildcard.key
      XPACK_SECURITY_HTTP_SSL_CLIENT_AUTHENTICATION: optional
    ports:
      - '5601:5601'
    volumes:
      - kibana-swarm:/usr/share/kibana/config
    networks:
      - swarmpit_net
    logging:
      driver: json-file
    deploy:
      mode: global
      placement:
        constraints:
          - node.hostname == docker-swarm1
# Pre-existing external network and volumes (created outside this stack).
networks:
  swarmpit_net:
    external: true
volumes:
  elastic-swarm:
    external: true
  elastic-swarm-config:
    external: true
  kibana-swarm:
    external: true

For the life of me I can't get communication working on port 9300, so the nodes never discover each other. The ports are host-mapped and network.publish_host is set to each node's host IP so the addresses stay static, but to no avail. Has anyone out there encountered this kind of setup?

Thanks.

Getting log lines like:

{"type": "server", "timestamp": "2022-01-12T14:18:29,624Z", "level": "WARN", "component": "o.e.c.c.ClusterFormationFailureHelper", "cluster.name": "cisd-prod", "node.name": "elasticsearch3", "message": "master not discovered yet, this node has not previously joined a bootstrapped (v7+) cluster, and this node must discover master-eligible nodes [elasticsearch] to bootstrap a cluster: have discovered [{elasticsearch3}{1OG7DlCMShaun8EZGeH21w}{U8ME7A54RaGYZiSz0VZn2g}{172.16.14.12}{172.16.14.12:9300}{cdfhilmrstw}]; discovery will continue using [172.16.14.10:9300, 172.16.14.11:9300] from hosts providers and [{elasticsearch3}{1OG7DlCMShaun8EZGeH21w}{U8ME7A54RaGYZiSz0VZn2g}{172.16.14.12}{172.16.14.12:9300}{cdfhilmrstw}] from last-known cluster state; node term 0, last-accepted version 0 in term 0" }

Solved by wiping the data folders of the 2nd and 3rd nodes: they had already bootstrapped their own clusters and therefore refused to join the 1st node's.

Also opened ports 7946/tcp and 4789/udp in ufw on each node, as suggested at: Manage swarm service networks | Docker Documentation

This topic was automatically closed 28 days after the last reply. New replies are no longer allowed.