Docker swarm discovery problems

zboinek · February 18, 2020, 12:42am

Hi all, I have the following problem. I'm trying to set up ES 7.6 in Docker swarm mode with the following docker-compose file:

# Stack file for a three-node Elasticsearch 7.6.0 cluster plus Kibana,
# deployed in Docker swarm mode.
version: "3"
services:
  # Node 1 of 3. This is the only Elasticsearch service in the file that
  # publishes a port; the other two have their ports sections commented out.
  elasticsearch01:
    image: docker.elastic.co/elasticsearch/elasticsearch:7.6.0
    hostname: elasticsearch01
    environment:
      - cluster.name=MC-elasticsearch-cluster
      - bootstrap.memory_lock=true
      - "ES_JAVA_OPTS=-Xms512m -Xmx512m"
      - node.name=elasticsearch01
      # Seed hosts are the two other services, addressed by service name.
      - discovery.seed_hosts=elasticsearch02,elasticsearch03
      # The same three node names are listed on every Elasticsearch service.
      - cluster.initial_master_nodes=elasticsearch01,elasticsearch02,elasticsearch03
    # Unlimited memlock; presumably set to accompany bootstrap.memory_lock=true
    # above - confirm it is honoured by the deployment method in use.
    ulimits:
      memlock:
        soft: -1
        hard: -1
    # Published without "mode: host". The logs quoted in this thread show this
    # node advertising a 10.0.0.x transport address while the other two
    # advertise 10.0.10.x, and the author later reports that removing this
    # binding let the cluster form.
    ports:
      - "9200:9200"
    # Data/config mounts, currently disabled:
    #    volumes:
    #      - "ES_DATA01:/usr/share/elasticsearch/data"
    #      - "/data/elasticsearch/config:/usr/share/elasticsearch/config"
    # Run a single replica, pinned to the swarm node labelled name=node1.
    deploy:
      mode: replicated
      replicas: 1
      placement:
        constraints:
          - node.labels.name==node1

  # Node 2 of 3. Same settings as elasticsearch01 apart from the node name,
  # seed hosts and placement; no port is published.
  elasticsearch02:
    image: docker.elastic.co/elasticsearch/elasticsearch:7.6.0
    hostname: elasticsearch02
    environment:
      - cluster.name=MC-elasticsearch-cluster
      - bootstrap.memory_lock=true
      - "ES_JAVA_OPTS=-Xms512m -Xmx512m"
      - node.name=elasticsearch02
      # Seed hosts are the two other services, addressed by service name.
      - discovery.seed_hosts=elasticsearch01,elasticsearch03
      - cluster.initial_master_nodes=elasticsearch01,elasticsearch02,elasticsearch03
    # Unlimited memlock; presumably set to accompany bootstrap.memory_lock=true.
    ulimits:
      memlock:
        soft: -1
        hard: -1
    # Port publishing and data/config mounts are disabled on this node:
    # ports:
    #  - "9200"
    #    volumes:
    #      - "ES_DATA02:/usr/share/elasticsearch/data"
    #      - "/data/elasticsearch/config:/usr/share/elasticsearch/config"
    # Run a single replica, pinned to the swarm node labelled name=node2.
    deploy:
      mode: replicated
      replicas: 1
      placement:
        constraints:
          - node.labels.name==node2

  # Node 3 of 3. Same settings as elasticsearch01 apart from the node name,
  # seed hosts and placement; no port is published.
  elasticsearch03:
    image: docker.elastic.co/elasticsearch/elasticsearch:7.6.0
    hostname: elasticsearch03
    environment:
      - cluster.name=MC-elasticsearch-cluster
      - bootstrap.memory_lock=true
      - "ES_JAVA_OPTS=-Xms512m -Xmx512m"
      - node.name=elasticsearch03
      # Seed hosts are the two other services, addressed by service name.
      - discovery.seed_hosts=elasticsearch02,elasticsearch01
      - cluster.initial_master_nodes=elasticsearch01,elasticsearch02,elasticsearch03
    # Unlimited memlock; presumably set to accompany bootstrap.memory_lock=true.
    ulimits:
      memlock:
        soft: -1
        hard: -1
    # Port publishing and data/config mounts are disabled on this node:
    #ports:
    #  - "9200"
    #    volumes:
    #      - "ES_DATA03:/usr/share/elasticsearch/data"
    #     - "/data/elasticsearch/config:/usr/share/elasticsearch/config"
    # Run a single replica, pinned to the swarm node labelled name=node3.
    deploy:
      mode: replicated
      replicas: 1
      placement:
        constraints:
          - node.labels.name==node3

  # Kibana 7.6.0, published on port 5601 and pointed at elasticsearch01 over
  # HTTP on port 9200.
  kibana:
    hostname: kibana
    image: docker.elastic.co/kibana/kibana:7.6.0
    ports:
      - "5601:5601"
    environment:
      ELASTICSEARCH_HOSTS: http://elasticsearch01:9200
    # NOTE(review): the Compose v3 reference states that `docker stack deploy`
    # ignores depends_on - confirm before relying on start-up ordering.
    depends_on:
      - elasticsearch01
    # Config/data mounts, currently disabled:
    #    volumes:
    #      - "/data/kibana/config:/usr/share/kibana/config"
    #     - "/data/kibana/data:/usr/share/kibana/data"
    # Run a single replica on the same swarm node as elasticsearch01
    # (label name=node1).
    deploy:
      mode: replicated
      replicas: 1
      placement:
        constraints:
          - node.labels.name==node1

# Named volumes for the three Elasticsearch nodes. Each one bind-mounts the
# host directory /data/elasticsearch/data via the local driver. No service
# uses them while the "volumes" sections in the services are commented out.
# NOTE(review): all three point at the same host path; presumably this relies
# on each service being pinned to a different swarm node - confirm.
volumes:
  ES_DATA01:
    driver: local
    driver_opts:
      type: "none"
      device: "/data/elasticsearch/data"
      o: "bind"
  ES_DATA02:
    driver: local
    driver_opts:
      type: "none"
      device: "/data/elasticsearch/data"
      o: "bind"
  ES_DATA03:
    driver: local
    driver_opts:
      type: "none"
      device: "/data/elasticsearch/data"
      o: "bind"

After all the problems with memory locking and so on, I now have a problem with master node discovery.
As I can see from the logs:

c.c.ClusterFormationFailureHelper", "cluster.name": "MC-elasticsearch-cluster", "node.name": "elasticsearch01",
 "message": "master not discovered or elected yet, an election requires 2 nodes with ids [z8Q0zQltTtG56VrtAUwtBA, xMIjIAfGTRS_BoudVUPKcg], have discovered [{elasticsearch01}{xMIjIAfGTRS_BoudVUPKcg}{z1iUOEnVRUKz3TQ0gywViA}{10.0.0.122}{10.0.0.122:9300}{dilm}{ml.machine_memory=6087704576, xpack.installed=true, ml.max_open_jobs=20}, 
{elasticsearch02}{e-cOeOgVQuWa-XaqPcttyg}{L13QhojnT-60w4cx9JggXw}{10.0.10.11}{10.0.10.11:9300}{dilm}{ml.machine_memory=6087696384, ml.max_open_jobs=20, 
xpack.installed=true}, {elasticsearch03}{z8Q0zQltTtG56VrtAUwtBA}{4hk0vKRxSQCjZuuA0Fkrrg}{10.0.10.3}{10.0.10.3:9300}{dilm}{ml.machine_memory=6087704576, ml.max_open_jobs=20, xpack.installed=true}] which is a quorum; discovery will continue using [10.0.10.10:9300, 
10.0.10.2:9300] from hosts providers and [{elasticsearch01}{xMIjIAfGTRS_BoudVUPKcg}{z1iUOEnVRUKz3TQ0gywViA}{10.0.0.122}{10.0.0.122:9300}{dilm}
{ml.machine_memory=6087704576, xpack.installed=true, ml.max_open_jobs=20}] from last-known cluster state; node term 1, last-accepted version 0 in term 0" }

It can find all my nodes and see their IPs, and it also says that there is a quorum, but it still wants to continue discovery and then can't elect a master.
Can you help me with that? I'm going to sleep now, but I will be glad for any tips tomorrow.
All firewalls are disabled as well as selinux.

DavidTurner · February 18, 2020, 10:28am

Could you share the corresponding message (from ClusterFormationFailureHelper) in full from all of your master-eligible nodes? I suspect they are not all saying the same thing.

zboinek · February 18, 2020, 12:04pm

Hi, thanks for the reply.
I'm still trying to get this done. Here are the logs from all 3 nodes.

node1:

{"type": "server", "timestamp": "2020-02-18T11:57:30,586Z", "level": "WARN", "component": 
"o.e.c.c.ClusterFormationFailureHelper", "cluster.name": "MC-elasticsearch-cluster", "node.name": 
"elasticsearch01", "message": "master not discovered or elected yet, an election requires 2 nodes
 with ids [Gii4iO3ZR3mfdC7duHscqA, YK6lQvVoTDm6p0iSnZ0JDw], have discovered 
[{elasticsearch01}{YK6lQvVoTDm6p0iSnZ0JDw}{ZVui9rUaTiCdqsWdhkzslg}{10.0.0.145}
{10.0.0.145:9300}{dilm}{ml.machine_memory=6087704576, xpack.installed=true, 
ml.max_open_jobs=20}, {elasticsearch02}{iV4dlHauTp-TcJCs0vzEEA}{iVHNBd8wQa2BVDEQIeG-sQ}
{10.0.14.8}{10.0.14.8:9300}{dilm}{ml.machine_memory=6087696384, ml.max_open_jobs=20, 
xpack.installed=true}, {elasticsearch03}{Gii4iO3ZR3mfdC7duHscqA}{B-
SO2DXQQKenBQdAw46W6Q}{10.0.14.11}{10.0.14.11:9300}{dilm}
{ml.machine_memory=6087704576, ml.max_open_jobs=20, xpack.installed=true}] which is a 
quorum; discovery will continue using [10.0.14.6:9300, 10.0.14.7:9300, 10.0.14.10:9300] from 
hosts providers and [{elasticsearch01}{YK6lQvVoTDm6p0iSnZ0JDw}{ZVui9rUaTiCdqsWdhkzslg}
{10.0.0.145}{10.0.0.145:9300}{dilm}{ml.machine_memory=6087704576, xpack.installed=true, 
ml.max_open_jobs=20}] from last-known cluster state; node term 1, last-accepted version 0 in 
term 0" }

node2:

{"type": "server", "timestamp": "2020-02-18T11:44:44,600Z", "level": "INFO", "component": 
"o.e.x.m.e.l.LocalExporter", "cluster.name": "MC-elasticsearch-cluster", "node.name": 
"elasticsearch02", "message": "waiting for elected master node [{elasticsearch03}
{Gii4iO3ZR3mfdC7duHscqA}{B-SO2DXQQKenBQdAw46W6Q}{10.0.14.11}{10.0.14.11:9300}{dilm}
{ml.machine_memory=6087704576, ml.max_open_jobs=20, xpack.installed=true}] to setup local
 exporter [default_local] (does it have x-pack installed?)", "cluster.uuid": 
"QPIOwuZgS1qflKZx_dbeCA", "node.id": "iV4dlHauTp-TcJCs0vzEEA"  }

{"type": "server", "timestamp": "2020-02-18T11:44:46,601Z", "level": "INFO", "component": 
"o.e.x.m.e.l.LocalExporter", "cluster.name": "MC-elasticsearch-cluster", "node.name": 
"elasticsearch02", "message": "waiting for elected master node [{elasticsearch03}
{Gii4iO3ZR3mfdC7duHscqA}{B-SO2DXQQKenBQdAw46W6Q}{10.0.14.11}{10.0.14.11:9300}{dilm}
{ml.machine_memory=6087704576, ml.max_open_jobs=20, xpack.installed=true}] to setup local
 exporter [default_local] (does it have x-pack installed?)", "cluster.uuid": 
"QPIOwuZgS1qflKZx_dbeCA", "node.id": "iV4dlHauTp-TcJCs0vzEEA"  }

zboinek · February 18, 2020, 12:05pm

And here I can see more problems:
node3:

elastic-search-staack_elasticsearch03.1.sv0p58f8n3qe@mcplcld01-elk03    | {"type": "server", "timestamp": "2020-02-18T11:50:24,272Z", "level": "WARN", "component": "o.e.t.TcpTransport", "cluster.name": "MC-elasticsearch-cluster", "node.name": "elasticsearch03", "message": "exception caught on transport layer [Netty4TcpChannel{localAddress=/10.0.14.11:9300, remoteAddress=/10.0.14.4:39272}], closing connection", "cluster.uuid": "QPIOwuZgS1qflKZx_dbeCA", "node.id": "Gii4iO3ZR3mfdC7duHscqA" ,
elastic-search-staack_elasticsearch03.1.sv0p58f8n3qe@mcplcld01-elk03    | "stacktrace": ["io.netty.handler.codec.DecoderException: java.io.StreamCorruptedException: invalid internal transport message format, got (d,a,d,a)",
elastic-search-staack_elasticsearch03.1.sv0p58f8n3qe@mcplcld01-elk03    | "at io.netty.handler.codec.ByteToMessageDecoder.callDecode(ByteToMessageDecoder.java:473) ~[netty-codec-4.1.43.Final.jar:4.1.43.Final]",
elastic-search-staack_elasticsearch03.1.sv0p58f8n3qe@mcplcld01-elk03    | "at io.netty.handler.codec.ByteToMessageDecoder.channelRead(ByteToMessageDecoder.java:281) ~[netty-codec-4.1.43.Final.jar:4.1.43.Final]",
elastic-search-staack_elasticsearch03.1.sv0p58f8n3qe@mcplcld01-elk03    | "at io.netty.channel.AbstractChannelHandlerContext.invokeChannelRead(AbstractChannelHandlerContext.java:374) [netty-transport-4.1.43.Final.jar:4.1.43.Final]",
elastic-search-staack_elasticsearch03.1.sv0p58f8n3qe@mcplcld01-elk03    | "at io.netty.channel.AbstractChannelHandlerContext.invokeChannelRead(AbstractChannelHandlerContext.java:360) [netty-transport-4.1.43.Final.jar:4.1.43.Final]",
elastic-search-staack_elasticsearch03.1.sv0p58f8n3qe@mcplcld01-elk03    | "at io.netty.channel.AbstractChannelHandlerContext.fireChannelRead(AbstractChannelHandlerContext.java:352) [netty-transport-4.1.43.Final.jar:4.1.43.Final]",
elastic-search-staack_elasticsearch03.1.sv0p58f8n3qe@mcplcld01-elk03    | "at io.netty.handler.logging.LoggingHandler.channelRead(LoggingHandler.java:241) [netty-handler-4.1.43.Final.jar:4.1.43.Final]",
elastic-search-staack_elasticsearch03.1.sv0p58f8n3qe@mcplcld01-elk03    | "at io.netty.channel.AbstractChannelHandlerContext.invokeChannelRead(AbstractChannelHandlerContext.java:374) [netty-transport-4.1.43.Final.jar:4.1.43.Final]",
elastic-search-staack_elasticsearch03.1.sv0p58f8n3qe@mcplcld01-elk03    | "at io.netty.channel.AbstractChannelHandlerContext.invokeChannelRead(AbstractChannelHandlerContext.java:360) [netty-transport-4.1.43.Final.jar:4.1.43.Final]",
elastic-search-staack_elasticsearch03.1.sv0p58f8n3qe@mcplcld01-elk03    | "at io.netty.channel.AbstractChannelHandlerContext.fireChannelRead(AbstractChannelHandlerContext.java:352) [netty-transport-4.1.43.Final.jar:4.1.43.Final]",
elastic-search-staack_elasticsearch03.1.sv0p58f8n3qe@mcplcld01-elk03    | "at io.netty.channel.DefaultChannelPipeline$HeadContext.channelRead(DefaultChannelPipeline.java:1422) [netty-transport-4.1.43.Final.jar:4.1.43.Final]",
elastic-search-staack_elasticsearch03.1.sv0p58f8n3qe@mcplcld01-elk03    | "at io.netty.channel.AbstractChannelHandlerContext.invokeChannelRead(AbstractChannelHandlerContext.java:374) [netty-transport-4.1.43.Final.jar:4.1.43.Final]",
elastic-search-staack_elasticsearch03.1.sv0p58f8n3qe@mcplcld01-elk03    | "at io.netty.channel.AbstractChannelHandlerContext.invokeChannelRead(AbstractChannelHandlerContext.java:360) [netty-transport-4.1.43.Final.jar:4.1.43.Final]",
elastic-search-staack_elasticsearch03.1.sv0p58f8n3qe@mcplcld01-elk03    | "at io.netty.channel.DefaultChannelPipeline.fireChannelRead(DefaultChannelPipeline.java:931) [netty-transport-4.1.43.Final.jar:4.1.43.Final]",
elastic-search-staack_elasticsearch03.1.sv0p58f8n3qe@mcplcld01-elk03    | "at io.netty.channel.nio.AbstractNioByteChannel$NioByteUnsafe.read(AbstractNioByteChannel.java:163) [netty-transport-4.1.43.Final.jar:4.1.43.Final]",
elastic-search-staack_elasticsearch03.1.sv0p58f8n3qe@mcplcld01-elk03    | "at io.netty.channel.nio.NioEventLoop.processSelectedKey(NioEventLoop.java:700) [netty-transport-4.1.43.Final.jar:4.1.43.Final]",
elastic-search-staack_elasticsearch03.1.sv0p58f8n3qe@mcplcld01-elk03    | "at io.netty.channel.nio.NioEventLoop.processSelectedKeysPlain(NioEventLoop.java:600) [netty-transport-4.1.43.Final.jar:4.1.43.Final]",
elastic-search-staack_elasticsearch03.1.sv0p58f8n3qe@mcplcld01-elk03    | "at io.netty.channel.nio.NioEventLoop.processSelectedKeys(NioEventLoop.java:554) [netty-transport-4.1.43.Final.jar:4.1.43.Final]",
elastic-search-staack_elasticsearch03.1.sv0p58f8n3qe@mcplcld01-elk03    | "at io.netty.channel.nio.NioEventLoop.run(NioEventLoop.java:514) [netty-transport-4.1.43.Final.jar:4.1.43.Final]",
elastic-search-staack_elasticsearch03.1.sv0p58f8n3qe@mcplcld01-elk03    | "at io.netty.util.concurrent.SingleThreadEventExecutor$6.run(SingleThreadEventExecutor.java:1050) [netty-common-4.1.43.Final.jar:4.1.43.Final]",
elastic-search-staack_elasticsearch03.1.sv0p58f8n3qe@mcplcld01-elk03    | "at io.netty.util.internal.ThreadExecutorMap$2.run(ThreadExecutorMap.java:74) [netty-common-4.1.43.Final.jar:4.1.43.Final]",
elastic-search-staack_elasticsearch03.1.sv0p58f8n3qe@mcplcld01-elk03    | "at java.lang.Thread.run(Thread.java:830) [?:?]",
elastic-search-staack_elasticsearch03.1.sv0p58f8n3qe@mcplcld01-elk03    | "Caused by: java.io.StreamCorruptedException: invalid internal transport message format, got (d,a,d,a)",
elastic-search-staack_elasticsearch03.1.sv0p58f8n3qe@mcplcld01-elk03    | "at org.elasticsearch.transport.TcpTransport.readHeaderBuffer(TcpTransport.java:763) ~[elasticsearch-7.6.0.jar:7.6.0]",
elastic-search-staack_elasticsearch03.1.sv0p58f8n3qe@mcplcld01-elk03    | "at org.elasticsearch.transport.TcpTransport.readMessageLength(TcpTransport.java:738) ~[elasticsearch-7.6.0.jar:7.6.0]",
elastic-search-staack_elasticsearch03.1.sv0p58f8n3qe@mcplcld01-elk03    | "at org.elasticsearch.transport.netty4.Netty4SizeHeaderFrameDecoder.decode(Netty4SizeHeaderFrameDecoder.java:43) ~[transport-netty4-client-7.6.0.jar:7.6.0]",
elastic-search-staack_elasticsearch03.1.sv0p58f8n3qe@mcplcld01-elk03    | "at io.netty.handler.codec.ByteToMessageDecoder.decodeRemovalReentryProtection(ByteToMessageDecoder.java:503) ~[netty-codec-4.1.43.Final.jar:4.1.43.Final]",
elastic-search-staack_elasticsearch03.1.sv0p58f8n3qe@mcplcld01-elk03    | "at io.netty.handler.codec.ByteToMessageDecoder.callDecode(ByteToMessageDecoder.java:442) ~[netty-codec-4.1.43.Final.jar:4.1.43.Final]",
elastic-search-staack_elasticsearch03.1.sv0p58f8n3qe@mcplcld01-elk03    | "... 20 more"] }
elastic-search-staack_elasticsearch03.1.sv0p58f8n3qe@mcplcld01-elk03    | {"type": "server", "timestamp": "2020-02-18T11:50:24,280Z", "level": "WARN", "component": "o.e.t.TcpTransport", "cluster.name": "MC-elasticsearch-cluster", "node.name": "elasticsearch03", "message": "exception caught on transport layer [Netty4TcpChannel{localAddress=/10.0.14.11:9300, remoteAddress=/10.0.14.4:39272}], closing connection", "cluster.uuid": "QPIOwuZgS1qflKZx_dbeCA", "node.id": "Gii4iO3ZR3mfdC7duHscqA" ,
elastic-search-staack_elasticsearch03.1.sv0p58f8n3qe@mcplcld01-elk03    | "stacktrace": ["io.netty.handler.codec.DecoderException: java.io.StreamCorruptedException: invalid internal transport message format, got (d,a,d,a)",
elastic-search-staack_elasticsearch03.1.sv0p58f8n3qe@mcplcld01-elk03    | "at io.netty.handler.codec.ByteToMessageDecoder.callDecode(ByteToMessageDecoder.java:473) ~[netty-codec-4.1.43.Final.jar:4.1.43.Final]",
elastic-search-staack_elasticsearch03.1.sv0p58f8n3qe@mcplcld01-elk03    | "at io.netty.handler.codec.ByteToMessageDecoder.channelInputClosed(ByteToMessageDecoder.java:406) ~[netty-codec-4.1.43.Final.jar:4.1.43.Final]",
elastic-search-staack_elasticsearch03.1.sv0p58f8n3qe@mcplcld01-elk03    | "at io.netty.handler.codec.ByteToMessageDecoder.channelInputClosed(ByteToMessageDecoder.java:373) ~[netty-codec-4.1.43.Final.jar:4.1.43.Final]",
elastic-search-staack_elasticsearch03.1.sv0p58f8n3qe@mcplcld01-elk03    | "at io.netty.handler.codec.ByteToMessageDecoder.channelInactive(ByteToMessageDecoder.java:356) ~[netty-codec-4.1.43.Final.jar:4.1.43.Final]",
elastic-search-staack_elasticsearch03.1.sv0p58f8n3qe@mcplcld01-elk03    | "at io.netty.channel.AbstractChannelHandlerContext.invokeChannelInactive(AbstractChannelHandlerContext.java:257) [netty-transport-4.1.43.Final.jar:4.1.43.Final]",
elastic-search-staack_elasticsearch03.1.sv0p58f8n3qe@mcplcld01-elk03    | "at io.netty.channel.AbstractChannelHandlerContext.invokeChannelInactive(AbstractChannelHandlerContext.java:243) [netty-transport-4.1.43.Final.jar:4.1.43.Final]",
elastic-search-staack_elasticsearch03.1.sv0p58f8n3qe@mcplcld01-elk03    | "at io.netty.channel.AbstractChannelHandlerContext.fireChannelInactive(AbstractChannelHandlerContext.java:236) [netty-transport-4.1.43.Final.jar:4.1.43.Final]",
elastic-search-staack_elasticsearch03.1.sv0p58f8n3qe@mcplcld01-elk03    | "at io.netty.handler.logging.LoggingHandler.channelInactive(LoggingHandler.java:167) [netty-handler-4.1.43.Final.jar:4.1.43.Final]",
elastic-search-staack_elasticsearch03.1.sv0p58f8n3qe@mcplcld01-elk03    | "at io.netty.channel.AbstractChannelHandlerContext.invokeChannelInactive(AbstractChannelHandlerContext.java:257) [netty-transport-4.1.43.Final.jar:4.1.43.Final]",
elastic-search-staack_elasticsearch03.1.sv0p58f8n3qe@mcplcld01-elk03    | "at io.netty.channel.AbstractChannelHandlerContext.invokeChannelInactive(AbstractChannelHandlerContext.java:243) [netty-transport-4.1.43.Final.jar:4.1.43.Final]",
elastic-search-staack_elasticsearch03.1.sv0p58f8n3qe@mcplcld01-elk03    | "at io.netty.channel.AbstractChannelHandlerContext.fireChannelInactive(AbstractChannelHandlerContext.java:236) [netty-transport-4.1.43.Final.jar:4.1.43.Final]",
elastic-search-staack_elasticsearch03.1.sv0p58f8n3qe@mcplcld01-elk03    | "at io.netty.channel.DefaultChannelPipeline$HeadContext.channelInactive(DefaultChannelPipeline.java:1417) [netty-transport-4.1.43.Final.jar:4.1.43.Final]",
elastic-search-staack_elasticsearch03.1.sv0p58f8n3qe@mcplcld01-elk03    | "at io.netty.channel.AbstractChannelHandlerContext.invokeChannelInactive(AbstractChannelHandlerContext.java:257) [netty-transport-4.1.43.Final.jar:4.1.43.Final]",
elastic-search-staack_elasticsearch03.1.sv0p58f8n3qe@mcplcld01-elk03    | "at io.netty.channel.AbstractChannelHandlerContext.invokeChannelInactive(AbstractChannelHandlerContext.java:243) [netty-transport-4.1.43.Final.jar:4.1.43.Final]",
elastic-search-staack_elasticsearch03.1.sv0p58f8n3qe@mcplcld01-elk03    | "at io.netty.channel.DefaultChannelPipeline.fireChannelInactive(DefaultChannelPipeline.java:913) [netty-transport-4.1.43.Final.jar:4.1.43.Final]",
elastic-search-staack_elasticsearch03.1.sv0p58f8n3qe@mcplcld01-elk03    | "at io.netty.channel.AbstractChannel$AbstractUnsafe$8.run(AbstractChannel.java:819) [netty-transport-4.1.43.Final.jar:4.1.43.Final]",
elastic-search-staack_elasticsearch03.1.sv0p58f8n3qe@mcplcld01-elk03    | "at io.netty.util.concurrent.AbstractEventExecutor.safeExecute(AbstractEventExecutor.java:163) [netty-common-4.1.43.Final.jar:4.1.43.Final]",
elastic-search-staack_elasticsearch03.1.sv0p58f8n3qe@mcplcld01-elk03    | "at io.netty.util.concurrent.SingleThreadEventExecutor.runAllTasks(SingleThreadEventExecutor.java:510) [netty-common-4.1.43.Final.jar:4.1.43.Final]",
elastic-search-staack_elasticsearch03.1.sv0p58f8n3qe@mcplcld01-elk03    | "at io.netty.channel.nio.NioEventLoop.run(NioEventLoop.java:518) [netty-transport-4.1.43.Final.jar:4.1.43.Final]",
elastic-search-staack_elasticsearch03.1.sv0p58f8n3qe@mcplcld01-elk03    | "at io.netty.util.concurrent.SingleThreadEventExecutor$6.run(SingleThreadEventExecutor.java:1050) [netty-common-4.1.43.Final.jar:4.1.43.Final]",
elastic-search-staack_elasticsearch03.1.sv0p58f8n3qe@mcplcld01-elk03    | "at io.netty.util.internal.ThreadExecutorMap$2.run(ThreadExecutorMap.java:74) [netty-common-

zboinek · February 18, 2020, 3:34pm

It works now. It seems you can't publish ports to the host like this in swarm mode.

I removed that port binding and it started to work.

diego2glez · February 26, 2020, 7:07pm

Hi @zboinek
I'm experiencing the same issue using Amazon's Open Distro version.

You can open ports if you specify mode "host", but then you won't be able to reach the service from outside through every node address in the cluster, only through the address of the host where the node runs.

# Long-form port syntax. With "mode: host" the port is published only on the
# swarm node where the task runs, rather than being reachable through every
# node address in the cluster.
ports:
  - target: 9200
    published: 9200
    protocol: tcp
    mode: host

But it seems that when you open ports over the swarm network (ingress), Elasticsearch isn't able to find its cluster peers. If I'm correct, that's because Docker adds another network interface and Elasticsearch doesn't know which one to use for discovery purposes.

Explanation:

 # Fragment of a stack file (these keys belong under "services:"), kept as the
 # illustration of the failing setup: odfe-node1 publishes a port and
 # odfe-node2 does not, so the two containers end up with a different set of
 # network interfaces.
 odfe-node1:
   environment:
     - cluster.name=odfe-cluster
     - node.name=odfe-node1
     - discovery.seed_hosts=odfe-node1,odfe-node2
     - cluster.initial_master_nodes=odfe-node1,odfe-node2
   # Published through the swarm ingress network; this is what gives the
   # container its second interface.
   ports:
     - "9200:9200"
   networks:
     - odfe-net

 odfe-node2:
   environment:
     - cluster.name=odfe-cluster
     - node.name=odfe-node2
     - discovery.seed_hosts=odfe-node1,odfe-node2
     - cluster.initial_master_nodes=odfe-node1,odfe-node2
   # No published ports: only the odfe-net interface is attached.
   networks:
     - odfe-net

(Where odfe-net is overlay network for inner stack communication.)

This configuration won't work because odfe-node1 will have 2 net ifaces among which to choose:

[odfe-net]
397: eth0@if398: <BROADCAST,MULTICAST,UP,LOWER_UP> mtu 1450 qdisc noqueue state UP group default
    link/ether 02:42:0a:00:0f:0b brd ff:ff:ff:ff:ff:ff link-netnsid 0
    inet 10.0.15.11/24 brd 10.0.15.255 scope global eth0
       valid_lft forever preferred_lft forever

[ingress]
401: eth1@if402: <BROADCAST,MULTICAST,UP,LOWER_UP> mtu 1450 qdisc noqueue state UP group default
    link/ether 02:42:0a:00:00:44 brd ff:ff:ff:ff:ff:ff link-netnsid 1
    inet 10.0.0.68/24 brd 10.0.0.255 scope global eth1
       valid_lft forever preferred_lft forever

On the other hand, odfe-node2 only has one to pick:

[odfe-net]
383: eth0@if384: <BROADCAST,MULTICAST,UP,LOWER_UP> mtu 1450 qdisc noqueue state UP group default
    link/ether 02:42:0a:00:0f:03 brd ff:ff:ff:ff:ff:ff link-netnsid 0
    inet 10.0.15.3/24 brd 10.0.15.255 scope global eth0
       valid_lft forever preferred_lft forever

As you can see in the log, the publish addresses for port 9300 are in different subnets:

[INFO ][o.e.t.TransportService   ] [odfe-node1] publish_address {10.0.0.68:9300}, bound_addresses {0.0.0.0:9300} 

[INFO ][o.e.t.TransportService   ] [odfe-node2] publish_address {10.0.15.3:9300}, bound_addresses {0.0.0.0:9300}

As a result, odfe-node2 can't find odfe-node1:

[WARN ][o.e.c.c.ClusterFormationFailureHelper] [odfe-node2] master not discovered or elected yet, an election requires a node with id [4s-pH6a2TZOHvE9fIp8ctw], have discovered [{odfe-node2}{XBb1rwi4SQCoGBDgyJR4CA}{31d7zH6nRVGs0Lw4hn56CA}{10.0.15.3}{10.0.15.3:9300}{dim}] which is not a quorum; discovery will continue using [10.0.15.10:9300, 10.0.15.2:9300] from hosts providers and [{odfe-node2}{XBb1rwi4SQCoGBDgyJR4CA}{31d7zH6nRVGs0Lw4hn56CA}{10.0.15.3}{10.0.15.3:9300}{dim}] from last-known cluster state

There's an Elasticsearch config parameter, network.publish_host, for specifying this, but because Docker assigns addresses dynamically I don't know how to approach it.

Have you found another solution for this?

DavidTurner · February 26, 2020, 8:06pm

In that case I think network.host: _eth0_ on both nodes (and no other network.* or transport.* parameters) is all you need.

diego2glez · February 27, 2020, 1:47pm

Works like a charm!

In my case, I ended up using the following in services with exposed ports:

network.publish_host=_eth1_

and for the others

network.publish_host=_eth0_

to achieve the cluster discovery and formation.

I didn't use the network.host property because it overrides both publish_host and bind_host. I prefer to let Elasticsearch listen for incoming requests on every address.

I don't know how Docker names interfaces, but for now I haven't found any other workaround.

thanks a lot!

Here is the final compose file:

docker-compose.yml

version: '3.4'

services:

  # Node "odfe-master": listed in cluster.initial_master_nodes and published
  # on 9200 (REST) and 9600 (Performance Analyzer).
  odfe-master:
    image: amazon/opendistro-for-elasticsearch:1.4.0
    environment:
      - cluster.name=odfe-cluster
      - node.name=odfe-master
      # Services with published ports advertise eth1, the others eth0 (the
      # author's workaround). Interface naming is assigned by Docker - verify
      # it on your own hosts.
      - network.publish_host=_eth1_
      # Discover nodes on different hosts
      - discovery.seed_hosts=odfe-master,odfe-data1,odfe-data2,odfe-data3,odfe-data4
      - cluster.initial_master_nodes=odfe-master,odfe-data1,odfe-data2
      - bootstrap.memory_lock=true
    volumes:
      - odfe-master:/usr/share/elasticsearch/data
    ports:
      - "9200:9200"
      - "9600:9600"  # required for Performance Analyzer
    networks:
      - odfe-net
    # JVM options are supplied through a swarm config named jvm-options-master.
    configs:
      - source: jvm-options-master
        target: /usr/share/elasticsearch/config/jvm.options

  # Node "odfe-data1": listed in cluster.initial_master_nodes; REST API
  # published on host port 9201.
  odfe-data1:
    image: amazon/opendistro-for-elasticsearch:1.4.0
    environment:
      - cluster.name=odfe-cluster
      - node.name=odfe-data1
      # Has published ports, so it advertises eth1 (the author's workaround).
      # Interface naming is assigned by Docker - verify it on your own hosts.
      - network.publish_host=_eth1_
      # Discover nodes on different hosts
      - discovery.seed_hosts=odfe-master,odfe-data1,odfe-data2,odfe-data3,odfe-data4
      - cluster.initial_master_nodes=odfe-master,odfe-data1,odfe-data2
      - bootstrap.memory_lock=true
    volumes:
      - odfe-data1:/usr/share/elasticsearch/data
    ports:
      - "9201:9200"
    networks:
      - odfe-net
    # JVM options are supplied through a swarm config named jvm-options-data.
    configs:
      - source: jvm-options-data
        target: /usr/share/elasticsearch/config/jvm.options

  # Node "odfe-data2": listed in cluster.initial_master_nodes; REST API
  # published on host port 9202.
  odfe-data2:
    image: amazon/opendistro-for-elasticsearch:1.4.0
    environment:
      - cluster.name=odfe-cluster
      - node.name=odfe-data2
      # Has published ports, so it advertises eth1 (the author's workaround).
      # Interface naming is assigned by Docker - verify it on your own hosts.
      - network.publish_host=_eth1_
      # Discover nodes on different hosts
      - discovery.seed_hosts=odfe-master,odfe-data1,odfe-data2,odfe-data3,odfe-data4
      - cluster.initial_master_nodes=odfe-master,odfe-data1,odfe-data2
      - bootstrap.memory_lock=true
    volumes:
      - odfe-data2:/usr/share/elasticsearch/data
    ports:
      - "9202:9200"
    networks:
      - odfe-net
    # JVM options are supplied through a swarm config named jvm-options-data.
    configs:
      - source: jvm-options-data
        target: /usr/share/elasticsearch/config/jvm.options

  # Node "odfe-data3": not in cluster.initial_master_nodes and publishes no
  # ports.
  odfe-data3:
    image: amazon/opendistro-for-elasticsearch:1.4.0
    environment:
      - cluster.name=odfe-cluster
      - node.name=odfe-data3
      # No published ports, so it advertises eth0 (the author's workaround).
      # Interface naming is assigned by Docker - verify it on your own hosts.
      - network.publish_host=_eth0_
      # Discover nodes on different hosts
      - discovery.seed_hosts=odfe-master,odfe-data1,odfe-data2,odfe-data3,odfe-data4
      - cluster.initial_master_nodes=odfe-master,odfe-data1,odfe-data2
      - bootstrap.memory_lock=true
    volumes:
      - odfe-data3:/usr/share/elasticsearch/data
    networks:
      - odfe-net
    # JVM options are supplied through a swarm config named jvm-options-data.
    configs:
      - source: jvm-options-data
        target: /usr/share/elasticsearch/config/jvm.options

  # Node "odfe-data4": not in cluster.initial_master_nodes and publishes no
  # ports.
  odfe-data4:
    image: amazon/opendistro-for-elasticsearch:1.4.0
    environment:
      - cluster.name=odfe-cluster
      - node.name=odfe-data4
      # No published ports, so it advertises eth0 (the author's workaround).
      # Interface naming is assigned by Docker - verify it on your own hosts.
      - network.publish_host=_eth0_
      # Discover nodes on different hosts
      - discovery.seed_hosts=odfe-master,odfe-data1,odfe-data2,odfe-data3,odfe-data4
      - cluster.initial_master_nodes=odfe-master,odfe-data1,odfe-data2
      - bootstrap.memory_lock=true
    volumes:
      - odfe-data4:/usr/share/elasticsearch/data
    networks:
      - odfe-net
    # JVM options are supplied through a swarm config named jvm-options-data.
    configs:
      - source: jvm-options-data
        target: /usr/share/elasticsearch/config/jvm.options

  # Kibana for the cluster, published on 5601. It connects over HTTPS to the
  # three nodes listed in cluster.initial_master_nodes.
  odfe-kibana:
    image: amazon/opendistro-for-elasticsearch-kibana:1.4.0
    ports:
      - "5601:5601"
    environment:
      # JSON array passed as a single string; the single quotes keep YAML from
      # parsing it as a flow sequence.
      ELASTICSEARCH_HOSTS: '["https://odfe-master:9200","https://odfe-data1:9200","https://odfe-data2:9200"]'
    networks:
      - odfe-net
    # kibana.yml is supplied through a swarm config named kibana-conf.
    configs:
      - source: kibana-conf
        target: /usr/share/kibana/config/kibana.yml

# One named volume per Elasticsearch service, all with default settings.
volumes:
  odfe-master: {}
  odfe-data1: {}
  odfe-data2: {}
  odfe-data3: {}
  odfe-data4: {}

# The overlay network is not created by this stack: create it manually before
# deploying.
networks:
  odfe-net:
    external: true

# NOTE(review): the services reference the configs jvm-options-master,
# jvm-options-data and kibana-conf, but no top-level "configs:" section is
# shown here - confirm where they are declared.

system · March 26, 2020, 1:47pm

This topic was automatically closed 28 days after the last reply. New replies are no longer allowed.

Topic		Replies	Views
Master not discovered yet Elasticsearch docker	14	1174	July 19, 2022
Unable to create elasticsearch cluster in docker swarm Elasticsearch	10	3554	April 27, 2020
Trying to setup elasticsearch cluster with docker-compose Elasticsearch	10	17463	December 15, 2017
Docker swarm deployment: master_not_discovered_exception Elasticsearch docker	5	719	December 26, 2021
How is Docker/Docker-Compose getting in the way? Elasticsearch docker	15	2595	May 26, 2020

Docker swarm discovery problems

Related topics