Dependent nodes in dockerized Elasticsearch 3-node cluster

Hi, I am trying to install an Elasticsearch (v8.10.2) cluster (with one Kibana and one Logstash container) on Docker for a production environment. My 3 nodes are running correctly on the same server (it will be different in production, but I'm not at that stage yet).

I tried stopping them one by one to make sure that if the master node goes down, another one takes over the role. If either of the last two nodes is down, the cluster stays green, but if the first node is stopped, I lose the connection to the two others.

I started from the blog and the documentation docker compose file (GitHub repository) to build my own configuration file. Here is my 'docker-compose.yml' (I tried different options, so I only put what's functional at the moment):

# Compose file format version.
# NOTE(review): keys used below such as mem_limit and depends_on.condition
# belong to the 2.x file format - confirm before bumping this to 3.x.
version: "2.2"

services:
  # One-shot helper container: creates the CA and one certificate per node
  # (es01, es02, es03) in the shared `certs` volume, fixes their ownership and
  # permissions, then sets the kibana_system password through es01.
  setup:
    image: docker.elastic.co/elasticsearch/elasticsearch:${STACK_VERSION}
    volumes:
      - certs:/usr/share/elasticsearch/config/certs
    # Runs as root (uid 0); the script chowns the generated files to root:root.
    user: "0"
    # Folded scalar: the script lines are indented deeper than `bash -c '`, so
    # their line breaks are kept and each line is its own shell statement.
    # The ca.zip / certs.zip checks make certificate generation a no-op on
    # later runs against the same volume.
    # NOTE(review): both wait loops at the end address es01 only, so this
    # container cannot finish while es01 is unreachable.
    command: >
      bash -c '
        if [ x${ELASTIC_PASSWORD} == x ]; then
          echo "Set the ELASTIC_PASSWORD environment variable in the .env file";
          exit 1;
        elif [ x${KIBANA_PASSWORD} == x ]; then
          echo "Set the KIBANA_PASSWORD environment variable in the .env file";
          exit 1;
        fi;
        if [ ! -f config/certs/ca.zip ]; then
          echo "Creating CA";
          bin/elasticsearch-certutil ca --silent --pem -out config/certs/ca.zip;
          unzip config/certs/ca.zip -d config/certs;
        fi;
        if [ ! -f config/certs/certs.zip ]; then
          echo "Creating certs";
          echo -ne \
          "instances:\n"\
          "  - name: es01\n"\
          "    dns:\n"\
          "      - es01\n"\
          "      - localhost\n"\
          "    ip:\n"\
          "      - 127.0.0.1\n"\
          "  - name: es02\n"\
          "    dns:\n"\
          "      - es02\n"\
          "      - localhost\n"\
          "    ip:\n"\
          "      - 127.0.0.1\n"\
          "  - name: es03\n"\
          "    dns:\n"\
          "      - es03\n"\
          "      - localhost\n"\
          "    ip:\n"\
          "      - 127.0.0.1\n"\
          > config/certs/instances.yml;
          bin/elasticsearch-certutil cert --silent --pem -out config/certs/certs.zip --in config/certs/instances.yml --ca-cert config/certs/ca/ca.crt --ca-key config/certs/ca/ca.key
          unzip config/certs/certs.zip -d config/certs;
        fi;
        echo "Setting file permissions"
        chown -R root:root config/certs;
        find . -type d -exec chmod 750 \{\} \;;
        find . -type f -exec chmod 640 \{\} \;;
        echo "Waiting for Elasticsearch availability";
        until curl -s --cacert config/certs/ca/ca.crt https://es01:9200 | grep -q "missing authentication credentials"; do sleep 30; done;
        echo "Setting kibana_system password";
        until curl -s -X POST --cacert config/certs/ca/ca.crt -u "elastic:${ELASTIC_PASSWORD}" -H "Content-Type: application/json" https://es01:9200/_security/user/kibana_system/_password -d "{\"password\":\"${KIBANA_PASSWORD}\"}" | grep -q "^{}"; do sleep 10; done;
        echo "All done!";
      '
    # Reports healthy as soon as es01's certificate file exists; the other
    # services wait on this through `depends_on: condition: service_healthy`.
    healthcheck:
      test: ["CMD-SHELL", "[ -f config/certs/es01/es01.crt ]"]
      interval: 1s
      timeout: 5s
      retries: 120

  # First Elasticsearch node. Its HTTP port 9200 is published on the host as
  # ${ES_PORT}.
  es01:
    # Start only after the setup container's healthcheck passes.
    depends_on:
      setup:
        condition: service_healthy
    image: docker.elastic.co/elasticsearch/elasticsearch:${STACK_VERSION}
    volumes:
      - certs:/usr/share/elasticsearch/config/certs
      - esdata01:/usr/share/elasticsearch/data
    ports:
      - "${ES_PORT}:9200"
      # - "${ES_PORT_2}:9300"
    environment:
      # Node identity and cluster formation.
      - node.name=es01
      - cluster.name=${CLUSTER_NAME}
      - cluster.initial_master_nodes=es01,es02,es03
      - discovery.seed_hosts=es02,es03
      - ELASTIC_PASSWORD=${ELASTIC_PASSWORD}
      - bootstrap.memory_lock=true
      # TLS on the HTTP layer, using this node's own key pair.
      - xpack.security.enabled=true
      - xpack.security.http.ssl.enabled=true
      - xpack.security.http.ssl.key=certs/es01/es01.key
      - xpack.security.http.ssl.certificate=certs/es01/es01.crt
      - xpack.security.http.ssl.certificate_authorities=certs/ca/ca.crt
      # TLS on the node-to-node transport layer.
      - xpack.security.transport.ssl.enabled=true
      - xpack.security.transport.ssl.key=certs/es01/es01.key
      - xpack.security.transport.ssl.certificate=certs/es01/es01.crt
      - xpack.security.transport.ssl.certificate_authorities=certs/ca/ca.crt
      - xpack.security.transport.ssl.verification_mode=certificate
      - xpack.license.self_generated.type=${LICENSE}
    mem_limit: ${MEM_LIMIT}
    # Unlimited memlock, paired with bootstrap.memory_lock=true above.
    ulimits:
      memlock:
        soft: -1
        hard: -1
    # Healthy once an unauthenticated HTTPS request to the local node is
    # answered with the "missing authentication credentials" error.
    healthcheck:
      test:
        - "CMD-SHELL"
        - "curl -s --cacert config/certs/ca/ca.crt https://localhost:9200 | grep -q 'missing authentication credentials'"
      interval: 10s
      timeout: 10s
      retries: 120

  # Second Elasticsearch node. Its HTTP port 9200 is published on the host as
  # 9201; other containers on the Compose network still reach it at es02:9200.
  es02:
    # Start only after the setup container's healthcheck passes.
    depends_on:
      setup:
        condition: service_healthy
    image: docker.elastic.co/elasticsearch/elasticsearch:${STACK_VERSION}
    volumes:
      - certs:/usr/share/elasticsearch/config/certs
      - esdata02:/usr/share/elasticsearch/data
    ports:
      - "9201:9200"
      # - "9301:9300"
    environment:
      # Node identity and cluster formation.
      - node.name=es02
      - cluster.name=${CLUSTER_NAME}
      - cluster.initial_master_nodes=es01,es02,es03
      - discovery.seed_hosts=es01,es03
      # Same bootstrap password as es01, so every node is configured alike.
      # NOTE(review): confirm this resolves the 401 for the elastic user seen
      # on this node while es01 is down.
      - ELASTIC_PASSWORD=${ELASTIC_PASSWORD}
      - bootstrap.memory_lock=true
      # TLS on the HTTP layer. Uses this node's own key pair (the setup
      # service generates certs/es02/*), not es01's.
      - xpack.security.enabled=true
      - xpack.security.http.ssl.enabled=true
      - xpack.security.http.ssl.key=certs/es02/es02.key
      - xpack.security.http.ssl.certificate=certs/es02/es02.crt
      - xpack.security.http.ssl.certificate_authorities=certs/ca/ca.crt
      # TLS on the node-to-node transport layer, also with es02's key pair.
      - xpack.security.transport.ssl.enabled=true
      - xpack.security.transport.ssl.key=certs/es02/es02.key
      - xpack.security.transport.ssl.certificate=certs/es02/es02.crt
      - xpack.security.transport.ssl.certificate_authorities=certs/ca/ca.crt
      - xpack.security.transport.ssl.verification_mode=certificate
      - xpack.license.self_generated.type=${LICENSE}
    mem_limit: ${MEM_LIMIT}
    # Unlimited memlock, paired with bootstrap.memory_lock=true above.
    ulimits:
      memlock:
        soft: -1
        hard: -1
    # Healthy once an unauthenticated HTTPS request to the local node is
    # answered with the "missing authentication credentials" error.
    healthcheck:
      test:
        - "CMD-SHELL"
        - "curl -s --cacert config/certs/ca/ca.crt https://localhost:9200 | grep -q 'missing authentication credentials'"
      interval: 10s
      timeout: 10s
      retries: 120

  # Third Elasticsearch node. Its HTTP port 9200 is published on the host as
  # 9202; other containers on the Compose network still reach it at es03:9200.
  es03:
    # Start only after the setup container's healthcheck passes.
    depends_on:
      setup:
        condition: service_healthy
    image: docker.elastic.co/elasticsearch/elasticsearch:${STACK_VERSION}
    volumes:
      - certs:/usr/share/elasticsearch/config/certs
      - esdata03:/usr/share/elasticsearch/data
    ports:
      - "9202:9200"
      # - "9302:9300"
    environment:
      # Node identity and cluster formation.
      - node.name=es03
      - cluster.name=${CLUSTER_NAME}
      - cluster.initial_master_nodes=es01,es02,es03
      - discovery.seed_hosts=es01,es02
      # Same bootstrap password as es01, so every node is configured alike.
      # NOTE(review): confirm this resolves the 401 for the elastic user seen
      # on the remaining nodes while es01 is down.
      - ELASTIC_PASSWORD=${ELASTIC_PASSWORD}
      - bootstrap.memory_lock=true
      # TLS on the HTTP layer. Uses this node's own key pair (the setup
      # service generates certs/es03/*), not es01's. The CA entry was missing
      # here although es01 and es02 both set it.
      - xpack.security.enabled=true
      - xpack.security.http.ssl.enabled=true
      - xpack.security.http.ssl.key=certs/es03/es03.key
      - xpack.security.http.ssl.certificate=certs/es03/es03.crt
      - xpack.security.http.ssl.certificate_authorities=certs/ca/ca.crt
      # TLS on the node-to-node transport layer, also with es03's key pair.
      - xpack.security.transport.ssl.enabled=true
      - xpack.security.transport.ssl.key=certs/es03/es03.key
      - xpack.security.transport.ssl.certificate=certs/es03/es03.crt
      - xpack.security.transport.ssl.certificate_authorities=certs/ca/ca.crt
      - xpack.security.transport.ssl.verification_mode=certificate
      - xpack.license.self_generated.type=${LICENSE}
    mem_limit: ${MEM_LIMIT}
    # Unlimited memlock, paired with bootstrap.memory_lock=true above.
    ulimits:
      memlock:
        soft: -1
        hard: -1
    # Healthy once an unauthenticated HTTPS request to the local node is
    # answered with the "missing authentication credentials" error.
    healthcheck:
      test:
        - "CMD-SHELL"
        - "curl -s --cacert config/certs/ca/ca.crt https://localhost:9200 | grep -q 'missing authentication credentials'"
      interval: 10s
      timeout: 10s
      retries: 120

  # Kibana, served over HTTPS on container port 5601 and published on the host
  # as ${KIBANA_PORT}.
  kibana:
    # Start only after the setup container's healthcheck passes.
    depends_on:
      setup:
        condition: service_healthy
    image: docker.elastic.co/kibana/kibana:${STACK_VERSION}
    volumes:
      - certs:/usr/share/kibana/config/certs
      - kibanadata:/usr/share/kibana/data
    ports:
      - "${KIBANA_PORT}:5601"
    environment:
      - SERVERNAME=kibana
      # List every node so Kibana keeps working when any single node
      # (including es01) is stopped. Inside the Compose network each node
      # listens on container port 9200 over HTTPS; the 9201/9202 host
      # mappings do not apply here.
      - 'ELASTICSEARCH_HOSTS=["https://es01:9200","https://es02:9200","https://es03:9200"]'
      - ELASTICSEARCH_USERNAME=kibana_system
      - ELASTICSEARCH_PASSWORD=${KIBANA_PASSWORD}
      - ELASTICSEARCH_SSL_CERTIFICATEAUTHORITIES=config/certs/ca/ca.crt
      - ELASTICSEARCH_SSL_VERIFICATIONMODE=certificate
      - XPACK_SECURITY_ENABLED=true
      # NOTE(review): Kibana's own HTTPS listener reuses es01's key pair; the
      # setup service does not generate a dedicated Kibana certificate.
      - SERVER_SSL_ENABLED=true
      - SERVER_SSL_KEY=config/certs/es01/es01.key
      - SERVER_SSL_CERTIFICATE=config/certs/es01/es01.crt
      - XPACK_ENCRYPTEDSAVEDOBJECTS_ENCRYPTIONKEY=${KIBANA_OBJECTS_ENCRYPTION_KEY}
      - XPACK_REPORTING_ENCRYPTIONKEY=${KIBANA_REPORTS_ENCRYPTION_KEY}
      - XPACK_SECURITY_ENCRYPTIONKEY=${KIBANA_SECURITY_ENCRYPTION_KEY}
    mem_limit: ${MEM_LIMIT}
    # Healthy once the local HTTPS endpoint answers a HEAD request with
    # "HTTP/1.1 302 Found".
    healthcheck:
      test:
        - "CMD-SHELL"
        - "curl -s --cacert config/certs/ca/ca.crt -I https://localhost:5601 | grep -q 'HTTP/1.1 302 Found'"
      interval: 10s
      timeout: 10s
      retries: 120

  # Logstash; its configuration and pipelines come from bind-mounted host
  # directories, and it shares the `certs` volume with the other services.
  logstash:
    # Start only after the setup container's healthcheck passes.
    depends_on:
      setup:
        condition: service_healthy
    image: docker.elastic.co/logstash/logstash:${STACK_VERSION}
    user: root
    volumes:
      # Host config and pipeline directories, mounted read-only.
      # NOTE(review): `Z` looks like the SELinux private relabel flag - confirm.
      - ./logstash/config:/usr/share/logstash/config:ro,Z
      - ./logstash/pipeline:/usr/share/logstash/pipeline:ro,Z
      - certs:/usr/share/logstash/config/certs
      - logstashdata:/usr/share/logstash/data
    environment:
      # NOTE(review): only es01 is named here. How the pipeline consumes
      # ELASTIC_HOSTS is not visible in this file; if outputs must survive an
      # es01 outage, the pipeline needs all three nodes (es01/es02/es03 on
      # container port 9200).
      - ELASTIC_HOSTS=https://es01:9200
      - ELASTIC_USER=elastic
      - ELASTIC_PASSWORD=${ELASTIC_PASSWORD}
      - LOGSTASH_INTERNAL_PASSWORD=${LOGSTASH_PASSWORD}

# Named volumes, all using the `local` driver: the shared certificate store,
# one data volume per Elasticsearch node, and data volumes for Kibana and
# Logstash.
volumes:
  certs:
    driver: local
  esdata01:
    driver: local
  esdata02:
    driver: local
  esdata03:
    driver: local
  kibanadata:
    driver: local
  logstashdata:
    driver: local

I don't know what I should add so that communication with the last two nodes keeps working even when the first one is stopped. I didn't find what I'm looking for in the documentation, and the tutorial templates almost never fit my case. After a lot of attempts, I'm stuck on this. If someone has an idea, I'll be thankful :slight_smile:

I can give more details if asked. Thanks

Where exactly do you lose the connection? Does running curl against the other two nodes work?

If you are talking about Kibana, this is expected, because in your Docker Compose file you configured the Elasticsearch hosts with just the first node; you need to configure it with all 3 nodes.

You need to change to something like this:

ELASTICSEARCH_HOSTS=["http://es01:9200","http://es02:9201", "http://es03:9202"]

The same thing applies to Logstash, you configured only the first node.

Hi Leandro,

For Kibana and Logstash, I didn't add those nodes because I had a problem that occurred only when I put all three nodes in the ELASTICSEARCH_HOSTS option and then tried to connect to the Kibana interface. I must have forgotten something back then, because I wanted to take a screenshot of the error, but this time I can access the dashboard. I also modified this value for Logstash.

However, I still can't curl the two other nodes when the first one is down. I also get this error on the Kibana web page when I stop node 1:

{
  "statusCode": 500,
  "error": "Internal Server Error",
  "message": "An internal server error occurred. Check Kibana server logs for details."
}

The message appears before the Kibana container stops itself.

If I send a request with curl, I also get an error:
Node 1:

curl -u elastic:${PASSWORD} -X GET "https://${IP}:9200/_ssl/certificates?pretty" -k
curl: (7) Failed to connect to ${IP} port 9200: Connection refused

(Nothing's strange here)

Node 2:

curl -u elastic:${PASSWORD} -X GET "https://${IP}:9201/_ssl/certificates?pretty" -k
{
  "error" : {
    "root_cause" : [
      {
        "type" : "security_exception",
        "reason" : "unable to authenticate user [elastic] for REST request [/_ssl/certificates?pretty]",
        "header" : {
          "WWW-Authenticate" : [
            "Basic realm=\"security\" charset=\"UTF-8\"",
            "Bearer realm=\"security\"",
            "ApiKey"
          ]
        }
      }
    ],
    "type" : "security_exception",
    "reason" : "unable to authenticate user [elastic] for REST request [/_ssl/certificates?pretty]",
    "header" : {
      "WWW-Authenticate" : [
        "Basic realm=\"security\" charset=\"UTF-8\"",
        "Bearer realm=\"security\"",
        "ApiKey"
      ]
    }
  },
  "status" : 401
}

After starting node 1 (and Kibana) again, I still get this bad response when sending a curl request to node 2. I don't know if this helps or if it is normal.

I use the Elasticvue Chrome extension, and I can see nodes 2 and 3 disappear from the node table almost instantly when I stop node 1.
Maybe there is a link somewhere that forces the running nodes to stop. Yet the containers stay healthy according to the 'docker ps' command.
The entire Elasticsearch configuration is in the Docker Compose file I shared before (as I said, since my first post I have only changed the ELASTICSEARCH_HOSTS value for Kibana and Logstash); only Logstash has more configuration, in a bind-mounted volume.

Later, I checked the Kibana logs and found this kind of line many times:

[2023-10-25T13:23:30.915+00:00][ERROR][plugins.security.authentication] License is not available, authentication is not possible.
    at processTicksAndRejections (node:internal/process/task_queues:95:5)
    at KibanaTransport.request (/usr/share/kibana/node_modules/@kbn/core-elasticsearch-client-server-internal/src/create_transport.js:51:16)
    at ClientTraced.GetApi [as get] (/usr/share/kibana/node_modules/@elastic/elasticsearch/lib/api/api/get.js:36:12) {
  meta: {
    body: undefined,
    statusCode: 0,
    headers: {},
    meta: {
      context: null,
      request: [Object],
      name: 'elasticsearch-js',
      connection: [Object],
      attempts: 0,
      aborted: false
    },
    warnings: [Getter]
  },
  isBoom: true,
  isServer: true,
  data: null,
  output: {
    statusCode: 503,
    payload: {
      statusCode: 503,
      error: 'Service Unavailable',
      message: 'connect ECONNREFUSED 192.168.32.6:9202'
    },
    headers: {}
  },
  [Symbol(SavedObjectsClientErrorCode)]: 'SavedObjectsClient/esUnavailable'
}
[2023-10-25T13:50:43.056+00:00][ERROR][plugins.taskManager] Failed to poll for work: ConnectionError: connect ECONNREFUSED 192.168.32.5:9201
[2023-10-25T13:50:43.146+00:00][ERROR][elasticsearch-service] Unable to retrieve version information from Elasticsearch nodes. connect ECONNREFUSED 192.168.32.2:9200
[2023-10-25T13:50:45.169+00:00][ERROR][plugins.security.authentication] License is not available, authentication is not possible.
[2023-10-25T13:51:05.256+00:00][ERROR][plugins.security.authentication] License is not available, authentication is not possible.
[2023-10-25T13:51:09.204+00:00][ERROR][elasticsearch-service] Unable to retrieve version information from Elasticsearch nodes. getaddrinfo ENOTFOUND es01
[2023-10-25T13:51:25.372+00:00][ERROR][plugins.security.authentication] License is not available, authentication is not possible.
[2023-10-25T13:51:35.091+00:00][ERROR][plugins.security.authentication] License is not available, authentication is not possible.
[2023-10-25T13:51:39.517+00:00][ERROR][plugins.security.authentication] License is not available, authentication is not possible.
[2023-10-25T13:51:41.712+00:00][ERROR][elasticsearch-service] Unable to retrieve version information from Elasticsearch nodes. connect ECONNREFUSED 192.168.32.6:9202
[2023-10-25T13:51:45.546+00:00][ERROR][plugins.security.authentication] License is not available, authentication is not possible.
[2023-10-25T13:51:59.858+00:00][ERROR][plugins.security.authentication] License is not available, authentication is not possible.
[2023-10-25T13:52:13.289+00:00][ERROR][elasticsearch-service] Unable to retrieve version information from Elasticsearch nodes. getaddrinfo ENOTFOUND es01
[2023-10-25T13:52:19.950+00:00][ERROR][plugins.security.authentication] License is not available, authentication is not possible.
[2023-10-25T13:52:23.643+00:00][ERROR][elasticsearch-service] Unable to retrieve version information from Elasticsearch nodes. connect ECONNREFUSED 192.168.32.6:9202
[2023-10-25T13:52:31.861+00:00][ERROR][plugins.security.authentication] License is not available, authentication is not possible.
[2023-10-25T13:52:41.651+00:00][ERROR][elasticsearch-service] Unable to retrieve version information from Elasticsearch nodes. getaddrinfo ENOTFOUND es01
[2023-10-25T13:52:51.961+00:00][ERROR][plugins.security.authentication] License is not available, authentication is not possible.
[2023-10-25T13:53:12.070+00:00][ERROR][plugins.security.authentication] License is not available, authentication is not possible.
[2023-10-25T13:53:20.511+00:00][ERROR][plugins.security.authentication] License is not available, authentication is not possible.
[2023-10-25T13:53:28.103+00:00][ERROR][elasticsearch-service] Unable to retrieve version information from Elasticsearch nodes. connect ECONNREFUSED 192.168.32.6:9202
[2023-10-25T13:53:32.159+00:00][ERROR][plugins.security.authentication] License is not available, authentication is not possible.
[2023-10-25T13:53:44.160+00:00][ERROR][elasticsearch-service] Unable to retrieve version information from Elasticsearch nodes. getaddrinfo ENOTFOUND es01
[2023-10-25T13:53:50.307+00:00][ERROR][plugins.security.authentication] License is not available, authentication is not possible.
[2023-10-25T13:54:10.404+00:00][ERROR][plugins.security.authentication] License is not available, authentication is not possible.
[2023-10-25T13:54:20.249+00:00][ERROR][elasticsearch-service] Unable to retrieve version information from Elasticsearch nodes. connect ECONNREFUSED 192.168.32.6:9202
[2023-10-25T13:54:30.516+00:00][ERROR][plugins.security.authentication] License is not available, authentication is not possible.
[2023-10-25T13:54:39.015+00:00][ERROR][elasticsearch-service] Unable to retrieve version information from Elasticsearch nodes. getaddrinfo ENOTFOUND es01
[2023-10-25T13:54:39.899+00:00][ERROR][plugins.security.authentication] License is not available, authentication is not possible.
[2023-10-25T13:54:39.920+00:00][ERROR][plugins.security.authentication] License is not available, authentication is not possible.
[2023-10-25T13:54:40.520+00:00][ERROR][plugins.security.authentication] License is not available, authentication is not possible.
[2023-10-25T13:54:49.105+00:00][ERROR][plugins.security.authentication] License is not available, authentication is not possible.
[2023-10-25T13:55:02.511+00:00][ERROR][plugins.security.authentication] License is not available, authentication is not possible.
[2023-10-25T13:55:09.192+00:00][ERROR][plugins.security.authentication] License is not available, authentication is not possible.
[2023-10-25T13:55:16.656+00:00][ERROR][elasticsearch-service] Unable to retrieve version information from Elasticsearch nodes. connect ECONNREFUSED 192.168.32.5:9201
[2023-10-25T13:55:29.279+00:00][ERROR][plugins.security.authentication] License is not available, authentication is not possible.
[2023-10-25T13:55:44.099+00:00][ERROR][elasticsearch-service] Unable to retrieve version information from Elasticsearch nodes. connect ECONNREFUSED 192.168.32.6:9202
[2023-10-25T13:55:49.370+00:00][ERROR][plugins.security.authentication] License is not available, authentication is not possible.
[2023-10-25T13:55:56.693+00:00][ERROR][elasticsearch-service] Unable to retrieve version information from Elasticsearch nodes. connect ECONNREFUSED 192.168.32.5:9201
[2023-10-25T13:56:09.466+00:00][ERROR][plugins.security.authentication] License is not available, authentication is not possible.
[2023-10-25T13:56:16.154+00:00][ERROR][elasticsearch-service] Unable to retrieve version information from Elasticsearch nodes. connect ECONNREFUSED 192.168.32.6:9202
[2023-10-25T13:56:29.563+00:00][ERROR][plugins.security.authentication] License is not available, authentication is not possible.
[2023-10-25T13:56:34.186+00:00][ERROR][elasticsearch-service] Unable to retrieve version information from Elasticsearch nodes. connect ECONNREFUSED 192.168.32.2:9200
[2023-10-25T13:56:35.648+00:00][ERROR][elasticsearch-service] Unable to retrieve version information from Elasticsearch nodes. connect ECONNREFUSED 192.168.32.6:9202
[2023-10-25T13:56:38.145+00:00][ERROR][elasticsearch-service] Unable to retrieve version information from Elasticsearch nodes. connect ECONNREFUSED 192.168.32.2:9200
[2023-10-25T13:56:40.647+00:00][ERROR][elasticsearch-service] Unable to retrieve version information from Elasticsearch nodes. connect ECONNREFUSED 192.168.32.5:9201
[2023-10-25T13:56:42.353+00:00][ERROR][plugins.security.authentication] License is not available, authentication is not possible.
[2023-10-25T13:56:43.160+00:00][ERROR][elasticsearch-service] Unable to retrieve version information from Elasticsearch nodes. connect ECONNREFUSED 192.168.32.2:9200
[2023-10-25T13:56:45.653+00:00][ERROR][elasticsearch-service] Unable to retrieve version information from Elasticsearch nodes. connect ECONNREFUSED 192.168.32.5:9201
[2023-10-25T13:56:48.149+00:00][ERROR][elasticsearch-service] Unable to retrieve version information from Elasticsearch nodes. connect ECONNREFUSED 192.168.32.6:9202
[2023-10-25T13:56:50.653+00:00][ERROR][elasticsearch-service] Unable to retrieve version information from Elasticsearch nodes. connect ECONNREFUSED 192.168.32.2:9200
[2023-10-25T13:56:52.554+00:00][ERROR][plugins.security.authentication] License is not available, authentication is not possible.
[2023-10-25T13:56:53.151+00:00][ERROR][elasticsearch-service] Unable to retrieve version information from Elasticsearch nodes. connect ECONNREFUSED 192.168.32.6:9202
[2023-10-25T13:56:55.649+00:00][ERROR][elasticsearch-service] Unable to retrieve version information from Elasticsearch nodes. connect ECONNREFUSED 192.168.32.2:9200
[2023-10-25T13:56:58.152+00:00][ERROR][elasticsearch-service] Unable to retrieve version information from Elasticsearch nodes. connect ECONNREFUSED 192.168.32.5:9201
[2023-10-25T13:57:00.652+00:00][ERROR][elasticsearch-service] Unable to retrieve version information from Elasticsearch nodes. connect ECONNREFUSED 192.168.32.6:9202
[2023-10-25T13:57:02.778+00:00][ERROR][plugins.security.authentication] License is not available, authentication is not possible.

I tried to bring node 1 down and up two or three more times after the first Kibana crash (Kibana stopped once; after that, its container stayed healthy even when I stopped node 1).

Thank you for your help!

This topic was automatically closed 28 days after the last reply. New replies are no longer allowed.