Elasticsearch Node.js client shows inaccessible node as alive

I have a simple Node.js function that creates an Elasticsearch client over a cluster of nodes and returns the URL of one node that is active:

const { Client } = require('@elastic/elasticsearch');

// Elastic cluster nodes
const ELASTIC_CLUSTER_NODES = [
    'http://node1.domain.com:9200',
    'http://node2.domain.com:9200',
    'http://node3.domain.com:9200',
];

async function getElasticsearchUrl() {
    const client = new Client({
        nodes: ELASTIC_CLUSTER_NODES,
        sniffOnStart: true,
        // The option is named sniffOnConnectionFault; an unknown key such as
        // "sniffOnConnectionFail" is ignored by the client.
        sniffOnConnectionFault: true,
        // There is no "snifferTimeout" option; sniffInterval takes
        // milliseconds, so 60000 = sniff every 60 seconds.
        sniffInterval: 60000,
        auth: {
            username: 'username',
            password: 'password'
        }
    });

    // new Client() is synchronous, so there is nothing to await here.
    // Return the URL of the first connection the pool reports as alive.
    const alive = client.connectionPool.connections.find(
        (connection) => connection.status === 'alive'
    );
    return alive && alive.url;
}
exports.getElasticsearchUrl = getElasticsearchUrl;
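
For context, the function is consumed elsewhere roughly like this (a hypothetical caller; the './elastic' module path is illustrative, not part of the original code):

const { getElasticsearchUrl } = require('./elastic');

(async () => {
    const url = await getElasticsearchUrl();
    console.log(`Using Elasticsearch node: ${url}`);
})();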

node1 is currently down. The expectation is that the client will sniff the hosts and report only the 2 nodes that are alive, node2 and node3.
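
To double-check reachability outside the client's connection pool, each host can be probed with a throwaway single-node client and ping(). This is a minimal sketch, not part of the original application; the placeholder credentials and node list are the ones from the snippet above:

const { Client } = require('@elastic/elasticsearch');

async function probeNodes(nodes) {
    for (const node of nodes) {
        const probe = new Client({
            node,
            auth: { username: 'username', password: 'password' },
        });
        try {
            // ping() issues a lightweight request and rejects
            // if the node cannot be reached.
            await probe.ping();
            console.log(`${node} is reachable`);
        } catch (err) {
            console.log(`${node} is NOT reachable: ${err.message}`);
        } finally {
            await probe.close();
        }
    }
}

probeNodes(ELASTIC_CLUSTER_NODES).catch(console.error);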

However, the client returns all 3 nodes as alive:

"connections": [
            {
                "url": "http://node1.domain.com:9200/",
                "id": "http://node1.domain.com:9200/",
                "headers": {},
                "deadCount": 0,
                "resurrectTimeout": 0,
                "_openRequests": 1,
                "status": "alive",
                "roles": {
                    "master": true,
                    "data": true,
                    "ingest": true,
                    "ml": false
                }
            },
            {
                "url": "http://node2.domain.com:9200/",
                "id": "http://node2.domain.com:9200/",
                "headers": {},
                "deadCount": 0,
                "resurrectTimeout": 0,
                "_openRequests": 0,
                "status": "alive",
                "roles": {
                    "master": true,
                    "data": true,
                    "ingest": true,
                    "ml": false
                }
            },
            {
                "url": "http://node3.domain.com:9200/",
                "id": "http://node3.domain.com:9200/",
                "headers": {},
                "deadCount": 0,
                "resurrectTimeout": 0,
                "_openRequests": 0,
                "status": "alive",
                "roles": {
                    "master": true,
                    "data": true,
                    "ingest": true,
                    "ml": false
                }
            }
        ],

This causes the application to use node1's URL, which is inaccessible, and the application breaks. I cross-checked with the _nodes API, which correctly reports only the 2 live nodes:

{
  "_nodes": {
    "total": 2,
    "successful": 2,
    "failed": 0
  },
  "cluster_name": "my-cluster",
  "nodes": {
    "OcQeD7-cgRV_rMEkw21oAm": {
      "name": "node3",
      "transport_address": "xx.xxx.xx.100:9300",
      "host": "xx.xxx.xx.100",
      "ip": "xx.xxx.xx.100",
      "version": "7.13.0",
      "build_flavor": "default",
      "build_type": "rpm",
      "build_hash": "528ef91cc5cba6855f0a35386d966390ec1b20c5",
      "roles": [
        "data",
        "data_cold",
        "data_content",
        "data_frozen",
        "data_hot",
        "data_warm",
        "ingest",
        "master",
        "ml",
        "remote_cluster_client",
        "transform"
      ],
      "attributes": {
        "ml.machine_memory": "8201256960",
        "ml.max_open_jobs": "512",
        "xpack.installed": "true",
        "ml.max_jvm_size": "4102029312",
        "transform.node": "true"
      },
      "http": {
        "bound_address": [
          "xx.xxx.xx.100:9200"
        ],
        "publish_address": "xx.xxx.xx.100:9200",
        "max_content_length_in_bytes": 104857600
      }
    },
    "N--DVxaQzbvDQqkUju9-Uu": {
      "name": "node2",
      "transport_address": "xx.xxx.xx.99:9300",
      "host": "xx.xxx.xx.99",
      "ip": "xx.xxx.xx.99",
      "version": "7.13.0",
      "build_flavor": "default",
      "build_type": "rpm",
      "build_hash": "3169552cdbce523538ec0f1fc98a6a665cb98500",
      "roles": [
        "data",
        "data_cold",
        "data_content",
        "data_frozen",
        "data_hot",
        "data_warm",
        "ingest",
        "master",
        "ml",
        "remote_cluster_client",
        "transform"
      ],
      "attributes": {
        "ml.machine_memory": "8201273344",
        "xpack.installed": "true",
        "transform.node": "true",
        "ml.max_open_jobs": "512",
        "ml.max_jvm_size": "4102029312"
      },
      "http": {
        "bound_address": [
          "xx.xxx.xx.99:9200"
        ],
        "publish_address": "xx.xxx.xx.99:9200",
        "max_content_length_in_bytes": 104857600
      }
    }
  }
}
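
For completeness, the same _nodes information can be pulled through the JS client itself rather than curl; a minimal sketch under the same assumptions (placeholder credentials, node list from the first snippet):

const { Client } = require('@elastic/elasticsearch');

async function listLiveNodes() {
    const client = new Client({
        nodes: ELASTIC_CLUSTER_NODES,
        auth: { username: 'username', password: 'password' },
    });
    // Equivalent of GET /_nodes/http: only nodes that have actually
    // joined the cluster show up in the response.
    const { body } = await client.nodes.info({ metric: 'http' });
    console.log(`total: ${body._nodes.total}, successful: ${body._nodes.successful}`);
    for (const [id, node] of Object.entries(body.nodes)) {
        console.log(id, node.name, node.http.publish_address);
    }
    await client.close();
}

listLiveNodes().catch(console.error);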

Is there anything wrong with the code, or with the logic of fetching the first available working Elasticsearch node URL? Any information on this would be really helpful.

Note: all sensitive info has been obfuscated.
