Elasticsearch failed node exception (Negative longs unsupported) since 7.5 -7.6 upgrade

Since upgrading from 7.5 to 7.6 we have been getting an error on two of our master nodes which is causing them to fail. It seems something is trying to insert a negative long where one is not supported causing failure.

I found reference to the double decrement bug #24922 but havent read eanything else since it first was posted.

The output of _cluster/stats is below

  "_nodes" : {
    "total" : 8,
    "successful" : 6,
    "failed" : 2,
    "failures" : [
      {
        "type" : "failed_node_exception",
        "reason" : "Failed node [ZPIQJnfFTCuiSg22MsOQLw]",
        "node_id" : "ZPIQJnfFTCuiSg22MsOQLw",
        "caused_by" : {
          "type" : "illegal_state_exception",
          "reason" : "Negative longs unsupported, use writeLong or writeZLong for negative numbers [-1110]"
        }
      },
      {
        "type" : "failed_node_exception",
        "reason" : "Failed node [nYi3crUjQ4m6A099m3sUtQ]",
        "node_id" : "nYi3crUjQ4m6A099m3sUtQ",
        "caused_by" : {
          "type" : "illegal_state_exception",
          "reason" : "Negative longs unsupported, use writeLong or writeZLong for negative numbers [-1191]"
        }
      }
    ]
  },
  "cluster_name" : "activeeye",
  "cluster_uuid" : "piKVwtwtSF6z6DJWOW2TnQ",
  "timestamp" : 1583416481073,
  "status" : "green",
  "indices" : {
    "count" : 187,
    "shards" : {
      "total" : 306,
      "primaries" : 179,
      "replication" : 0.7094972067039106,
      "index" : {
        "shards" : {
          "min" : 1,
          "max" : 2,
          "avg" : 1.6363636363636365
        },
        "primaries" : {
          "min" : 0,
          "max" : 1,
          "avg" : 0.9572192513368984
        },
        "replication" : {
          "min" : 0.0,
          "max" : 1.0,
          "avg" : 0.6363636363636364
        }
      }
    },
    "docs" : {
      "count" : 625637475,
      "deleted" : 1419130
    },
    "store" : {
      "size_in_bytes" : 703515709226
    },
    "fielddata" : {
      "memory_size_in_bytes" : 344136,
      "evictions" : 0
    },
    "query_cache" : {
      "memory_size_in_bytes" : 88139424,
      "total_count" : 27285280,
      "hit_count" : 3385185,
      "miss_count" : 23900095,
      "cache_size" : 14515,
      "cache_count" : 110952,
      "evictions" : 96437
    },
    "completion" : {
      "size_in_bytes" : 0
    },
    "segments" : {
      "count" : 3962,
      "memory_in_bytes" : 683487627,
      "terms_memory_in_bytes" : 357001299,
      "stored_fields_memory_in_bytes" : 309127920,
      "term_vectors_memory_in_bytes" : 0,
      "norms_memory_in_bytes" : 215616,
      "points_memory_in_bytes" : 0,
      "doc_values_memory_in_bytes" : 17142792,
      "index_writer_memory_in_bytes" : 119236932,
      "version_map_memory_in_bytes" : 0,
      "fixed_bit_set_memory_in_bytes" : 3272792,
      "max_unsafe_auto_id_timestamp" : 1583413304817,
      "file_sizes" : { }
    }
  },
  "nodes" : {
    "count" : {
      "total" : 6,
      "coordinating_only" : 0,
      "data" : 5,
      "ingest" : 1,
      "master" : 1,
      "ml" : 2,
      "voting_only" : 0
    },
    "versions" : [
      "7.6.0"
    ],
    "os" : {
      "available_processors" : 48,
      "allocated_processors" : 48,
      "names" : [
        {
          "name" : "Linux",
          "count" : 6
        }
      ],
      "pretty_names" : [
        {
          "pretty_name" : "Ubuntu 18.04.4 LTS",
          "count" : 6
        }
      ],
      "mem" : {
        "total_in_bytes" : 405320466432,
        "free_in_bytes" : 69636608000,
        "used_in_bytes" : 335683858432,
        "free_percent" : 17,
        "used_percent" : 83
      }
    },
    "process" : {
      "cpu" : {
        "percent" : 54
      },
      "open_file_descriptors" : {
        "min" : 459,
        "max" : 1328,
        "avg" : 1109
      }
    },
    "jvm" : {
      "max_uptime_in_millis" : 1907281151,
      "versions" : [
        {
          "version" : "13.0.2",
          "vm_name" : "OpenJDK 64-Bit Server VM",
          "vm_version" : "13.0.2+8",
          "vm_vendor" : "AdoptOpenJDK",
          "bundled_jdk" : true,
          "using_bundled_jdk" : true,
          "count" : 6
        }
      ],
      "mem" : {
        "heap_used_in_bytes" : 74059680672,
        "heap_max_in_bytes" : 205740048384
      },
      "threads" : 1234
    },
    "fs" : {
      "total_in_bytes" : 4627881299968,
      "free_in_bytes" : 3822536753152,
      "available_in_bytes" : 3587072835584
    },
    "plugins" : [ ],
    "network_types" : {
      "transport_types" : {
        "security4" : 6
      },
      "http_types" : {
        "security4" : 6
      }
    },
    "discovery_types" : {
      "zen" : 6
    },
    "packaging_types" : [
      {
        "flavor" : "default",
        "type" : "deb",
        "count" : 6
      }
    ],
    "ingest" : {
      "number_of_pipelines" : 28,
      "processor_stats" : {
        "conditional" : {
          "count" : 3867298978,
          "failed" : 1514681,
          "current" : -1522659,
          "time_in_millis" : 8125022
        },
        "convert" : {
          "count" : 4709920278,
          "failed" : 14914,
          "current" : -14914,
          "time_in_millis" : 736729
        },
        "date" : {
          "count" : 15395792,
          "failed" : 0,
          "current" : 0,
          "time_in_millis" : 31584
        },
        "dot_expander" : {
          "count" : 838856,
          "failed" : 0,
          "current" : 0,
          "time_in_millis" : 513
        },
        "geoip" : {
          "count" : 487619276,
          "failed" : 0,
          "current" : 0,
          "time_in_millis" : 1512278
        },
        "grok" : {
          "count" : 39479821,
          "failed" : 10083419,
          "current" : -1710,
          "time_in_millis" : 154745
        },
        "gsub" : {
          "count" : 0,
          "failed" : 0,
          "current" : 0,
          "time_in_millis" : 0
        },
        "json" : {
          "count" : 211524,
          "failed" : 905,
          "current" : -905,
          "time_in_millis" : 2110
        },
        "lowercase" : {
          "count" : 0,
          "failed" : 0,
          "current" : 0,
          "time_in_millis" : 0
        },
        "remove" : {
          "count" : 261919804,
          "failed" : 1694,
          "current" : -1694,
          "time_in_millis" : 84587
        },
        "rename" : {
          "count" : 1246901214,
          "failed" : 112969,
          "current" : -112969,
          "time_in_millis" : 836820
        },
        "script" : {
          "count" : 10497349,
          "failed" : 10080698,
          "current" : 0,
          "time_in_millis" : 127334
        },
        "split" : {
          "count" : 13537736,
          "failed" : 0,
          "current" : 0,
          "time_in_millis" : 2473
        },
        "urldecode" : {
          "count" : 1618552,
          "failed" : 0,
          "current" : 0,
          "time_in_millis" : 422
        },
        "user_agent" : {
          "count" : 1618552,
          "failed" : 0,
          "current" : 0,
          "time_in_millis" : 817
        }
      }
    }
  }
}

As you can see 2 nodes have failed.

I will comment an example of the common error on both nodes due to post limits.

Id be really appreciative of any help as this has been an issue I've been working on for over a week.

James

[2020-03-05T13:11:40,048][WARN ][o.e.t.OutboundHandler    ] [node-3] send message failed [channel: Netty4TcpChannel{localAddress=/10.150.28.89:9300, remoteAddress=/10.150.28.87:59118}]
java.lang.IllegalStateException: Negative longs unsupported, use writeLong or writeZLong for negative numbers [-1191]
        at org.elasticsearch.common.io.stream.StreamOutput.writeVLong(StreamOutput.java:299) ~[elasticsearch-7.6.0.jar:7.6.0]
        at org.elasticsearch.ingest.IngestStats$Stats.writeTo(IngestStats.java:197) ~[elasticsearch-7.6.0.jar:7.6.0]
        at org.elasticsearch.ingest.IngestStats.writeTo(IngestStats.java:103) ~[elasticsearch-7.6.0.jar:7.6.0]
        at org.elasticsearch.common.io.stream.StreamOutput.writeOptionalWriteable(StreamOutput.java:897) ~[elasticsearch-7.6.0.jar:7.6.0]
        at org.elasticsearch.action.admin.cluster.node.stats.NodeStats.writeTo(NodeStats.java:255) ~[elasticsearch-7.6.0.jar:7.6.0]
        at org.elasticsearch.action.admin.cluster.stats.ClusterStatsNodeResponse.writeTo(ClusterStatsNodeResponse.java:99) ~[elasticsearch-7.6.0.jar:7.6.0]
        at org.elasticsearch.transport.OutboundMessage.writeMessage(OutboundMessage.java:87) ~[elasticsearch-7.6.0.jar:7.6.0]
        at org.elasticsearch.transport.OutboundMessage.serialize(OutboundMessage.java:64) ~[elasticsearch-7.6.0.jar:7.6.0]
        at org.elasticsearch.transport.OutboundHandler$MessageSerializer.get(OutboundHandler.java:166) ~[elasticsearch-7.6.0.jar:7.6.0]
        at org.elasticsearch.transport.OutboundHandler$MessageSerializer.get(OutboundHandler.java:152) ~[elasticsearch-7.6.0.jar:7.6.0]
        at org.elasticsearch.transport.OutboundHandler$SendContext.get(OutboundHandler.java:199) [elasticsearch-7.6.0.jar:7.6.0]
        at org.elasticsearch.transport.OutboundHandler.internalSend(OutboundHandler.java:129) [elasticsearch-7.6.0.jar:7.6.0]
        at org.elasticsearch.transport.OutboundHandler.sendMessage(OutboundHandler.java:124) [elasticsearch-7.6.0.jar:7.6.0]
        at org.elasticsearch.transport.OutboundHandler.sendResponse(OutboundHandler.java:104) [elasticsearch-7.6.0.jar:7.6.0]
        at org.elasticsearch.transport.TcpTransportChannel.sendResponse(TcpTransportChannel.java:64) [elasticsearch-7.6.0.jar:7.6.0]
        at org.elasticsearch.transport.TaskTransportChannel.sendResponse(TaskTransportChannel.java:54) [elasticsearch-7.6.0.jar:7.6.0]
        at org.elasticsearch.action.support.nodes.TransportNodesAction$NodeTransportHandler.messageReceived(TransportNodesAction.java:244) [elasticsearch-7.6.0.jar:7.6.0]
        at org.elasticsearch.action.support.nodes.TransportNodesAction$NodeTransportHandler.messageReceived(TransportNodesAction.java:240) [elasticsearch-7.6.0.jar:7.6.0]
        at org.elasticsearch.xpack.security.transport.SecurityServerTransportInterceptor$ProfileSecuredRequestHandler$1.doRun(SecurityServerTransportInterceptor.java:257) [x-pack-security-7.6.0.jar:7.6.0]
        at org.elasticsearch.common.util.concurrent.AbstractRunnable.run(AbstractRunnable.java:37) [elasticsearch-7.6.0.jar:7.6.0]
        at org.elasticsearch.common.util.concurrent.EsExecutors$DirectExecutorService.execute(EsExecutors.java:225) [elasticsearch-7.6.0.jar:7.6.0]
        at org.elasticsearch.xpack.security.transport.SecurityServerTransportInterceptor$ProfileSecuredRequestHandler.lambda$messageReceived$0(SecurityServerTransportInterceptor.java:306) [x-pack-security-7.6.0.jar:7.6.0]
        at org.elasticsearch.action.ActionListener$1.onResponse(ActionListener.java:63) [elasticsearch-7.6.0.jar:7.6.0]
        at org.elasticsearch.xpack.security.authz.AuthorizationService.authorizeSystemUser(AuthorizationService.java:378) [x-pack-security-7.6.0.jar:7.6.0]
        at org.elasticsearch.xpack.security.authz.AuthorizationService.authorize(AuthorizationService.java:186) [x-pack-security-7.6.0.jar:7.6.0]
        at org.elasticsearch.xpack.security.transport.ServerTransportFilter$NodeProfile.lambda$inbound$1(ServerTransportFilter.java:130) [x-pack-security-7.6.0.jar:7.6.0]
        at org.elasticsearch.action.ActionListener$1.onResponse(ActionListener.java:63) [elasticsearch-7.6.0.jar:7.6.0]
        at org.elasticsearch.xpack.security.authc.AuthenticationService$Authenticator.lambda$authenticateAsync$2(AuthenticationService.java:248) [x-pack-security-7.6.0.jar:7.6.0]
        at org.elasticsearch.xpack.security.authc.AuthenticationService$Authenticator.lambda$lookForExistingAuthentication$6(AuthenticationService.java:310) [x-pack-security-7.6.0.jar:7.6.0]
        at org.elasticsearch.xpack.security.authc.AuthenticationService$Authenticator.lookForExistingAuthentication(AuthenticationService.java:321) [x-pack-security-7.6.0.jar:7.6.0]
        at org.elasticsearch.xpack.security.authc.AuthenticationService$Authenticator.authenticateAsync(AuthenticationService.java:245) [x-pack-security-7.6.0.jar:7.6.0]
        at org.elasticsearch.xpack.security.authc.AuthenticationService$Authenticator.access$000(AuthenticationService.java:196) [x-pack-security-7.6.0.jar:7.6.0]
        at org.elasticsearch.xpack.security.authc.AuthenticationService.authenticate(AuthenticationService.java:139) [x-pack-security-7.6.0.jar:7.6.0]
        at org.elasticsearch.xpack.security.transport.ServerTransportFilter$NodeProfile.inbound(ServerTransportFilter.java:121) [x-pack-security-7.6.0.jar:7.6.0]
        at org.elasticsearch.xpack.security.transport.SecurityServerTransportInterceptor$ProfileSecuredRequestHandler.messageReceived(SecurityServerTransportInterceptor.java:313) [x-pack-security-7.6.0.jar:7.6.0]
        at org.elasticsearch.transport.RequestHandlerRegistry.processMessageReceived(RequestHandlerRegistry.java:63) [elasticsearch-7.6.0.jar:7.6.0]
        at org.elasticsearch.transport.InboundHandler$RequestHandler.doRun(InboundHandler.java:264) [elasticsearch-7.6.0.jar:7.6.0]
        at org.elasticsearch.common.util.concurrent.ThreadContext$ContextPreservingAbstractRunnable.doRun(ThreadContext.java:692) [elasticsearch-7.6.0.jar:7.6.0]
        at org.elasticsearch.common.util.concurrent.AbstractRunnable.run(AbstractRunnable.java:37) [elasticsearch-7.6.0.jar:7.6.0]
        at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1128) [?:?]
        at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:628) [?:?]
        at java.lang.Thread.run(Thread.java:830) [?:?]

This is a known issue in 7.6.0, fixed by https://github.com/elastic/elasticsearch/pull/52543 in 7.6.1.

2 Likes

Thank you very much David. All sorted :slight_smile:

This topic was automatically closed 28 days after the last reply. New replies are no longer allowed.