"No alive nodes found in your cluster" all of a sudden

Hi,
I am running into a problem with the Elasticsearch service. Some cron jobs run on my server and crawl data from other applications; the data is then indexed in Elasticsearch.
The problem is that the Elasticsearch service fails at unpredictable times and stops indexing data, reporting the error:
"no alive nodes found in your cluster".
I have to restart the Elasticsearch service manually, and then it works again.

I found the following log entries in /var/log/messages:
AHV-ID-8121 kernel: Out of memory: Kill process 36993 (java) score 143 or sacrifice child
AHV-ID-8121 kernel: Killed process 36993 (java) total-vm:6090372kB, anon-rss:2479712kB, file-rss:0kB, shmem-rss:0kB
AHV-ID-8121 systemd: elasticsearch.service: main process exited, code=killed, status=9/KILL
AHV-ID-8121 systemd: Unit elasticsearch.service entered failed state.
AHV-ID-8121 systemd: elasticsearch.service failed.
AHV-ID-8121 dockerd: time="2017-11-02T12:23:41.973997500Z" level=info msg="libcontainerd: new containerd process, pid: 38366"
AHV-ID-8121 systemd-logind: Removed session 143489.

(FYI: I am using a very basic Elasticsearch configuration.)

What could be the cause? Any help would be appreciated!

What is the full output from the cluster stats API?

{
"_nodes" : {
"total" : 1,
"successful" : 1,
"failed" : 0
},
"cluster_name" : "buythenews_cluster",
"timestamp" : 1509627687303,
"status" : "yellow",
"indices" : {
"count" : 1,
"shards" : {
"total" : 5,
"primaries" : 5,
"replication" : 0.0,
"index" : {
"shards" : {
"min" : 5,
"max" : 5,
"avg" : 5.0
},
"primaries" : {
"min" : 5,
"max" : 5,
"avg" : 5.0
},
"replication" : {
"min" : 0.0,
"max" : 0.0,
"avg" : 0.0
}
}
},
"docs" : {
"count" : 110592,
"deleted" : 4
},
"store" : {
"size" : "168.6mb",
"size_in_bytes" : 176882811,
"throttle_time" : "0s",
"throttle_time_in_millis" : 0
},
"fielddata" : {
"memory_size" : "0b",
"memory_size_in_bytes" : 0,
"evictions" : 0
},
"query_cache" : {
"memory_size" : "0b",
"memory_size_in_bytes" : 0,
"total_count" : 0,
"hit_count" : 0,
"miss_count" : 0,
"cache_size" : 0,
"cache_count" : 0,
"evictions" : 0
},
"completion" : {
"size" : "0b",
"size_in_bytes" : 0
},
"segments" : {
"count" : 30,
"memory" : "978kb",
"memory_in_bytes" : 1001525,
"terms_memory" : "879.4kb",
"terms_memory_in_bytes" : 900523,
"stored_fields_memory" : "29.3kb",
"stored_fields_memory_in_bytes" : 30072,
"term_vectors_memory" : "0b",
"term_vectors_memory_in_bytes" : 0,
"norms_memory" : "20.6kb",
"norms_memory_in_bytes" : 21120,
"points_memory" : "1.4kb",
"points_memory_in_bytes" : 1450,
"doc_values_memory" : "47.2kb",
"doc_values_memory_in_bytes" : 48360,
"index_writer_memory" : "0b",
"index_writer_memory_in_bytes" : 0,
"version_map_memory" : "0b",
"version_map_memory_in_bytes" : 0,
"fixed_bit_set" : "0b",
"fixed_bit_set_memory_in_bytes" : 0,
"max_unsafe_auto_id_timestamp" : -1,
"file_sizes" : { }
}
},
"nodes" : {
"count" : {
"total" : 1,
"data" : 1,
"coordinating_only" : 0,
"master" : 1,
"ingest" : 1
},
"versions" : [
"5.6.3"
],
"os" : {
"available_processors" : 4,
"allocated_processors" : 4,
"names" : [
{
"name" : "Linux",
"count" : 1
}
],
"mem" : {
"total" : "15.5gb",
"total_in_bytes" : 16650620928,
"free" : "1.6gb",
"free_in_bytes" : 1802637312,
"used" : "13.8gb",
"used_in_bytes" : 14847983616,
"free_percent" : 11,
"used_percent" : 89
}
},
"process" : {
"cpu" : {
"percent" : 24
},
"open_file_descriptors" : {
"min" : 183,
"max" : 183,
"avg" : 183
}
},
"jvm" : {
"max_uptime" : "23.4s",
"max_uptime_in_millis" : 23441,
"versions" : [
{
"version" : "1.8.0_141",
"vm_name" : "OpenJDK 64-Bit Server VM",
"vm_version" : "25.141-b16",
"vm_vendor" : "Oracle Corporation",
"count" : 1
}
],
"mem" : {
"heap_used" : "278.1mb",
"heap_used_in_bytes" : 291618768,
"heap_max" : "1.9gb",
"heap_max_in_bytes" : 2112618496
},
"threads" : 44
},
"fs" : {
"total" : "38.4gb",
"total_in_bytes" : 41293721600,
"free" : "34.2gb",
"free_in_bytes" : 36742766592,
"available" : "34.2gb",
"available_in_bytes" : 36742766592
},
"plugins" : [ ],
"network_types" : {
"transport_types" : {
"netty4" : 1
},
"http_types" : {
"netty4" : 1
}
}
}
}

That looks OK. Is there anything in the Elasticsearch logs?

Only the following, which I already included above:

In /var/log/messages:

AHV-ID-8121 kernel: Out of memory: Kill process 36993 (java) score 143 or sacrifice child
AHV-ID-8121 kernel: Killed process 36993 (java) total-vm:6090372kB, anon-rss:2479712kB, file-rss:0kB, shmem-rss:0kB
AHV-ID-8121 systemd: elasticsearch.service: main process exited, code=killed, status=9/KILL
AHV-ID-8121 systemd: Unit elasticsearch.service entered failed state.
AHV-ID-8121 systemd: elasticsearch.service failed.
AHV-ID-8121 dockerd: time="2017-11-02T12:23:41.973997500Z" level=info msg="libcontainerd: new containerd process, pid: 38366"
AHV-ID-8121 systemd-logind: Removed session 143489.

Can you please tell me where I can find more specific logs for you? Thanks.

That will depend on how you installed it and which OS you are using.

Looks like an OOM killer process or something like this, no?

kernel: Out of memory: Kill process 36993 (java) score 143 or sacrifice child

Hello,

I just found in my server statistics (Plesk) that my RAM is fully occupied, so the OOM killer may be killing the Java processes. I will increase my server's memory and then check whether it still crashes. Until then, thanks for all your quick responses.

This topic was automatically closed 28 days after the last reply. New replies are no longer allowed.