Hi,
I have an 8-node cluster (3 master, 2 ingest, and 3 data nodes) running on Kubernetes (1.9.2). I am using a custom image based on the official 6.2.2 X-Pack basic image. The Dockerfile is below:
FROM docker.elastic.co/elasticsearch/elasticsearch-basic:6.2.2
VOLUME ["/usr/share/elasticsearch/data"]
ENV ES_JAVA_OPTS "-Xms5g -Xmx5g"
ENV CLUSTER_NAME elasticsearch-default
ENV NODE_MASTER true
ENV NODE_DATA true
ENV NODE_INGEST true
ENV HTTP_ENABLE true
ENV NETWORK_HOST _site_
ENV HTTP_CORS_ENABLE true
ENV HTTP_CORS_ALLOW_ORIGIN *
ENV NUMBER_OF_MASTERS 1
ENV MAX_LOCAL_STORAGE_NODES 1
ENV SHARD_ALLOCATION_AWARENESS ""
ENV SHARD_ALLOCATION_AWARENESS_ATTR ""
ENV MEMORY_LOCK true
ENV REPO_LOCATIONS []
USER root
COPY config/jvm.options /usr/share/elasticsearch/config/
COPY config/elasticsearch.yml /usr/share/elasticsearch/config/
COPY config/log4j2.properties /usr/share/elasticsearch/config/
COPY run.sh .
RUN chown elasticsearch:elasticsearch /usr/share/elasticsearch/config/jvm.options \
/usr/share/elasticsearch/config/elasticsearch.yml \
/usr/share/elasticsearch/config/log4j2.properties \
run.sh && chmod 0750 run.sh
CMD ["/bin/sh", "run.sh"]
run.sh:
#!/bin/sh
set -ex

# Raise the limits Elasticsearch needs; the container starts as root
# so these ulimit calls can succeed.
ulimit -n 65536
ulimit -u 4096
ulimit -l unlimited

# Hand the install and data directories to the elasticsearch user
# before dropping privileges. The second chown is redundant with the
# recursive one above, but kept to be explicit about the data volume.
chown -R elasticsearch:elasticsearch /usr/share/elasticsearch
chown -R elasticsearch:elasticsearch /usr/share/elasticsearch/data

# Drop to the elasticsearch user and start the node through the stock
# entrypoint; exec replaces this shell so one fewer process sits
# between Kubernetes and the JVM.
exec su elasticsearch /usr/local/bin/docker-entrypoint.sh /usr/share/elasticsearch/bin/elasticsearch
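Because bootstrap.memory_lock is on and run.sh raises the memlock limit, the container spec also grants the matching capabilities, roughly like this (an illustrative sketch; the actual manifest may differ):

securityContext:
  capabilities:
    add:
      - IPC_LOCK      # allow mlockall() for bootstrap.memory_lock: true
      - SYS_RESOURCE  # allow run.sh to raise RLIMIT_MEMLOCK (ulimit -l unlimited)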
elasticsearch.yml:
cluster:
  name: ${CLUSTER_NAME}
node:
  master: ${NODE_MASTER}
  name: ${NODE_NAME}
  data: ${NODE_DATA}
  ingest: ${NODE_INGEST}
  ml: false
  max_local_storage_nodes: ${MAX_LOCAL_STORAGE_NODES}
processors: ${PROCESSORS:1}
network.host: ${NETWORK_HOST}
path:
  data: /data/data
  logs: /data/log
  repo: ${REPO_LOCATIONS}
bootstrap:
  memory_lock: ${MEMORY_LOCK}
http:
  enabled: ${HTTP_ENABLE}
  compression: true
  cors:
    enabled: ${HTTP_CORS_ENABLE}
    allow-origin: ${HTTP_CORS_ALLOW_ORIGIN}
discovery:
  zen:
    minimum_master_nodes: ${NUMBER_OF_MASTERS}
    ping.unicast.hosts: elasticsearch-discovery
xpack.security.enabled: false
xpack.monitoring.enabled: true
xpack.ml.enabled: false
xpack.monitoring.exporters.my_local:
  type: local
  use_ingest: false
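elasticsearch-discovery in ping.unicast.hosts is a headless Service selecting the master pods, roughly like this (a sketch; the selector labels are placeholders for my actual ones):

apiVersion: v1
kind: Service
metadata:
  name: elasticsearch-discovery
spec:
  clusterIP: None          # headless: DNS returns the master pod IPs directly
  selector:
    app: elasticsearch
    role: master
  ports:
    - name: transport
      port: 9300           # Elasticsearch transport port used for discovery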
The nodes just keep restarting without any error message, and I have to delete the pods or the deployment multiple times before the cluster finally stabilizes.
Logs from master and ingest nodes: https://pastebin.com/Qc1sWMaG
Any clue as to why this is happening and how to fix it?