I deployed a 3-node Elasticsearch cluster on AWS EKS. After launching the cluster, I can see 3 pods: 2 of them are running fine, but one of them keeps failing, terminating, and restarting.
Below is the error log from the failing pod.
{"type": "server", "timestamp": "2021-12-26T08:17:33,061Z", "level": "INFO", "component": "o.e.i.g.DatabaseRegistry", "cluster.name": "elk", "node.name": "elk-es-node-1", "message": "downloading geoip database [GeoLite2-ASN.mmdb] to [/tmp/elasticsearch-9470345091343635510/geoip-databases/HoGUMQ9ISsCjQ4KhIL2IFA/GeoLite2-ASN.mmdb.tmp.gz]" }
{"type": "server", "timestamp": "2021-12-26T08:17:33,070Z", "level": "ERROR", "component": "o.e.i.g.DatabaseRegistry", "cluster.name": "elk", "node.name": "elk-es-node-1", "message": "failed to download database [GeoLite2-ASN.mmdb]",
"stacktrace": ["org.elasticsearch.cluster.block.ClusterBlockException: blocked by: [SERVICE_UNAVAILABLE/1/state not recovered / initialized];",
"at org.elasticsearch.cluster.block.ClusterBlocks.globalBlockedException(ClusterBlocks.java:179) ~[elasticsearch-7.15.2.jar:7.15.2]",
"at org.elasticsearch.cluster.block.ClusterBlocks.globalBlockedRaiseException(ClusterBlocks.java:165) ~[elasticsearch-7.15.2.jar:7.15.2]",
"at org.elasticsearch.action.search.TransportSearchAction.executeSearch(TransportSearchAction.java:605) ~[elasticsearch-7.15.2.jar:7.15.2]",
"at org.elasticsearch.action.search.TransportSearchAction.executeLocalSearch(TransportSearchAction.java:494) ~[elasticsearch-7.15.2.jar:7.15.2]",
"at org.elasticsearch.action.search.TransportSearchAction.lambda$executeRequest$3(TransportSearchAction.java:288) ~[elasticsearch-7.15.2.jar:7.15.2]",
"at org.elasticsearch.action.ActionListener$1.onResponse(ActionListener.java:134) ~[elasticsearch-7.15.2.jar:7.15.2]",
"at org.elasticsearch.index.query.Rewriteable.rewriteAndFetch(Rewriteable.java:103) ~[elasticsearch-7.15.2.jar:7.15.2]",
"at org.elasticsearch.index.query.Rewriteable.rewriteAndFetch(Rewriteable.java:76) ~[elasticsearch-7.15.2.jar:7.15.2]",
"at org.elasticsearch.action.search.TransportSearchAction.executeRequest(TransportSearchAction.java:329) ~[elasticsearch-7.15.2.jar:7.15.2]",
"at org.elasticsearch.action.search.TransportSearchAction.doExecute(TransportSearchAction.java:217) ~[elasticsearch-7.15.2.jar:7.15.2]",
"at org.elasticsearch.action.search.TransportSearchAction.doExecute(TransportSearchAction.java:93) ~[elasticsearch-7.15.2.jar:7.15.2]",
"at org.elasticsearch.action.support.TransportAction$RequestFilterChain.proceed(TransportAction.java:173) ~[elasticsearch-7.15.2.jar:7.15.2]",
"at org.elasticsearch.action.support.ActionFilter$Simple.apply(ActionFilter.java:42) ~[elasticsearch-7.15.2.jar:7.15.2]",
"at org.elasticsearch.action.support.TransportAction$RequestFilterChain.proceed(TransportAction.java:171) ~[elasticsearch-7.15.2.jar:7.15.2]",
"at org.elasticsearch.action.support.TransportAction.execute(TransportAction.java:149) ~[elasticsearch-7.15.2.jar:7.15.2]",
"at org.elasticsearch.action.support.TransportAction.execute(TransportAction.java:77) ~[elasticsearch-7.15.2.jar:7.15.2]",
"at org.elasticsearch.client.node.NodeClient.executeLocally(NodeClient.java:90) ~[elasticsearch-7.15.2.jar:7.15.2]",
"at org.elasticsearch.client.node.NodeClient.doExecute(NodeClient.java:70) ~[elasticsearch-7.15.2.jar:7.15.2]",
"at org.elasticsearch.client.support.AbstractClient.execute(AbstractClient.java:402) ~[elasticsearch-7.15.2.jar:7.15.2]",
"at org.elasticsearch.client.FilterClient.doExecute(FilterClient.java:54) ~[elasticsearch-7.15.2.jar:7.15.2]",
"at org.elasticsearch.client.OriginSettingClient.doExecute(OriginSettingClient.java:40) ~[elasticsearch-7.15.2.jar:7.15.2]",
"at org.elasticsearch.client.support.AbstractClient.execute(AbstractClient.java:402) ~[elasticsearch-7.15.2.jar:7.15.2]",
"at org.elasticsearch.client.support.AbstractClient.execute(AbstractClient.java:390) ~[elasticsearch-7.15.2.jar:7.15.2]",
"at org.elasticsearch.client.support.AbstractClient.search(AbstractClient.java:534) ~[elasticsearch-7.15.2.jar:7.15.2]",
"at org.elasticsearch.ingest.geoip.DatabaseRegistry.lambda$retrieveDatabase$11(DatabaseRegistry.java:359) [ingest-geoip-7.15.2.jar:7.15.2]",
"at org.elasticsearch.common.util.concurrent.ThreadContext$ContextPreservingRunnable.run(ThreadContext.java:678) [elasticsearch-7.15.2.jar:7.15.2]",
"at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1136) [?:?]",
"at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:635) [?:?]",
"at java.lang.Thread.run(Thread.java:833) [?:?]"] }
{"type": "server", "timestamp": "2021-12-26T08:17:33,295Z", "level": "INFO", "component": "o.e.l.LicenseService", "cluster.name": "elk", "node.name": "elk-es-node-1", "message": "license [8a88ef40-3b0b-439e-9f46-32e911999b7d] mode [basic] - valid" }
{"type": "server", "timestamp": "2021-12-26T08:17:33,309Z", "level": "INFO", "component": "o.e.h.AbstractHttpServerTransport", "cluster.name": "elk", "node.name": "elk-es-node-1", "message": "publish_address {elk-es-node-1.elk-es-node.default.svc/10.0.1.182:9200}, bound_addresses {0.0.0.0:9200}", "cluster.uuid": "hqRP62pNTze1IWQ0sOOR2Q", "node.id": "HoGUMQ9ISsCjQ4KhIL2IFA" }
{"type": "server", "timestamp": "2021-12-26T08:17:33,310Z", "level": "INFO", "component": "o.e.n.Node", "cluster.name": "elk", "node.name": "elk-es-node-1", "message": "started", "cluster.uuid": "hqRP62pNTze1IWQ0sOOR2Q", "node.id": "HoGUMQ9ISsCjQ4KhIL2IFA" }
The error message says "failed to download database [GeoLite2-ASN.mmdb]",
but I don't know what this means.
Below is my Elasticsearch Kubernetes spec file.
# ECK Elasticsearch resource: a 3-node cluster named "elk" running 7.15.2,
# with HTTP TLS and X-Pack security both turned off.
apiVersion: elasticsearch.k8s.elastic.co/v1
kind: Elasticsearch
metadata:
  name: elk
spec:
  version: 7.15.2
  serviceAccountName: docker-sa
  http:
    tls:
      # Disables the operator-generated self-signed certificate (plain HTTP).
      selfSignedCertificate:
        disabled: true
  nodeSets:
    - name: node
      count: 3
      config:
        network.host: 0.0.0.0
        xpack.security.enabled: false
      podTemplate:
        spec:
          initContainers:
            # Raises vm.max_map_count to 262144 before Elasticsearch starts;
            # runs privileged because it writes a kernel setting.
            - name: sysctl
              securityContext:
                privileged: true
              command: ['sh', '-c', 'sysctl -w vm.max_map_count=262144']
          containers:
            - name: elasticsearch
              readinessProbe:
                exec:
                  command:
                    - bash
                    - -c
                    - /mnt/elastic-internal/scripts/readiness-probe-script.sh
                failureThreshold: 3
                initialDelaySeconds: 10
                periodSeconds: 12
                successThreshold: 1
                timeoutSeconds: 12
              env:
                # Timeout (seconds) used inside the readiness script. It must
                # stay below the probe's timeoutSeconds (12) above; otherwise
                # the kubelet aborts the probe before the script's own timeout
                # can take effect. Was "120", which could never apply.
                - name: READINESS_PROBE_TIMEOUT
                  value: "10"
              resources:
                requests:
                  cpu: 1
                  memory: 4Gi
              volumeMounts:
                # Name matches the volumeClaimTemplates entry below.
                - name: elasticsearch-data
                  mountPath: /usr/share/elasticsearch/data
      volumeClaimTemplates:
        # One 1024Gi ReadWriteOnce volume per node from the ebs-sc class.
        - metadata:
            name: elasticsearch-data
          spec:
            accessModes:
              - ReadWriteOnce
            storageClassName: ebs-sc
            resources:
              requests:
                storage: 1024Gi
Any idea why this happens?