Hi all, I'm seeing errors when trying to initialize Fleet on my ECK stack. The manifests I'm using are below.
Elasticsearch:
# ECK-managed Elasticsearch cluster "stable-cluster": three dedicated
# master nodes plus hot, warm and cold data tiers (three nodes each).
# Nesting is restored from a paste that had lost its indentation.
apiVersion: elasticsearch.k8s.elastic.co/v1
kind: Elasticsearch
metadata:
  name: stable-cluster
  annotations:
    eck.k8s.elastic.co/license: basic
spec:
  version: 8.12.1
  auth:
    roles:
      # Extra role definitions are loaded from this Secret.
      - secretName: logstash-roles-secret
  nodeSets:
    # --- Dedicated master-eligible nodes ---
    - name: master-node
      count: 3
      volumeClaimTemplates:
        - metadata:
            name: elasticsearch-data
          spec:
            accessModes:
              - ReadWriteOnce
            resources:
              requests:
                storage: 500Gi
            storageClassName: premium-rwo
      config:
        node.store.allow_mmap: false
        node.roles: [master]
      podTemplate:
        spec:
          containers:
            - name: elasticsearch
              env:
                # NOTE(review): the readinessProbe below is replaced by a
                # tcpSocket check, so this timeout presumably has no
                # effect - verify.
                - name: READINESS_PROBE_TIMEOUT
                  value: "10"
                # NOTE(review): heap is pinned to 10g on every nodeSet while
                # memory requests are 2-4Gi and the limit is 12Gi. Elastic's
                # guidance is requests == limits and heap at about half of
                # container memory - confirm this sizing is intended.
                - name: ES_JAVA_OPTS
                  value: "-Xms10g -Xmx10g"
              livenessProbe:
                tcpSocket:
                  port: 9200
                initialDelaySeconds: 60
                periodSeconds: 60
              readinessProbe:
                tcpSocket:
                  port: 9200
                initialDelaySeconds: 60
                periodSeconds: 60
              resources:
                requests:
                  memory: 4Gi
                  cpu: 4
                limits:
                  memory: 12Gi
    # --- Hot tier: also holds content data and runs ingest pipelines ---
    - name: hot-data
      count: 3
      volumeClaimTemplates:
        - metadata:
            name: elasticsearch-data
          spec:
            accessModes:
              - ReadWriteOnce
            resources:
              requests:
                storage: 1000Gi
            storageClassName: premium-rwo
      config:
        node.store.allow_mmap: false
        node.attr.type: hot
        # NOTE(review): no node.attr.zone is set in this manifest - confirm
        # the "zone" awareness attribute is populated some other way.
        cluster.routing.allocation.awareness.attributes: k8s_node_name,zone
        node.roles: [data_hot, data_content, ingest]
      podTemplate:
        spec:
          containers:
            - name: elasticsearch
              env:
                - name: READINESS_PROBE_TIMEOUT
                  value: "10"
                - name: ES_JAVA_OPTS
                  value: "-Xms10g -Xmx10g"
              livenessProbe:
                tcpSocket:
                  port: 9200
                initialDelaySeconds: 60
                periodSeconds: 60
              readinessProbe:
                tcpSocket:
                  port: 9200
                initialDelaySeconds: 60
                periodSeconds: 60
              resources:
                requests:
                  memory: 2Gi
                limits:
                  memory: 12Gi
    # --- Warm tier ---
    - name: warm-data
      count: 3
      volumeClaimTemplates:
        - metadata:
            name: elasticsearch-data
          spec:
            accessModes:
              - ReadWriteOnce
            resources:
              requests:
                storage: 1000Gi
            storageClassName: standard
      config:
        node.store.allow_mmap: false
        node.attr.type: warm
        cluster.routing.allocation.awareness.attributes: k8s_node_name,zone
        # Written as a list to match the other nodeSets.
        node.roles: [data_warm]
      podTemplate:
        spec:
          containers:
            - name: elasticsearch
              env:
                - name: READINESS_PROBE_TIMEOUT
                  value: "10"
                - name: ES_JAVA_OPTS
                  value: "-Xms10g -Xmx10g"
              livenessProbe:
                tcpSocket:
                  port: 9200
                initialDelaySeconds: 60
                periodSeconds: 60
              readinessProbe:
                tcpSocket:
                  port: 9200
                initialDelaySeconds: 60
                periodSeconds: 60
              resources:
                requests:
                  memory: 2Gi
                limits:
                  memory: 12Gi
    # --- Cold tier ---
    - name: cold-data
      count: 3
      volumeClaimTemplates:
        - metadata:
            name: elasticsearch-data
          spec:
            accessModes:
              - ReadWriteOnce
            resources:
              requests:
                storage: 1000Gi
            storageClassName: standard-rwo
      config:
        node.store.allow_mmap: false
        node.attr.type: cold
        cluster.routing.allocation.awareness.attributes: k8s_node_name,zone
        # Written as a list to match the other nodeSets.
        node.roles: [data_cold]
      podTemplate:
        spec:
          containers:
            - name: elasticsearch
              env:
                - name: READINESS_PROBE_TIMEOUT
                  value: "10"
                - name: ES_JAVA_OPTS
                  value: "-Xms10g -Xmx10g"
              livenessProbe:
                tcpSocket:
                  port: 9200
                initialDelaySeconds: 60
                periodSeconds: 60
              readinessProbe:
                tcpSocket:
                  port: 9200
                initialDelaySeconds: 60
                periodSeconds: 60
              resources:
                requests:
                  memory: 2Gi
                limits:
                  memory: 12Gi
Kibana:
# ECK-managed Kibana "stable-kibana", attached to the "stable-cluster"
# Elasticsearch and preconfigured with Fleet packages and agent policies.
# Nesting is restored from a paste that had lost its indentation.
apiVersion: kibana.k8s.elastic.co/v1
kind: Kibana
metadata:
  name: stable-kibana
spec:
  version: 8.12.1
  count: 1
  config:
    # URLs handed to enrolled agents; both must be reachable from agent pods.
    xpack.fleet.agents.elasticsearch.hosts: ["https://stable-cluster-es-http:9200"]
    # ECK exposes Fleet Server through a Service named
    # "<agent-name>-agent-http". The Agent resource in this setup is named
    # "fleet", so the host is "fleet-agent-http" (it was
    # "fleet-server-agent-http", which matches no resource shown here).
    # NOTE(review): agents in another namespace need the
    # "<service>.<namespace>.svc" form - confirm the namespace.
    xpack.fleet.agents.fleet_server.hosts: ["https://fleet-agent-http:8220"]
    xpack.fleet.packages:
      - name: apm
        version: latest
      - name: system
        version: latest
      - name: elastic_agent
        version: latest
      - name: fleet_server
        version: latest
    xpack.fleet.agentPolicies:
      # Policy referenced by the Fleet Server Agent via policyID.
      - name: Fleet Server ECK
        id: fleet-server-policy
        namespace: default
        monitoring_enabled:
          - logs
          - metrics
        unenroll_timeout: 900
        package_policies:
          - name: fleet_server
            id: fleet_server
            package:
              name: fleet_server
      # Default policy for regular Elastic Agents.
      - name: Elastic Agent ECK policy
        id: eck-agent
        namespace: default
        monitoring_enabled:
          - logs
          - metrics
        unenroll_timeout: 900
        is_default: true
        package_policies:
          - name: system
            id: system
            package:
              name: system
    server.publicBaseUrl: http://xxxxx.example.com
  elasticsearchRef:
    name: stable-cluster
  http:
    tls:
      # Kibana is served over plain HTTP (matches the http:// publicBaseUrl).
      selfSignedCertificate:
        disabled: true
  podTemplate:
    spec:
      initContainers:
        - imagePullPolicy: Always
          name: elastic-internal-init-config
      containers:
        - name: kibana
          imagePullPolicy: Always
          livenessProbe:
            tcpSocket:
              port: 5601
            initialDelaySeconds: 15
            periodSeconds: 10
          readinessProbe:
            tcpSocket:
              port: 5601
            failureThreshold: 3
            initialDelaySeconds: 10
            periodSeconds: 12
            successThreshold: 1
            timeoutSeconds: 12
          env:
            # Node.js old-space heap cap in MiB.
            - name: NODE_OPTIONS
              value: "--max-old-space-size=2048"
          resources:
            requests:
              memory: 1Gi
              cpu: 0.5
            limits:
              memory: 2.5Gi
              cpu: 2
Fleet Server:
# ECK-managed Elastic Agent "fleet", run as a single-replica Deployment in
# Fleet mode with Fleet Server enabled.
# Nesting is restored from a paste that had lost its indentation.
apiVersion: agent.k8s.elastic.co/v1alpha1
kind: Agent
metadata:
  name: fleet
spec:
  version: 8.12.1
  kibanaRef:
    name: stable-kibana
  elasticsearchRefs:
    - name: stable-cluster
  mode: fleet
  fleetServerEnabled: true
  # Must match an agent policy id preconfigured in Kibana
  # (xpack.fleet.agentPolicies).
  policyID: fleet-server-policy
  deployment:
    replicas: 1
    podTemplate:
      spec:
        containers:
          - name: agent
            image: registry/elastic-agent:8.12.1
            # NOTE(review): both probes only touch a file, so they succeed
            # whenever a command can be exec'd in the container and say
            # nothing about Fleet Server health - confirm this is intended.
            livenessProbe:
              exec:
                command:
                  - touch
                  - /tmp/fleet-server
            readinessProbe:
              exec:
                command:
                  - touch
                  - /tmp/fleet-server
        serviceAccountName: fleet-server
        automountServiceAccountToken: true
        securityContext:
          # The pod runs as root (UID 0).
          runAsUser: 0
I expected the Fleet Server to come up and enroll with Kibana/Elasticsearch, but instead it logs the following and exits:
{"log.level":"info","@timestamp":"2024-04-05T18:33:39.798Z","message":"Waiting on policy with Fleet Server integration: fleet-server-policy","component":{"binary":"fleet-server","dataset":"elastic_agent.fleet_server","id":"fleet-server-default","type":"fleet-server"},"log":{"source":"fleet-server-default"},"service.type":"fleet-server","state":"STARTING","ecs.version":"1.6.0","service.name":"fleet-server","ecs.version":"1.6.0"}
{"log.level":"info","@timestamp":"2024-04-05T18:33:44.140Z","log.origin":{"file.name":"cmd/run.go","file.line":308},"message":"signal \"terminated\" received","log":{"source":"elastic-agent"},"ecs.version":"1.6.0"}
{"log.level":"info","@timestamp":"2024-04-05T18:33:44.140Z","log.origin":{"file.name":"cmd/run.go","file.line":320},"message":"Shutting down Elastic Agent and sending last events...","log":{"source":"elastic-agent"},"ecs.version":"1.6.0"}
{"log.level":"info","@timestamp":"2024-04-05T18:33:44.141Z","message":"On signal","component":{"binary":"fleet-server","dataset":"elastic_agent.fleet_server","id":"fleet-server-default","type":"fleet-server"},"log":{"source":"fleet-server-default"},"sig":"terminated","ecs.version":"1.6.0","service.name":"fleet-server","service.type":"fleet-server","ecs.version":"1.6.0"}
{"log.level":"info","@timestamp":"2024-04-05T18:33:44.141Z","message":"Stopping","component":{"binary":"fleet-server","dataset":"elastic_agent.fleet_server","id":"fleet-server-default","type":"fleet-server"},"log":{"source":"fleet-server-default"},"ecs.version":"1.6.0","service.name":"fleet-server","service.type":"fleet-server","state":"STOPPING","ecs.version":"1.6.0"}
{"log.level":"info","@timestamp":"2024-04-05T18:33:44.341Z","log.origin":{"file.name":"cmd/run.go","file.line":326},"message":"Shutting down completed.","log":{"source":"elastic-agent"},"ecs.version":"1.6.0"}
{"log.level":"info","@timestamp":"2024-04-05T18:33:44.343Z","log.origin":{"file.name":"reload/reload.go","file.line":68},"message":"Stopping server","log":{"source":"elastic-agent"},"ecs.version":"1.6.0"}
{"log.level":"info","@timestamp":"2024-04-05T18:33:44.343Z","log.logger":"api","log.origin":{"file.name":"api/server.go","file.line":80},"message":"Stats endpoint (127.0.0.1:6791) finished: accept tcp 127.0.0.1:6791: use of closed network connection","log":{"source":"elastic-agent"},"ecs.version":"1.6.0"}
Error: fleet-server failed: context canceled
Do you have any idea why this is happening?
Thanks.