Hello everyone,
Hope you're all doing well! I've been diving into setting up Elasticsearch on Kubernetes, and it's been quite the journey. I've been at it for the past two weeks, with my main goal being to seamlessly feed Kubernetes logs into Elasticsearch.
However, I've hit a bit of a roadblock when it comes to collecting logs from Kubernetes – anything other than fleet-server and agent logs, to be specific. It looks like there's an SSL hiccup causing the issue, and despite my best efforts, I'm having a tough time cracking the code on this one.
If anyone has experience or insights into tackling SSL-related challenges in a Kubernetes environment, I would be immensely grateful for any guidance you can share.
I am seeing the following error logs:
elastic-system/elastic-agent-56z47[elastic-agent]: {"log.level":"error","@timestamp":"2023-12-12T22:04:35.435Z","message":"Failed to connect to backoff(elasticsearch(https://elasticsearch-es-http.elastic-system.svc:9200)): Get \"https://elasticsearch-es-http.elastic-system.svc:9200\": x509: certificate signed by unknown authority","component":{"binary":"metricbeat","dataset":"elastic_agent.metricbeat","id":"system/metrics-default","type":"system/metrics"},"log":{"source":"system/metrics-default"},"service.name":"metricbeat","ecs.version":"1.6.0","log.logger":"publisher_pipeline_output","log.origin":{"file.line":148,"file.name":"pipeline/client_worker.go"},"ecs.version":"1.6.0"}
elastic-system/elastic-agent-56z47[elastic-agent]: {"log.level":"info","@timestamp":"2023-12-12T22:04:35.435Z","message":"Attempting to reconnect to backoff(elasticsearch(https://elasticsearch-es-http.elastic-system.svc:9200)) with 5 reconnect attempt(s)","component":{"binary":"metricbeat","dataset":"elastic_agent.metricbeat","id":"system/metrics-default","type":"system/metrics"},"log":{"source":"system/metrics-default"},"service.name":"metricbeat","ecs.version":"1.6.0","log.logger":"publisher_pipeline_output","log.origin":{"file.line":139,"file.name":"pipeline/client_worker.go"},"ecs.version":"1.6.0"}
elastic-system/elastic-agent-56z47[elastic-agent]: {"log.level":"error","@timestamp":"2023-12-12T22:04:35.440Z","message":"Error dialing x509: certificate signed by unknown authority","component":{"binary":"metricbeat","dataset":"elastic_agent.metricbeat","id":"system/metrics-default","type":"system/metrics"},"log":{"source":"system/metrics-default"},"service.name":"metricbeat","ecs.version":"1.6.0","log.logger":"esclientleg","log.origin":{"file.line":38,"file.name":"transport/logging.go"},"network":"tcp","address":"elasticsearch-es-http.elastic-system.svc:9200","ecs.version":"1.6.0"}
elastic-system/elasticsearch-es-master-0[elasticsearch]: {"@timestamp":"2023-12-12T22:04:35.440Z", "log.level": "WARN", "message":"caught exception while handling client http traffic, closing connection Netty4HttpChannel{localAddress=/10.42.0.113:9200, remoteAddress=/188.40.132.107:50446}", "ecs.version": "1.2.0","service.name":"ES_ECS","event.dataset":"elasticsearch.server","process.thread.name":"elasticsearch[elasticsearch-es-master-0][transport_worker][T#4]","log.logger":"org.elasticsearch.http.AbstractHttpServerTransport","elasticsearch.cluster.uuid":"D-0ZZqAsSv-Vky-lVCshQw","elasticsearch.node.id":"dcG_f6KqTjWHSL8OJ6d7dg","elasticsearch.node.name":"elasticsearch-es-master-0","elasticsearch.cluster.name":"elasticsearch","error.type":"io.netty.handler.codec.DecoderException","error.message":"javax.net.ssl.SSLHandshakeException: Received fatal alert: bad_certificate","error.stack_trace":"io.netty.handler.codec.DecoderException: javax.net.ssl.SSLHandshakeException: Received fatal alert: bad_certificate\n\tat io.netty.codec@4.1.94.Final/io.netty.handler.codec.ByteToMessageDecoder.callDecode(ByteToMessageDecoder.java:499)\n\tat io.netty.codec@4.1.94.Final/io.netty.handler.codec.ByteToMessageDecoder.channelRead(ByteToMessageDecoder.java:290)\n\tat io.netty.transport@4.1.94.Final/io.netty.channel.AbstractChannelHandlerContext.invokeChannelRead(AbstractChannelHandlerContext.java:444)\n\tat io.netty.transport@4.1.94.Final/io.netty.channel.AbstractChannelHandlerContext.invokeChannelRead(AbstractChannelHandlerContext.java:420)\n\tat io.netty.transport@4.1.94.Final/io.netty.channel.AbstractChannelHandlerContext.fireChannelRead(AbstractChannelHandlerContext.java:412)\n\tat io.netty.transport@4.1.94.Final/io.netty.channel.DefaultChannelPipeline$HeadContext.channelRead(DefaultChannelPipeline.java:1410)\n\tat io.netty.transport@4.1.94.Final/io.netty.channel.AbstractChannelHandlerContext.invokeChannelRead(AbstractChannelHandlerContext.java:440)\n\tat io.netty.transport@4.1.94.Final/io.netty.channel.AbstractChannelHandlerContext.invokeChannelRead(AbstractChannelHandlerContext.java:420)\n\tat io.netty.transport@4.1.94.Final/io.netty.channel.DefaultChannelPipeline.fireChannelRead(DefaultChannelPipeline.java:919)\n\tat io.netty.transport@4.1.94.Final/io.netty.channel.nio.AbstractNioByteChannel$NioByteUnsafe.read(AbstractNioByteChannel.java:166)\n\tat io.netty.transport@4.1.94.Final/io.netty.channel.nio.NioEventLoop.processSelectedKey(NioEventLoop.java:788)\n\tat io.netty.transport@4.1.94.Final/io.netty.channel.nio.NioEventLoop.processSelectedKeysPlain(NioEventLoop.java:689)\n\tat io.netty.transport@4.1.94.Final/io.netty.channel.nio.NioEventLoop.processSelectedKeys(NioEventLoop.java:652)\n\tat io.netty.transport@4.1.94.Final/io.netty.channel.nio.NioEventLoop.run(NioEventLoop.java:562)\n\tat io.netty.common@4.1.94.Final/io.netty.util.concurrent.SingleThreadEventExecutor$4.run(SingleThreadEventExecutor.java:997)\n\tat io.netty.common@4.1.94.Final/io.netty.util.internal.ThreadExecutorMap$2.run(ThreadExecutorMap.java:74)\n\tat java.base/java.lang.Thread.run(Thread.java:1583)\nCaused by: javax.net.ssl.SSLHandshakeException: Received fatal alert: bad_certificate\n\tat java.base/sun.security.ssl.Alert.createSSLException(Alert.java:130)\n\tat java.base/sun.security.ssl.Alert.createSSLException(Alert.java:117)\n\tat java.base/sun.security.ssl.TransportContext.fatal(TransportContext.java:365)\n\tat java.base/sun.security.ssl.Alert$AlertConsumer.consume(Alert.java:287)\n\tat java.base/sun.security.ssl.TransportContext.dispatch(TransportContext.java:204)\n\tat java.base/sun.security.ssl.SSLTransport.decode(SSLTransport.java:172)\n\tat java.base/sun.security.ssl.SSLEngineImpl.decode(SSLEngineImpl.java:736)\n\tat java.base/sun.security.ssl.SSLEngineImpl.readRecord(SSLEngineImpl.java:691)\n\tat java.base/sun.security.ssl.SSLEngineImpl.unwrap(SSLEngineImpl.java:506)\n\tat java.base/sun.security.ssl.SSLEngineImpl.unwrap(SSLEngineImpl.java:482)\n\tat java.base/javax.net.ssl.SSLEngine.unwrap(SSLEngine.java:679)\n\tat io.netty.handler@4.1.94.Final/io.netty.handler.ssl.SslHandler$SslEngineType$3.unwrap(SslHandler.java:297)\n\tat io.netty.handler@4.1.94.Final/io.netty.handler.ssl.SslHandler.unwrap(SslHandler.java:1353)\n\tat io.netty.handler@4.1.94.Final/io.netty.handler.ssl.SslHandler.decodeJdkCompatible(SslHandler.java:1246)\n\tat io.netty.handler@4.1.94.Final/io.netty.handler.ssl.SslHandler.decode(SslHandler.java:1295)\n\tat io.netty.codec@4.1.94.Final/io.netty.handler.codec.ByteToMessageDecoder.decodeRemovalReentryProtection(ByteToMessageDecoder.java:529)\n\tat io.netty.codec@4.1.94.Final/io.netty.handler.codec.ByteToMessageDecoder.callDecode(ByteToMessageDecoder.java:468)\n\t... 16 more\n"}
I installed eck-operator (2.10.0) and eck-stack (0.8.0) via helm.
values.yaml of eck-stack:
eck-elasticsearch:
enabled: true
fullnameOverride: elasticsearch
annotations:
eck.k8s.elastic.co/license: basic
volumeClaimDeletePolicy: DeleteOnScaledownOnly
nodeSets:
- name: master
count: 1
podTemplate:
spec:
initContainers:
- name: sysctl
securityContext:
privileged: true
runAsUser: 0
command:
- sh
- "-c"
- sysctl -w vm.max_map_count=262144
volumes:
- name: elasticsearch-data
emptyDir: {}
volumeClaimTemplates:
- metadata:
name: elasticsearch-data
spec:
accessModes:
- ReadWriteOnce
resources:
requests:
storage: 500Gi
eck-kibana:
enabled: true
fullnameOverride: kibana
annotations:
eck.k8s.elastic.co/license: basic
teleport.dev/protocol: https
spec:
elasticsearchRef:
name: elasticsearch
config:
server.publicBaseUrl: https://logs.asic.li
xpack.fleet.agents.elasticsearch.hosts: ["https://elasticsearch-es-http.elastic-system.svc:9200"]
xpack.fleet.agents.fleet_server.hosts: ["https://fleet-server-agent-http.elastic-system.svc:8220"]
xpack.fleet.packages:
- name: system
version: latest
- name: elastic_agent
version: latest
- name: fleet_server
version: latest
- name: kubernetes
version: latest
xpack.fleet.agentPolicies:
- name: Fleet Server on ECK policy
id: eck-fleet-server
namespace: default
monitoring_enabled:
- logs
- metrics
package_policies:
- name: fleet_server-1
id: fleet_server-1
package:
name: fleet_server
- name: Elastic Agent on ECK policy
id: eck-agent
namespace: default
monitoring_enabled:
- logs
- metrics
unenroll_timeout: 900
package_policies:
- package:
name: system
name: system-1
- package:
name: kubernetes
name: kubernetes-1
eck-fleet-server:
enabled: true
annotations:
eck.k8s.elastic.co/license: basic
fullnameOverride: "fleet-server"
spec:
# Agent policy to be used.
policyID: eck-fleet-server
kibanaRef:
name: kibana
elasticsearchRefs:
- name: elasticsearch
manifest to install agent (generated via kubernetes integration and adjusted):
---
# For more information https://www.elastic.co/guide/en/fleet/current/running-on-kubernetes-managed-by-fleet.html
apiVersion: apps/v1
kind: DaemonSet
metadata:
name: elastic-agent
namespace: elastic-system
labels:
app: elastic-agent
spec:
selector:
matchLabels:
app: elastic-agent
template:
metadata:
labels:
app: elastic-agent
spec:
# Tolerations are needed to run Elastic Agent on Kubernetes control-plane nodes.
# Agents running on control-plane nodes collect metrics from the control plane components (scheduler, controller manager) of Kubernetes
tolerations:
- key: node-role.kubernetes.io/control-plane
effect: NoSchedule
- key: node-role.kubernetes.io/master
effect: NoSchedule
serviceAccountName: elastic-agent
hostNetwork: true
# 'hostPID: true' enables the Elastic Security integration to observe all process exec events on the host.
# Sharing the host process ID namespace gives visibility of all processes running on the same host.
hostPID: true
dnsPolicy: ClusterFirstWithHostNet
containers:
- name: elastic-agent
image: docker.elastic.co/beats/elastic-agent:8.10.4
env:
# Set to 1 for enrollment into Fleet server. If not set, Elastic Agent is run in standalone mode
- name: FLEET_ENROLL
value: "1"
# Set to true to communicate with Fleet with either insecure HTTP or unverified HTTPS
- name: FLEET_INSECURE
value: "true"
# Fleet Server URL to enroll the Elastic Agent into
# FLEET_URL can be found in Kibana, go to Management > Fleet > Settings
- name: FLEET_URL
value: "https://fleet-server-agent-http.elastic-system.svc:8220"
# Elasticsearch API key used to enroll Elastic Agents in Fleet (https://www.elastic.co/guide/en/fleet/current/fleet-enrollment-tokens.html#fleet-enrollment-tokens)
# If FLEET_ENROLLMENT_TOKEN is empty then KIBANA_HOST, KIBANA_FLEET_USERNAME, KIBANA_FLEET_PASSWORD are needed
- name: FLEET_ENROLLMENT_TOKEN
value: "<HIDDEN>"
- name: KIBANA_HOST
value: "https://kibana-kb-http.elastic-system.svc:5601"
# The basic authentication username used to connect to Kibana and retrieve a service_token to enable Fleet
- name: ELASTICSEARCH_CA
valueFrom:
secretKeyRef:
name: elasticsearch-es-http-ca-internal
key: tls.crt
- name: NODE_NAME
valueFrom:
fieldRef:
fieldPath: spec.nodeName
- name: POD_NAME
valueFrom:
fieldRef:
fieldPath: metadata.name
# The following ELASTIC_NETINFO:false variable will disable the netinfo.enabled option of add-host-metadata processor. This will remove fields host.ip and host.mac.
# For more info: https://www.elastic.co/guide/en/beats/metricbeat/current/add-host-metadata.html
- name: ELASTIC_NETINFO
value: "false"
securityContext:
runAsUser: 0
# The following capabilities are needed for 'Defend for containers' integration (cloud-defend)
# If you are using this integration, please uncomment these lines before applying.
#capabilities:
# add:
# - BPF # (since Linux 5.8) allows loading of BPF programs, create most map types, load BTF, iterate programs and maps.
# - PERFMON # (since Linux 5.8) allows attaching of BPF programs used for performance metrics and observability operations.
# - SYS_RESOURCE # Allow use of special resources or raising of resource limits. Used by 'Defend for Containers' to modify 'rlimit_memlock'
########################################################################################
# The following capabilities are needed for Universal Profiling.
# More fine graded capabilities are only available for newer Linux kernels.
# If you are using the Universal Profiling integration, please uncomment these lines before applying.
#procMount: "Unmasked"
#privileged: true
#capabilities:
# add:
# - SYS_ADMIN
resources:
limits:
memory: 700Mi
requests:
cpu: 100m
memory: 400Mi
volumeMounts:
- name: proc
mountPath: /hostfs/proc
readOnly: true
- name: cgroup
mountPath: /hostfs/sys/fs/cgroup
readOnly: true
- name: varlibdockercontainers
mountPath: /var/lib/docker/containers
readOnly: true
- name: varlog
mountPath: /var/log
readOnly: true
- name: etc-full
mountPath: /hostfs/etc
readOnly: true
- name: var-lib
mountPath: /hostfs/var/lib
readOnly: true
- name: etc-mid
mountPath: /etc/machine-id
readOnly: true
- name: sys-kernel-debug
mountPath: /sys/kernel/debug
- name: elastic-agent-state
mountPath: /usr/share/elastic-agent/state
# If you are using the Universal Profiling integration, please uncomment these lines before applying.
#- name: universal-profiling-cache
# mountPath: /var/cache/Elastic
volumes:
- name: proc
hostPath:
path: /proc
- name: cgroup
hostPath:
path: /sys/fs/cgroup
- name: varlibdockercontainers
hostPath:
path: /var/lib/docker/containers
- name: varlog
hostPath:
path: /var/log
# The following volumes are needed for Cloud Security Posture integration (cloudbeat)
# If you are not using this integration, then these volumes and the corresponding
# mounts can be removed.
- name: etc-full
hostPath:
path: /etc
- name: var-lib
hostPath:
path: /var/lib
# Mount /etc/machine-id from the host to determine host ID
# Needed for Elastic Security integration
- name: etc-mid
hostPath:
path: /etc/machine-id
type: File
# Needed for 'Defend for containers' integration (cloud-defend) and Universal Profiling
# If you are not using one of these integrations, then these volumes and the corresponding
# mounts can be removed.
- name: sys-kernel-debug
hostPath:
path: /sys/kernel/debug
# Mount /var/lib/elastic-agent-managed/elastic-system/state to store elastic-agent state
# Update 'elastic-system' with the namespace of your agent installation
- name: elastic-agent-state
hostPath:
path: /var/lib/elastic-agent-managed/elastic-system/state
type: DirectoryOrCreate
# Mount required for Universal Profiling.
# If you are using the Universal Profiling integration, please uncomment these lines before applying.
#- name: universal-profiling-cache
# hostPath:
# path: /var/cache/Elastic
# type: DirectoryOrCreate
---
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRoleBinding
metadata:
name: elastic-agent
subjects:
- kind: ServiceAccount
name: elastic-agent
namespace: elastic-system
roleRef:
kind: ClusterRole
name: elastic-agent
apiGroup: rbac.authorization.k8s.io
---
apiVersion: rbac.authorization.k8s.io/v1
kind: RoleBinding
metadata:
namespace: elastic-system
name: elastic-agent
subjects:
- kind: ServiceAccount
name: elastic-agent
namespace: elastic-system
roleRef:
kind: Role
name: elastic-agent
apiGroup: rbac.authorization.k8s.io
---
apiVersion: rbac.authorization.k8s.io/v1
kind: RoleBinding
metadata:
name: elastic-agent-kubeadm-config
namespace: elastic-system
subjects:
- kind: ServiceAccount
name: elastic-agent
namespace: elastic-system
roleRef:
kind: Role
name: elastic-agent-kubeadm-config
apiGroup: rbac.authorization.k8s.io
---
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRole
metadata:
name: elastic-agent
labels:
k8s-app: elastic-agent
rules:
- apiGroups: [""]
resources:
- nodes
- namespaces
- events
- pods
- services
- configmaps
# Needed for cloudbeat
- serviceaccounts
- persistentvolumes
- persistentvolumeclaims
verbs: ["get", "list", "watch"]
# Enable this rule only if planing to use kubernetes_secrets provider
#- apiGroups: [""]
# resources:
# - secrets
# verbs: ["get"]
- apiGroups: ["extensions"]
resources:
- replicasets
verbs: ["get", "list", "watch"]
- apiGroups: ["apps"]
resources:
- statefulsets
- deployments
- replicasets
- daemonsets
verbs: ["get", "list", "watch"]
- apiGroups:
- ""
resources:
- nodes/stats
verbs:
- get
- apiGroups: [ "batch" ]
resources:
- jobs
- cronjobs
verbs: [ "get", "list", "watch" ]
# Needed for apiserver
- nonResourceURLs:
- "/metrics"
verbs:
- get
# Needed for cloudbeat
- apiGroups: ["rbac.authorization.k8s.io"]
resources:
- clusterrolebindings
- clusterroles
- rolebindings
- roles
verbs: ["get", "list", "watch"]
# Needed for cloudbeat
- apiGroups: ["policy"]
resources:
- podsecuritypolicies
verbs: ["get", "list", "watch"]
- apiGroups: [ "storage.k8s.io" ]
resources:
- storageclasses
verbs: [ "get", "list", "watch" ]
---
apiVersion: rbac.authorization.k8s.io/v1
kind: Role
metadata:
name: elastic-agent
# Should be the namespace where elastic-agent is running
namespace: elastic-system
labels:
k8s-app: elastic-agent
rules:
- apiGroups:
- coordination.k8s.io
resources:
- leases
verbs: ["get", "create", "update"]
---
apiVersion: rbac.authorization.k8s.io/v1
kind: Role
metadata:
name: elastic-agent-kubeadm-config
namespace: elastic-system
labels:
k8s-app: elastic-agent
rules:
- apiGroups: [""]
resources:
- configmaps
resourceNames:
- kubeadm-config
verbs: ["get"]
---
apiVersion: v1
kind: ServiceAccount
metadata:
name: elastic-agent
namespace: elastic-system
labels:
k8s-app: elastic-agent
---
Greetings
Sebastiaan