I am attempting to migrate the Elastic APM setup in my EKS cluster from the standalone agent to Elastic Agent. I'm using a Fleet-managed policy with an agent deployed by the Elastic Operator, which forwards data to Elastic Cloud. I'm doing it this way, rather than sending data directly to Elastic Cloud, to minimize the impact on my users.
As far as I know, I have everything configured so that the config is identical to my (working) standalone config. However, I get the following errors when I attempt to send events to the Elastic Agent instance, while events sent to the standalone instance go through just fine.
The one item requested in the Fleet policy specification that doesn't appear in the policy when I inspect it from the agent is the URL specification.
My Elastic Agent is listening on the following service:
https://elastic-apm.elk-dev.svc.cluster.local:8200
However, the virtual service and gateway that send traffic to this cluster are listening on:
https://elastic-apm.internal.example.com:8200
I've tried the config with each of these URLs, with the same results.
Application Errors
{"@timestamp":"2022-08-26T14:14:23.732Z","log.level":"error","message":"Failed to submit message: \"Unable to reach APM Server: HTTPSConnectionPool(host='elastic-apm.internal.example.com', port=8200): Max retries exceeded with url: /intake/v2/events (Caused by SSLError(SSLEOFError(8, 'EOF occurred in violation of protocol (_ssl.c:1131)'))) (url: https://elastic-apm.internal.example.com:8200/intake/v2/events)\"","ecs":{"version":"1.6.0"},"error":{"message":"Unable to reach APM Server: HTTPSConnectionPool(host='elastic-apm.internal.example.com', port=8200): Max retries exceeded with url: /intake/v2/events (Caused by SSLError(SSLEOFError(8, 'EOF occurred in violation of protocol (_ssl.c:1131)'))) (url: https://elastic-apm.internal.example.com:8200/intake/v2/events)","stack_trace":" File \"/venv/lib/python3.8/site-packages/elasticapm/transport/base.py\", line 239, in _flush\n self.send(data)\n File \"/venv/lib/python3.8/site-packages/elasticapm/transport/http.py\", line 100, in send\n raise TransportException(message, data, print_trace=print_trace)\n","type":"TransportException"},"event":{"dataset":"my-service.log"},"log":{"logger":"elasticapm.transport","origin":{"file":{"line":297,"name":"base.py"},"function":"handle_transport_fail"},"original":"Failed to submit message: \"Unable to reach APM Server: HTTPSConnectionPool(host='elastic-apm.internal.example.com', port=8200): Max retries exceeded with url: /intake/v2/events (Caused by SSLError(SSLEOFError(8, 'EOF occurred in violation of protocol (_ssl.c:1131)'))) (url: https://elastic-apm.internal.example.com:8200/intake/v2/events)\""},"process":{"name":"MainProcess","pid":1,"thread":{"id":140702020279096,"name":"eapm event processor thread"}},"service":{"name":"my-service"}}
and
{"@timestamp":"2022-08-26T14:14:42.948Z","log.level":"error","message":"dropping flushed data due to transport failure back-off","ecs":{"version":"1.6.0"},"event":{"dataset":"my-service.log"},"log":{"logger":"elasticapm.transport","origin":{"file":{"line":231,"name":"base.py"},"function":"_flush"},"original":"dropping flushed data due to transport failure back-off"},"process":{"name":"MainProcess","pid":1,"thread":{"id":140702020279096,"name":"eapm event processor thread"}},"service":{"name":"my-service"}}
Policy Config Generated by Fleet
agent:
download:
source_uri: https://artifacts.elastic.co/downloads/
monitoring:
enabled: false
logs: false
metrics: false
fleet:
hosts:
- https://my-elastic-cloud-fleet-host.fleet.us-east-1.aws.found.io:443
id: e30c6a10-14d3-11ed-a825-4b1a88ea1be9
inputs:
- apm-server:
agent_config: []
auth:
anonymous:
allow_agent: null
allow_service: null
enabled: true
rate_limit:
event_limit: 300
ip_limit: 1000
api_key:
enabled: false
limit: 100
secret_token: null
capture_personal_data: true
default_service_environment: null
expvar.enabled: false
host: 0.0.0.0:8200
idle_timeout: 45s
java_attacher:
discovery-rules: null
download-agent-version: null
enabled: false
max_connections: 0
max_event_size: 307200
max_header_size: 1.048576e+06
pprof.enabled: false
read_timeout: 3600s
response_headers: null
rum:
allow_headers: null
allow_origins:
- '*'
enabled: true
exclude_from_grouping: ^/webpack
library_pattern: node_modules|bower_components|~
response_headers: null
source_mapping:
metadata: []
sampling:
tail:
enabled: false
interval: 1m
policies:
- sample_rate: 0.1
storage_limit: 3GB
shutdown_timeout: 30s
ssl:
certificate: certs/cert.pem
cipher_suites: null
curve_types: null
enabled: true
key: certs/key.pem
key_passphrase: null
supported_protocols:
- TLSv1.1
- TLSv1.2
- TLSv1.3
write_timeout: 30s
data_stream:
namespace: default
id: 73ee33d4-7531-493f-94b6-f0022995d066
meta:
package:
name: apm
version: 8.4.0
name: Elastic APM Non-Prod
revision: 5
type: apm
use_output: default
output_permissions:
default:
_elastic_agent_checks:
cluster:
- monitor
_elastic_agent_monitoring:
indices: []
73ee33d4-7531-493f-94b6-f0022995d066:
indices:
- names:
- logs-apm.app-default
privileges:
- auto_configure
- create_doc
- names:
- metrics-apm.app.*-default
privileges:
- auto_configure
- create_doc
- names:
- logs-apm.error-default
privileges:
- auto_configure
- create_doc
- names:
- metrics-apm.internal-default
privileges:
- auto_configure
- create_doc
- names:
- metrics-apm.profiling-default
privileges:
- auto_configure
- create_doc
- names:
- traces-apm.rum-default
privileges:
- auto_configure
- create_doc
- names:
- traces-apm.sampled-default
privileges:
- auto_configure
- create_doc
- maintenance
- monitor
- read
- names:
- traces-apm-default
privileges:
- auto_configure
- create_doc
outputs:
default:
api_key: changeme
hosts:
- https://my-elastic-cloud-es-host.us-east-1.aws.found.io:443
type: elasticsearch
revision: 19