Hi,
Periodically, we find that our Windows servers stop sending perfmon metrics, while other metrics (e.g. cpu from the system module) continue just fine.
The current workaround that gets them going again is restarting the metricbeat service.
Details:
metricbeat version 7.9.2 (amd64), libbeat 7.9.2 [2ab907f5ccecf9fd82fe37105082e89fd871f684 built 2020-09-22 23:25:16 +0000 UTC]
Example log entries:
2021-03-13T23:36:24.015+1000 ERROR [perfmon] perfmon/perfmon.go:64 no counter paths were found
2021-03-13T23:36:24.015+1000 INFO module/wrapper.go:259 Error fetching data for metricset windows.perfmon: failed reading counters: failed collecting counter values: No data to return.
2021-03-13T23:36:34.015+1000 ERROR [perfmon] perfmon/perfmon.go:64 no counter paths were found
2021-03-13T23:36:34.015+1000 INFO module/wrapper.go:259 Error fetching data for metricset windows.perfmon: failed reading counters: failed collecting counter values: No data to return.
2021-03-13T23:36:44.016+1000 ERROR [perfmon] perfmon/perfmon.go:64 no counter paths were found
2021-03-13T23:36:44.016+1000 INFO module/wrapper.go:259 Error fetching data for metricset windows.perfmon: failed reading counters: failed collecting counter values: No data to return.
2021-03-13T23:36:53.248+1000 INFO [monitoring] log/log.go:145 Non-zero metrics in the last 30s {"monitoring": {"metrics": {"beat":{"cpu":{"system":{"ticks":4131515,"time":{"ms":344}},"total":{"ticks":6892108,"time":{"ms":469},"value":6892108},"user":{"ticks":2760593,"time":{"ms":125}}},"handles":{"open":387},"info":{"ephemeral_id":"939d67d4-fca9-4757-ac74-88cab028ff12","uptime":{"ms":669242075}},"memstats":{"gc_next":23646592,"memory_alloc":13672448,"memory_total":146326614688,"rss":8192},"runtime":{"goroutines":69}},"libbeat":{"config":{"module":{"running":4}},"output":{"events":{"acked":48,"batches":6,"total":48}},"pipeline":{"clients":9,"events":{"active":0,"published":48,"total":48},"queue":{"acked":48}}},"metricbeat":{"system":{"cpu":{"events":3,"success":3},"memory":{"events":3,"success":3},"network":{"events":9,"success":9},"process":{"events":24,"success":24},"process_summary":{"events":3,"success":3},"socket_summary":{"events":3,"success":3}},"windows":{"perfmon":{"events":3,"failures":3}}}}}}
Configuration:
# System module: core host metricsets, collected every 10 seconds.
- module: system
  period: 10s
  metricsets:
    - cpu
    - memory
    - network
    - process
    - process_summary
    - socket_summary
  # Restrict per-process reporting to the top 5 by CPU and top 5 by memory.
  process.include_top_n:
    by_cpu: 5
    by_memory: 5
# System module: uptime only, collected every 15 minutes.
- module: system
  period: 15m
  metricsets:
    - uptime
# Windows module: service metricset, collected every 60 seconds.
- module: windows
  metricsets:
    - "service"
  enabled: true
  period: 60s
# Windows module: perfmon metricset, collected every 10 seconds.
# This is the metricset that logs "no counter paths were found" when it stalls.
# NOTE(review): every query below uses a wildcard instance; presumably the
# counter list is resolved once at startup. Consider testing
# `perfmon.refresh_wildcard_counters: true` -- confirm availability and
# semantics against the Metricbeat docs for the deployed version.
- module: windows
  metricsets:
    - perfmon
  period: 10s
  perfmon.ignore_non_existent_counters: true
  perfmon.group_measurements_by_instance: true
  perfmon.queries:
    # WCF service counters, all instances.
    - object: "ServiceModelService 4.0.0.0"
      instance: ["*"]
      counters:
        - name: "Calls"
          field: "ServiceModelService_Calls"
        - name: "Calls Per Second"
          field: "ServiceModelService_Calls.per_sec"
        - name: "Calls Outstanding"
          field: "ServiceModelService_Calls.outstanding"
        - name: "Calls Failed"
          field: "ServiceModelService_Calls.failed"
        - name: "Calls Failed Per Second"
          field: "ServiceModelService_Calls.failed.per_sec"
        - name: "Calls Faulted"
          field: "ServiceModelService_Calls.faulted"
        - name: "Calls Duration"
          field: "ServiceModelService_Calls.duration"
        - name: "Instances"
          field: "ServiceModelService_Instances"
        - name: "Instances Created Per Second"
          field: "ServiceModelService_Instances.per_sec"
        - name: "Percent Of Max Concurrent Calls"
          field: "ServiceModelService_MaxConcurrent.calls.pct"
        - name: "Percent Of Max Concurrent Instances"
          field: "ServiceModelService_MaxConcurrent.instances.pct"
        - name: "Percent Of Max Concurrent Sessions"
          field: "ServiceModelService_MaxConcurrent.sessions.pct"
    # ADO.NET SqlClient connection counters, all instances.
    - object: ".NET Data Provider for SqlServer"
      instance: ["*"]
      counters:
        - name: "NumberOfActiveConnections"
          field: "SqlServer_NumberOfActiveConnections"
        - name: "NumberOfPooledConnections"
          field: "SqlServer_NumberOfPooledConnections"
Thank you!