PostgreSQL grok problem

Hello,
I am using PostgreSQL 12 with Filebeat 7.12. I followed (PostgreSQL module | Filebeat Reference [7.12] | Elastic) to configure filebeat. Right now Filebeat is sending the logs but I can see, there is a GROK failure and not all PostgreSQL dashboards are working.

Here is my filebeat config:

filebeat.inputs:

  • type: log
    enabled: false
    paths:
    • /var/log/*.log
  • type: filestream
    enabled: false
    paths:
    • /var/log/*.log
      filebeat.config.modules:
      path: ${path.config}/modules.d/*.yml
      reload.enabled: false
      setup.template.settings:
      index.number_of_shards: 1
      setup.ilm.enabled: true
      setup.ilm.rollover_alias: "filebeat-servername"
      setup.ilm.pattern: "{now/d}"
      setup.dashboards.enabled: true
      setup.kibana:
      host: "kibana:5601"
      output.elasticsearch:
      hosts: ["elastic:9200"]
      processors:
    • add_host_metadata:
      when.not.contains.tags: forwarded
    • add_cloud_metadata: ~
    • add_docker_metadata: ~
    • add_kubernetes_metadata: ~

Here is my filebeat postgresql module config:

  • module: postgresql
    log:
    enabled: true
    var.paths: ["/data/pgsql/patroni/log/*.log"]

Here is error

{
"_index": "filebeat-servername-2021.05.17",
"_type": "_doc",
"_id": "aLp7f3kBXre3GWy8Jlop",
"_version": 1,
"_score": null,
"fields": {
"agent.version.keyword": [
"7.12.1"
],
"host.architecture.keyword": [
"x86_64"
],
"host.name.keyword": [
"servername"
],
"event.dataset.keyword": [
"postgresql.log"
],
"host.hostname": [
"servername"
],
"host.mac": [
"00:50:56:9a:39:bc"
],
"agent.hostname.keyword": [
"servername"
],
"service.type": [
"postgresql"
],
"ecs.version.keyword": [
"1.8.0"
],
"host.ip.keyword": [
"11.22.33.444",
"fe80::7f85:b9e:ec18:70d5"
],
"host.os.version": [
"7 (Core)"
],
"host.os.name": [
"CentOS Linux"
],
"agent.name": [
"servername"
],
"host.id.keyword": [
"8e08bb3b0d484afea7697b5308c72831"
],
"host.name": [
"servername"
],
"host.os.version.keyword": [
"7 (Core)"
],
"host.os.type": [
"linux"
],
"agent.id.keyword": [
"afcf8273-3ef8-4b70-9885-c0524458fca4"
],
"fileset.name": [
"log"
],
"input.type": [
"log"
],
"log.offset": [
171762268
],
"agent.hostname": [
"servername"
],
"host.architecture": [
"x86_64"
],
"fileset.name.keyword": [
"log"
],
"agent.id": [
"afcf8273-3ef8-4b70-9885-c0524458fca4"
],
"ecs.version": [
"1.8.0"
],
"host.containerized": [
false
],
"event.module.keyword": [
"postgresql"
],
"host.hostname.keyword": [
"servername"
],
"agent.version": [
"7.12.1"
],
"host.os.family": [
"redhat"
],
"service.type.keyword": [
"postgresql"
],
"input.type.keyword": [
"log"
],
"host.ip": [
"11.22.33.444",
"fe80::7f85:b9e:ec18:70d5"
],
"agent.type": [
"filebeat"
],
"event.module": [
"postgresql"
],
"host.os.kernel.keyword": [
"5.4.108-1.el7.elrepo.x86_64"
],
"host.os.kernel": [
"5.4.108-1.el7.elrepo.x86_64"
],
"host.os.name.keyword": [
"CentOS Linux"
],
"host.id": [
"8e08bb3b0d484afea7697b5308c72831"
],
"log.file.path.keyword": [
"/data/pgsql/patroni/log/postgresql-Tue.log"
],
"agent.type.keyword": [
"filebeat"
],
"agent.ephemeral_id.keyword": [
"04d7fc98-524a-4916-8959-0aa5b14662b2"
],
"host.os.codename.keyword": [
"Core"
],
"host.mac.keyword": [
"00:50:56:9a:39:bc"
],
"agent.name.keyword": [
"servername"
],
"host.os.codename": [
"Core"
],
"message": [
"2021-05-18 15:37:32.073 +03 [78544] postgres@postgres LOG: duration: 0.783 ms statement: SELECT pg_catalog.to_char(pg_catalog.pg_postmaster_start_time(), 'YYYY-MM-DD HH24:MI:SS.MS TZ'), CASE WHEN pg_catalog.pg_is_in_recovery() THEN 0 ELSE ('x' || pg_catalog.substr(pg_catalog.pg_walfile_name(pg_catalog.pg_current_wal_lsn()), 1, 8))::bit(32)::int END, CASE WHEN pg_catalog.pg_is_in_recovery() THEN 0 ELSE pg_catalog.pg_wal_lsn_diff(pg_catalog.pg_current_wal_lsn(), '0/0')::bigint END, pg_catalog.pg_wal_lsn_diff(pg_catalog.pg_last_wal_replay_lsn(), '0/0')::bigint, pg_catalog.pg_wal_lsn_diff(COALESCE(pg_catalog.pg_last_wal_receive_lsn(), '0/0'), '0/0')::bigint, pg_catalog.pg_is_in_recovery() AND pg_catalog.pg_is_wal_replay_paused(), pg_catalog.to_char(pg_catalog.pg_last_xact_replay_timestamp(), 'YYYY-MM-DD HH24:MI:SS.MS TZ'), pg_catalog.array_to_json(pg_catalog.array_agg(pg_catalog.row_to_json(ri))) FROM (SELECT (SELECT rolname FROM pg_authid WHERE oid = usesysid) AS usename, application_name, client_addr, w.state, sync_state, sync_priority FROM pg_catalog.pg_stat_get_wal_senders() w, pg_catalog.pg_stat_get_activity(pid)) AS ri"
],
"host.os.family.keyword": [
"redhat"
],
"event.ingested": [
"2021-05-18T12:37:27.720Z"
],
"@timestamp": [
"2021-05-18T12:37:32.772Z"
],
"host.os.type.keyword": [
"linux"
],
"host.os.platform": [
"centos"
],
"host.os.platform.keyword": [
"centos"
],
"error.message": [
"Provided Grok expressions do not match field value: [2021-05-18 15:37:32.073 +03 [78544] postgres@postgres LOG: duration: 0.783 ms statement: SELECT pg_catalog.to_char(pg_catalog.pg_postmaster_start_time(), 'YYYY-MM-DD HH24:MI:SS.MS TZ'), CASE WHEN pg_catalog.pg_is_in_recovery() THEN 0 ELSE ('x' || pg_catalog.substr(pg_catalog.pg_walfile_name(pg_catalog.pg_current_wal_lsn()), 1, 8))::bit(32)::int END, CASE WHEN pg_catalog.pg_is_in_recovery() THEN 0 ELSE pg_catalog.pg_wal_lsn_diff(pg_catalog.pg_current_wal_lsn(), '0/0')::bigint END, pg_catalog.pg_wal_lsn_diff(pg_catalog.pg_last_wal_replay_lsn(), '0/0')::bigint, pg_catalog.pg_wal_lsn_diff(COALESCE(pg_catalog.pg_last_wal_receive_lsn(), '0/0'), '0/0')::bigint, pg_catalog.pg_is_in_recovery() AND pg_catalog.pg_is_wal_replay_paused(), pg_catalog.to_char(pg_catalog.pg_last_xact_replay_timestamp(), 'YYYY-MM-DD HH24:MI:SS.MS TZ'), pg_catalog.array_to_json(pg_catalog.array_agg(pg_catalog.row_to_json(ri))) FROM (SELECT (SELECT rolname FROM pg_authid WHERE oid = usesysid) AS usename, application_name, client_addr, w.state, sync_state, sync_priority FROM pg_catalog.pg_stat_get_wal_senders() w, pg_catalog.pg_stat_get_activity(pid)) AS ri]"
],
"log.file.path": [
"/data/pgsql/patroni/log/postgresql-Tue.log"
],
"agent.ephemeral_id": [
"04d7fc98-524a-4916-8959-0aa5b14662b2"
],
"event.dataset": [
"postgresql.log"
]
},
"highlight": {
"agent.name": [
"@kibana-highlighted-field@servername@/kibana-highlighted-field@"
]
},
"sort": [
1621341452772
]
}

What could be the problem? Thank you.

Hello! Which version of postgresql are you using? Based on the documentation, the postgresql module using .log was tested with logs from versions 9.5 on Ubuntu, 9.6 on Debian, and finally 10.11, 11.4 and 12.2 on Arch Linux 9.3. If you are using a different version, please feel free to create a github issue with the postgresql version, a sample log file/log line :slightly_smiling_face: TIA!!!

Hello Kaiyan,

It is PostgreSQL 12.6 on CentOS 7. If this version is not supported, I will try to edit the GROK pattern. To do it, do you have any guidance? I don't know how to change Filebeat's PostgreSQL GROK patterns. Thanks!

Cool! If you want, you can push a PR with your grok pattern change to add support for postgresql 12.6 :slight_smile: We will be more than happy to review it!

Grok pattern for postgresql log is here. Please add a test file and run command to generate the -expected.json file using command like TESTING_FILEBEAT_MODULES=postgresql mage -v pythonIntegTest to verify if the grok pattern change works.

Hi Kaiyan,

I am not a developer but I created a GROK pattern based on log prefix (This prefix is more readable.)

Log_prefix: '%t [%p]: [%l-1] user=%u,db=%d,app=%a,client=%h '

GROK pattern : %{TIMESTAMP_ISO8601:timestamp}%{SPACE}%{INT:utc}%{SPACE}[%{NUMBER:pid:long}]:%{SPACE}[%{WORD:logline}-1]%{SPACE}user=%{USERNAME:user},db=%{WORD:db},app=%{NOTSPACE:application},client=%{IPV4:client}%{SPACE}%{WORD:logtype}:%{SPACE}%{GREEDYDATA:message}

I am sorry, but I could not find information about implementing this pattern in Filebeat. Can you guide me on how to test it?

Also there is still GROK failure with my pattern, if any of property has null value, I will be pleased if somebody help me to fix it as well. thanks!

I created a new pipeline as below
image

and added it to root pipeline

but nothing changed :frowning: I restarted Elasticsearch service, but could not make GROK pattern work :frowning: Can somebody guide me? Thanks.

Can anybody help me?

Looks like you have a slightly different format for the timezone. Can you submit an issue to GitHub with this issue and the sample log data... I think I already have the fix.

I updated the grok pattern

- grok:
    field: message
    patterns:
    - '^%{DATETIME:postgresql.log.timestamp}%{CHAR:separator}%{GREEDYDATA:raw_message}'
    pattern_definitions:
      DATETIME: '[-0-9]+ %{TIME} (%{WORD:event.timezone}|%{TIMEZONE:event.timezone})'
      TIMEZONE: '\+\d{2}'
      CHAR: .
      GREEDYDATA: |-
        (.|
        |	)*

And I updated the date parsing pattern

- date:
    field: postgresql.log.timestamp
    target_field: '@timestamp'
    formats:
    - yyyy-MM-dd HH:mm:ss.SSS zz
    - yyyy-MM-dd HH:mm:ss zz
    - yyyy-MM-dd HH:mm:ss X
    - yyyy-MM-dd HH:mm:ss.SSS X

Hi @Cihan_Tunali :slightly_smiling_face:

I was taking a look at this now and I was wondering why or how your timestamp format is different from the default format that comes with Postgres 12.

I have just spin up a postgres instance from docker and my timestamp looks like this 2021-06-03 15:16:42.575 UTC [1] LOG: database system is ready to accept connections.

Can you paste here your postgres config regarding your timezone format?

Hello @Mario_Castro

Here is one of my log part as you requested:

2021-06-04 00:49:54 +03 [98853]: [87372-1] user=postgres,db=postgres,app=[unknown],client=123.123.123.123 LOG: duration: 0.222 ms statement: SELECT * FROM pg_stat_database_conflicts;
2021-06-04 00:49:54 +03 [98853]: [87373-1] user=postgres,db=postgres,app=[unknown],client=123.123.123.123 LOG: duration: 0.574 ms statement: SELECT pg_database.datname,tmp.mode,COALESCE(count,0) as count
FROM
(
VALUES ('accesssharelock'),
('rowsharelock'),
('rowexclusivelock'),
('shareupdateexclusivelock'),
('sharelock'),
('sharerowexclusivelock'),
('exclusivelock'),
('accessexclusivelock')
) AS tmp(mode) CROSS JOIN pg_database
LEFT JOIN
(SELECT database, lower(mode) AS mode,count(*) AS count
FROM pg_locks WHERE database IS NOT NULL
GROUP BY database, lower(mode)
) AS tmp2
ON tmp.mode=tmp2.mode and pg_database.oid = tmp2.database ORDER BY 1
2021-06-04 00:49:54 +03 [98853]: [87374-1] user=postgres,db=postgres,app=[unknown],client=123.123.123.123 LOG: duration: 0.636 ms statement:
SELECT *,
(case pg_is_in_recovery() when 't' then null else pg_current_wal_lsn() end) AS pg_current_wal_lsn,
(case pg_is_in_recovery() when 't' then null else pg_wal_lsn_diff(pg_current_wal_lsn(), replay_lsn)::float end) AS pg_wal_lsn_diff
FROM pg_stat_replication

2021-06-04 00:49:54 +03 [98853]: [87375-1] user=postgres,db=postgres,app=[unknown],client=123.123.123.123 LOG: duration: 0.743 ms statement:
SELECT
pg_database.datname,
tmp.state,
COALESCE(count,0) as count,
COALESCE(max_tx_duration,0) as max_tx_duration
FROM
(
VALUES ('active'),
('idle'),
('idle in transaction'),
('idle in transaction (aborted)'),
('fastpath function call'),
('disabled')
) AS tmp(state) CROSS JOIN pg_database
LEFT JOIN
(
SELECT
datname,
state,
count(*) AS count,
MAX(EXTRACT(EPOCH FROM now() - xact_start))::float AS max_tx_duration
FROM pg_stat_activity GROUP BY datname,state) AS tmp2
ON tmp.state = tmp2.state AND pg_database.datname = tmp2.datname

2021-06-04 00:49:54 +03 [98853]: [87376-1] user=postgres,db=postgres,app=[unknown],client=123.123.123.123 LOG: duration: 0.200 ms statement: SELECT * FROM pg_stat_bgwriter;
2021-06-04 00:49:54 +03 [98850]: [44781-1] user=postgres,db=postgres,app=Patroni,client=123.123.123.123 LOG: duration: 0.389 ms statement: SELECT CASE WHEN pg_catalog.pg_is_in_recovery() THEN 0 ELSE ('x' || pg_catalog.substr(pg_catalog.pg_walfile_name(pg_catalog.pg_current_wal_lsn()), 1, 8))::bit(32)::int END, CASE WHEN pg_catalog.pg_is_in_recovery() THEN 0 ELSE pg_catalog.pg_wal_lsn_diff(pg_catalog.pg_current_wal_lsn(), '0/0')::bigint END, pg_catalog.pg_wal_lsn_diff(pg_catalog.pg_last_wal_replay_lsn(), '0/0')::bigint, pg_catalog.pg_wal_lsn_diff(COALESCE(pg_catalog.pg_last_wal_receive_lsn(), '0/0'), '0/0')::bigint, pg_catalog.pg_is_in_recovery() AND pg_catalog.pg_is_wal_replay_paused(), 0, CASE WHEN latest_end_lsn IS NULL THEN NULL ELSE received_tli END, slot_name, conninfo FROM pg_catalog.pg_stat_get_wal_receiver()
2021-06-04 00:49:54 +03 [98850]: [44782-1] user=postgres,db=postgres,app=Patroni,client=123.123.123.123 LOG: duration: 3.345 ms statement: SELECT name, setting, unit, vartype, context, sourcefile FROM pg_catalog.pg_settings WHERE pg_catalog.lower(name) = ANY(ARRAY['listen_addresses','port','cluster_name','wal_level','hot_standby','max_connections','max_wal_senders','max_prepared_transactions','max_locks_per_transaction','track_commit_timestamp','max_replication_slots','max_worker_processes','wal_log_hints','archive_mode','archive_timeout','checkpoint_completion_target','default_statistics_target','effective_cache_size','effective_io_concurrency','log_line_prefix','log_min_duration_statement','log_rotation_age','log_statement','logging_collector','maintenance_work_mem','max_parallel_maintenance_workers','max_parallel_workers','max_parallel_workers_per_gather','max_wal_size','min_wal_size','random_page_cost','shared_buffers','shared_preload_libraries','wal_buffers','wal_keep_segments','work_mem','wal_segment_size','wal_block_size'])
2021-06-04 00:49:54 +03 [98850]: [44783-1] user=postgres,db=postgres,app=Patroni,client=123.123.123.123 LOG: duration: 1.223 ms statement: SELECT pg_catalog.to_char(pg_catalog.pg_postmaster_start_time(), 'YYYY-MM-DD HH24:MI:SS.MS TZ'), CASE WHEN pg_catalog.pg_is_in_recovery() THEN 0 ELSE ('x' || pg_catalog.substr(pg_catalog.pg_walfile_name(pg_catalog.pg_current_wal_lsn()), 1, 8))::bit(32)::int END, CASE WHEN pg_catalog.pg_is_in_recovery() THEN 0 ELSE pg_catalog.pg_wal_lsn_diff(pg_catalog.pg_current_wal_lsn(), '0/0')::bigint END, pg_catalog.pg_wal_lsn_diff(pg_catalog.pg_last_wal_replay_lsn(), '0/0')::bigint, pg_catalog.pg_wal_lsn_diff(COALESCE(pg_catalog.pg_last_wal_receive_lsn(), '0/0'), '0/0')::bigint, pg_catalog.pg_is_in_recovery() AND pg_catalog.pg_is_wal_replay_paused(), pg_catalog.to_char(pg_catalog.pg_last_xact_replay_timestamp(), 'YYYY-MM-DD HH24:MI:SS.MS TZ'), pg_catalog.array_to_json(pg_catalog.array_agg(pg_catalog.row_to_json(ri))) FROM (SELECT (SELECT rolname FROM pg_authid WHERE oid = usesysid) AS usename, application_name, client_addr, w.state, sync_state, sync_priority FROM pg_catalog.pg_stat_get_wal_senders() w, pg_catalog.pg_stat_get_activity(pid)) AS ri
2021-06-04 00:49:54 +03 [98828]: [9-1] user=,db=,app=,client= LOG: received SIGHUP, reloading configuration files
2021-06-04 00:49:54 +03 [98828]: [10-1] user=,db=,app=,client= LOG: parameter "log_min_duration_statement" changed to "500ms"
2021-06-04 00:49:54 +03 [98828]: [11-1] user=,db=,app=,client= LOG: parameter "log_statement" changed to "ddl"

I am using Patroni for managing my Pgsql cluster. My pgsql version is

PostgreSQL 12.4 on x86_64-pc-linux-gnu, compiled by gcc (GCC) 7.3.1 20180303 (Red Hat 7.3.1-5), 64-bit

@legoguy1000 after long research I could not implement my GROK in the pipeline, so I decided to disable filebeat's postgresql module. After that, I manually forced filebeat to use my pipeline from filebeat.yml

output.elasticsearch:

Array of hosts to connect to.

hosts: ["gbzmngelk-1:9200"]
pipeline: "postgres"

Here is my pipeline file:

[
{
"set": {
"field": "event.ingested",
"value": "{{_ingest.timestamp}}"
}
},
{
"grok": {
"field": "message",
"patterns": [
"%{TIMESTAMP_ISO8601:timestamp}%{SPACE}%{INT:utc}%{SPACE}\[%{NUMBER:pid:long}]:%{SPACE}\[%{NUMBER:logline}-1]%{SPACE}user=%{USERNAME:pgsql_user},db=%{WORD:pgsql_database},app=%{NOTSPACE:pgsql_application},client=%{IPV4:pgsql_client}%{SPACE}%{WORD:pgsql_logtype}:%{SPACE}%{GREEDYDATA:pgsql_message}"
],
"pattern_definitions": {
"DATETIME": "[-0-9]+ %{TIME} %{WORD:event.timezone}",
"CHAR": ".",
"GREEDYDATA": "(.|\n|\t)*"
}
}
},
{
"date": {
"formats": [
"yyyy-MM-dd HH:mm:ss.SSS zz",
"yyyy-MM-dd HH:mm:ss zz"
],
"field": "postgresql.log.timestamp",
"target_field": "@timestamp"
}
},
{
"script": {
"params": {
"scale": 1000000
},
"if": "ctx.temp?.duration != null",
"lang": "painless",
"source": "ctx.event.duration = Math.round(ctx.temp.duration * params.scale)"
}
},
{
"remove": {
"field": "temp.duration",
"ignore_missing": true
}
},
{
"set": {
"field": "event.kind",
"value": "logtype"
}
},
{
"set": {
"field": "event.user",
"value": "user"
}
},
{
"set": {
"field": "event.application",
"value": "application"
}
},
{
"set": {
"field": "event.source",
"value": "client"
}
},
{
"append": {
"field": "event.category",
"value": [
"database"
]
}
},
{
"remove": {
"field": [
"separator"
]
}
},
{
"remove": {
"field": [
"raw_message"
]
}
}
]

But still it is not parsing 100%. If I enable logging for all statements, whoouu, it gives tons of GROK parse errors :grin: Right now logging is enabled for DDL only.

With all this information, any help would be appreciated :pray:

In order to parse your timestamp field correctly with the +00 time zone, you need the additional date formats.

    - yyyy-MM-dd HH:mm:ss X
    - yyyy-MM-dd HH:mm:ss.SSS X

done.

image

This topic was automatically closed 28 days after the last reply. New replies are no longer allowed.