Remove fields based on a regex in the ingest pipeline

Hello,

I use Elastic Agent to collect Docker logs with the "Custom logs" integration.

When uploading logs, fields are added according to the labels of the different containers from which the logs are retrieved.

However, since I have several thousand containers, several thousand labels end up registered as fields, like this:

So I would like to filter out all labels that start with traefik_, probably through a "regex" so as not to save them as fields.

Moreover, I want to keep all the other labels like for example those that start with com_docker_compose_

I think I should do that directly in ingest pipeline, but I can't find a way to do this, does anyone have any advice for me?

The number of fields explodes and exceeds 3000, so I have to increase the limit every week.

I also tried to map the labels field with the type flattened, without success.

Thanks a lot for your help!

Example of what I tried using flattened:

PUT /logs-docker-default/_mapping
{
  "properties": {
    "container": {
      "properties": {
        "labels" : {
          "type": "flattened"
        }
      }
    }
  }
}

PUT /_index_template/logs
{
  "template": {
    "mappings": {
      "properties": {
        "container": {
          "properties": {
            "labels" : {
              "type": "flattened"
            }
          }
        }
      }
    }
  }
}

I also tried to follow some links, such as:

Here are two example documents in JSON:

  • The first with labels beginning with traefik_
  • The second without
{
    "_index": ".ds-logs-docker-default-2022.05.10-000005",
    "_id": "E59Vt4ABfh4vVCreaaaa",
    "_version": 1,
    "_score": 1,
    "_source": {
      "container": {
        "image": {
          "name": "docker.poiuyt.pro/nginx:1.6.1"
        },
        "name": "milipooro_production_nginx",
        "id": "06efecb60d198c397b4e8babf7d3fa4140b2a3e40035d61495c85f04bf7caaaa",
        "labels": {
          "updater_instance": "milipooro",
          "updater_version": "2.13.3",
          "traefik_enable": "true",
          "traefik_http_routers_milipooro_production_nginx_rule": "Host(`milipooro.poiuyt-vabeny.com`,`milipooro.production.poiuyt-vabeny.com`)",
          "updater_environment": "production",
          "updater_service": "nginx",
          "updater_config-hash": "9f72f45f56e3e8a9e3a74656f5135c605a3caa4d9e3e1ea73594ed8ebb84aaaa",
          "traefik_http_routers_milipooro_production_nginx_tls": "true",
          "traefik_http_routers_milipooro_production_nginx_entrypoints": "https",
          "maintainer": "poiuyt lightening <administrateur@poiuyt.lightening>"
        }
      },
      "agent": {
        "name": "aerghipo",
        "id": "a094a4cf-6c6e-4dc8-9272-20b17a25aaaa",
        "ephemeral_id": "abfdb1cf-bcbc-47d5-b921-65f6d10caaaa",
        "type": "filebeat",
        "version": "8.2.0"
      },
      "log": {
        "file": {
          "path": "/var/lib/docker/containers/06efecb60d198c397b4e8babf7d3fa4140b2a3e40035d61495c85f04bf7caaaa/06efecb60d198c397b4e8babf7d3fa4140b2a3e40035d61495c85f04bf7caaaa-json.log"
        },
        "offset": 4538657,
        "data": {
          "log": "100.200.100.2 - - [12/May/2022:10:14:27 +0200] \"GET / HTTP/1.1\" 302 322 \"-\" \"updown.io daemon 2.6\" \"100.200.100.2\"\n",
          "stream": "stdout",
          "time": "2022-05-12T08:14:27.593405075Z"
        }
      },
      "elastic_agent": {
        "id": "a094a4cf-6c6e-4dc8-9272-20b17a25aaaa",
        "version": "8.2.0",
        "snapshot": false
      },
      "message": "{\"log\":\"100.200.100.2 - - [12/May/2022:10:14:27 +0200] \\\"GET / HTTP/1.1\\\" 302 322 \\\"-\\\" \\\"updown.io daemon 2.6\\\" \\\"100.200.100.2\\\"\\n\",\"stream\":\"stdout\",\"time\":\"2022-05-12T08:14:27.593405075Z\"}",
      "input": {
        "type": "log"
      },
      "@timestamp": "2022-05-12T08:14:30.689Z",
      "ecs": {
        "version": "8.0.0"
      },
      "data_stream": {
        "namespace": "default",
        "type": "logs",
        "dataset": "docker"
      },
      "host": {
        "hostname": "aerghipo",
        "os": {
          "kernel": "5.4.0-100-generic",
          "codename": "focal",
          "name": "Ubuntu",
          "type": "linux",
          "family": "debian",
          "version": "20.04.4 LTS (Focal Fossa)",
          "platform": "ubuntu"
        },
        "containerized": true,
        "ip": [
          "10.210.0.20"
        ],
        "name": "aerghipo",
        "mac": [
          "0a:1b:2c:3d:4e:5f"
        ],
        "architecture": "x86_64"
      },
      "event": {
        "dataset": "docker"
      }
    },
    "fields": {
      "container.labels.traefik_enable": [
        "true"
      ],
      "elastic_agent.version": [
        "8.2.0"
      ],
      "log.data.stream": [
        "stdout"
      ],
      "host.hostname": [
        "aerghipo"
      ],
      "host.mac": [
        "0a:1b:2c:3d:4e:5f"
      ],
      "container.labels.updater_environment": [
        "production"
      ],
      "container.id": [
        "06efecb60d198c397b4e8babf7d3fa4140b2a3e40035d61495c85f04bf7caaaa"
      ],
      "log.data.time": [
        "2022-05-12T08:14:27.593Z"
      ],
      "container.labels.updater_service": [
        "nginx"
      ],
      "host.ip": [
        "10.210.0.20"
      ],
      "agent.type": [
        "filebeat"
      ],
      "container.name": [
        "milipooro_production_nginx"
      ],
      "container.image.name": [
        "docker.poiuyt.pro/nginx:1.6.1"
      ],
      "host.os.version": [
        "20.04.4 LTS (Focal Fossa)"
      ],
      "host.os.kernel": [
        "5.4.0-100-generic"
      ],
      "host.os.name": [
        "Ubuntu"
      ],
      "agent.name": [
        "aerghipo"
      ],
      "container.labels.traefik_http_routers_milipooro_production_nginx_tls": [
        "true"
      ],
      "elastic_agent.snapshot": [
        false
      ],
      "host.name": [
        "aerghipo"
      ],
      "container.labels.traefik_http_routers_milipooro_production_nginx_entrypoints": [
        "https"
      ],
      "host.os.type": [
        "linux"
      ],
      "elastic_agent.id": [
        "a094a4cf-6c6e-4dc8-9272-20b17a25aaaa"
      ],
      "data_stream.namespace": [
        "default"
      ],
      "host.os.codename": [
        "focal"
      ],
      "input.type": [
        "log"
      ],
      "container.labels.updater_instance": [
        "milipooro"
      ],
      "log.offset": [
        4538657
      ],
      "message": [
        "{\"log\":\"100.200.100.2 - - [12/May/2022:10:14:27 +0200] \\\"GET / HTTP/1.1\\\" 302 322 \\\"-\\\" \\\"updown.io daemon 2.6\\\" \\\"100.200.100.2\\\"\\n\",\"stream\":\"stdout\",\"time\":\"2022-05-12T08:14:27.593405075Z\"}"
      ],
      "data_stream.type": [
        "logs"
      ],
      "log.data.log": [
        "100.200.100.2 - - [12/May/2022:10:14:27 +0200] \"GET / HTTP/1.1\" 302 322 \"-\" \"updown.io daemon 2.6\" \"100.200.100.2\"\n"
      ],
      "container.labels.traefik_http_routers_milipooro_production_nginx_rule": [
        "Host(`milipooro.poiuyt-vabeny.com`,`milipooro.production.poiuyt-vabeny.com`)"
      ],
      "host.architecture": [
        "x86_64"
      ],
      "container.labels.maintainer": [
        "poiuyt lightening <administrateur@poiuyt.lightening>"
      ],
      "@timestamp": [
        "2022-05-12T08:14:30.689Z"
      ],
      "agent.id": [
        "a094a4cf-6c6e-4dc8-9272-20b17a25aaaa"
      ],
      "host.os.platform": [
        "ubuntu"
      ],
      "host.containerized": [
        true
      ],
      "ecs.version": [
        "8.0.0"
      ],
      "log.file.path": [
        "/var/lib/docker/containers/06efecb60d198c397b4e8babf7d3fa4140b2a3e40035d61495c85f04bf7caaaa/06efecb60d198c397b4e8babf7d3fa4140b2a3e40035d61495c85f04bf7caaaa-json.log"
      ],
      "data_stream.dataset": [
        "docker"
      ],
      "container.labels.updater_config-hash": [
        "9f72f45f56e3e8a9e3a74656f5135c605a3caa4d9e3e1ea73594ed8ebb84aaaa"
      ],
      "agent.ephemeral_id": [
        "abfdb1cf-bcbc-47d5-b921-65f6d10caaaa"
      ],
      "agent.version": [
        "8.2.0"
      ],
      "host.os.family": [
        "debian"
      ],
      "event.dataset": [
        "docker"
      ],
      "container.labels.updater_version": [
        "2.13.3"
      ]
    }
  }
{
  "_index": ".ds-logs-docker-default-2022.05.04-000004",
  "_id": "2954roABfh4vVCraaaaa",
  "_version": 1,
  "_score": 1,
  "_source": {
    "container": {
      "image": {
        "name": "sha256:372a2fa8045d43e0bf76ffe538d855477feeeff8c2f6c6f85c9638df46eaaaaa"
      },
      "name": "iava_gotenberg",
      "id": "c89e914f3ae71d0cb840aa6e05b7aa155e990e4b10701687486fc0c397aaaaaa",
      "labels": {
        "updater_instance": "iava",
        "github": "https://github.com/gotenberg/gotenberg",
        "website": "https://gotenberg.dev",
        "updater_version": "2.13.3",
        "author": "Julien Neuhart",
        "description": "A Docker-powered stateless API for PDF files.",
        "updater_service": "gotenberg",
        "updater_config-hash": "79c9372e045956a79508042941c02c2f179cae9b729de97dccb10ae91a9aaaaa",
        "version": "7.5.2"
      }
    },
    "agent": {
      "name": "ourtyoiu",
      "id": "a094a4cf-6c6e-4dc8-9272-20b17a2aaaaa",
      "ephemeral_id": "abfdb1cf-bcbc-47d5-b921-65f6d10aaaaa",
      "type": "filebeat",
      "version": "8.2.0"
    },
    "log": {
      "file": {
        "path": "/var/lib/docker/containers/c89e914f3ae71d0cb840aa6e05b7aa155e990e4b10701687486fc0c397aaaaaa/c89e914f3ae71d0cb840aa6e05b7aa155e990e4b10701687486fc0c397aaaaaa-json.log"
      },
      "offset": 8339,
      "data": {
        "log": "{\"level\":\"info\",\"ts\":1652194564.6567736,\"logger\":\"api\",\"msg\":\"request handled\",\"trace\":\"ac8f4836-ed1f-4840-8b5f-8cb2648aaaaa\",\"remote_ip\":\"10.240.100.6\",\"host\":\"gotenberg:3000\",\"uri\":\"/forms/libreoffice/convert\",\"method\":\"POST\",\"path\":\"/forms/libreoffice/convert\",\"referer\":\"\",\"user_agent\":\"Symfony HttpClient/Curl\",\"status\":200,\"latency\":692996589,\"latency_human\":\"692.996589ms\",\"bytes_in\":21411,\"bytes_out\":58995}\n",
        "stream": "stderr",
        "json": {
          "msg": "request handled",
          "referer": "",
          "method": "POST",
          "level": "info",
          "bytes_in": 21411,
          "logger": "api",
          "latency": 692996589,
          "latency_human": "692.996589ms",
          "uri": "/forms/libreoffice/convert",
          "path": "/forms/libreoffice/convert",
          "trace": "ac8f4836-ed1f-4840-8b5f-8cb2648aaaaa",
          "remote_ip": "10.240.100.6",
          "bytes_out": 58995,
          "host": "gotenberg:3000",
          "user_agent": "Symfony HttpClient/Curl",
          "ts": 1652194564.6567736,
          "status": 200
        },
        "time": "2022-05-10T14:56:04.657109528Z"
      }
    },
    "elastic_agent": {
      "id": "a094a4cf-6c6e-4dc8-9272-20b17a2aaaaa",
      "version": "8.2.0",
      "snapshot": false
    },
    "message": "{\"log\":\"{\\\"level\\\":\\\"info\\\",\\\"ts\\\":1652194564.6567736,\\\"logger\\\":\\\"api\\\",\\\"msg\\\":\\\"request handled\\\",\\\"trace\\\":\\\"ac8f4836-ed1f-4840-8b5f-8cb2648aaaaa\\\",\\\"remote_ip\\\":\\\"10.240.100.6\\\",\\\"host\\\":\\\"gotenberg:3000\\\",\\\"uri\\\":\\\"/forms/libreoffice/convert\\\",\\\"method\\\":\\\"POST\\\",\\\"path\\\":\\\"/forms/libreoffice/convert\\\",\\\"referer\\\":\\\"\\\",\\\"user_agent\\\":\\\"Symfony HttpClient/Curl\\\",\\\"status\\\":200,\\\"latency\\\":692996589,\\\"latency_human\\\":\\\"692.996589ms\\\",\\\"bytes_in\\\":21411,\\\"bytes_out\\\":58995}\\n\",\"stream\":\"stderr\",\"time\":\"2022-05-10T14:56:04.657109528Z\"}",
    "input": {
      "type": "log"
    },
    "@timestamp": "2022-05-10T14:56:08.721Z",
    "ecs": {
      "version": "8.0.0"
    },
    "data_stream": {
      "namespace": "default",
      "type": "logs",
      "dataset": "docker"
    },
    "host": {
      "hostname": "ourtyoiu",
      "os": {
        "kernel": "5.4.0-100-generic",
        "codename": "focal",
        "name": "Ubuntu",
        "family": "debian",
        "type": "linux",
        "version": "20.04.4 LTS (Focal Fossa)",
        "platform": "ubuntu"
      },
      "ip": [
        "10.250.100.3"
      ],
      "containerized": true,
      "name": "ourtyoiu",
      "mac": [
        "0a:0b:0c:0d:0e:0f"
      ],
      "architecture": "x86_64"
    },
    "event": {
      "dataset": "docker"
    }
  },
  "fields": {
    "log.data.json.uri": [
      "/forms/libreoffice/convert"
    ],
    "log.data.json.method": [
      "POST"
    ],
    "log.data.json.referer": [
      ""
    ],
    "elastic_agent.version": [
      "8.2.0"
    ],
    "log.data.json.status": [
      200
    ],
    "log.data.stream": [
      "stderr"
    ],
    "host.hostname": [
      "ourtyoiu"
    ],
    "log.data.json.bytes_out": [
      58995
    ],
    "host.mac": [
      "0a:0b:0c:0d:0e:0f"
    ],
    "log.data.json.user_agent": [
      "Symfony HttpClient/Curl"
    ],
    "container.id": [
      "c89e914f3ae71d0cb840aa6e05b7aa155e990e4b10701687486fc0c397aaaaaa"
    ],
    "container.labels.updater_service": [
      "gotenberg"
    ],
    "container.name": [
      "iava_gotenberg"
    ],
    "container.image.name": [
      "sha256:372a2fa8045d43e0bf76ffe538d855477feeeff8c2f6c6f85c9638df46eaaaaa"
    ],
    "log.data.json.msg": [
      "request handled"
    ],
    "log.data.json.level": [
      "info"
    ],
    "host.os.version": [
      "20.04.4 LTS (Focal Fossa)"
    ],
    "container.labels.description": [
      "A Docker-powered stateless API for PDF files."
    ],
    "host.os.name": [
      "Ubuntu"
    ],
    "agent.name": [
      "ourtyoiu"
    ],
    "host.name": [
      "ourtyoiu"
    ],
    "log.data.json.path": [
      "/forms/libreoffice/convert"
    ],
    "host.os.type": [
      "linux"
    ],
    "log.data.json.host": [
      "gotenberg:3000"
    ],
    "input.type": [
      "log"
    ],
    "container.labels.updater_instance": [
      "iava"
    ],
    "log.offset": [
      8339
    ],
    "data_stream.type": [
      "logs"
    ],
    "host.architecture": [
      "x86_64"
    ],
    "container.labels.version": [
      "7.5.2"
    ],
    "agent.id": [
      "a094a4cf-6c6e-4dc8-9272-20b17a2aaaaa"
    ],
    "host.containerized": [
      true
    ],
    "ecs.version": [
      "8.0.0"
    ],
    "agent.version": [
      "8.2.0"
    ],
    "host.os.family": [
      "debian"
    ],
    "log.data.json.latency": [
      692996589
    ],
    "container.labels.author": [
      "Julien Neuhart"
    ],
    "log.data.time": [
      "2022-05-10T14:56:04.657Z"
    ],
    "log.data.json.bytes_in": [
      21411
    ],
    "host.ip": [
      "10.250.100.3"
    ],
    "agent.type": [
      "filebeat"
    ],
    "log.data.json.latency_human": [
      "692.996589ms"
    ],
    "host.os.kernel": [
      "5.4.0-100-generic"
    ],
    "log.data.json.trace": [
      "ac8f4836-ed1f-4840-8b5f-8cb2648aaaaa"
    ],
    "elastic_agent.snapshot": [
      false
    ],
    "log.data.json.logger": [
      "api"
    ],
    "log.data.json.remote_ip": [
      "10.240.100.6"
    ],
    "elastic_agent.id": [
      "a094a4cf-6c6e-4dc8-9272-20b17a2aaaaa"
    ],
    "data_stream.namespace": [
      "default"
    ],
    "log.data.json.ts": [
      1652194560
    ],
    "host.os.codename": [
      "focal"
    ],
    "message": [
      "{\"log\":\"{\\\"level\\\":\\\"info\\\",\\\"ts\\\":1652194564.6567736,\\\"logger\\\":\\\"api\\\",\\\"msg\\\":\\\"request handled\\\",\\\"trace\\\":\\\"ac8f4836-ed1f-4840-8b5f-8cb2648aaaaa\\\",\\\"remote_ip\\\":\\\"10.240.100.6\\\",\\\"host\\\":\\\"gotenberg:3000\\\",\\\"uri\\\":\\\"/forms/libreoffice/convert\\\",\\\"method\\\":\\\"POST\\\",\\\"path\\\":\\\"/forms/libreoffice/convert\\\",\\\"referer\\\":\\\"\\\",\\\"user_agent\\\":\\\"Symfony HttpClient/Curl\\\",\\\"status\\\":200,\\\"latency\\\":692996589,\\\"latency_human\\\":\\\"692.996589ms\\\",\\\"bytes_in\\\":21411,\\\"bytes_out\\\":58995}\\n\",\"stream\":\"stderr\",\"time\":\"2022-05-10T14:56:04.657109528Z\"}"
    ],
    "log.data.log": [
      "{\"level\":\"info\",\"ts\":1652194564.6567736,\"logger\":\"api\",\"msg\":\"request handled\",\"trace\":\"ac8f4836-ed1f-4840-8b5f-8cb2648aaaaa\",\"remote_ip\":\"10.240.100.6\",\"host\":\"gotenberg:3000\",\"uri\":\"/forms/libreoffice/convert\",\"method\":\"POST\",\"path\":\"/forms/libreoffice/convert\",\"referer\":\"\",\"user_agent\":\"Symfony HttpClient/Curl\",\"status\":200,\"latency\":692996589,\"latency_human\":\"692.996589ms\",\"bytes_in\":21411,\"bytes_out\":58995}\n"
    ],
    "@timestamp": [
      "2022-05-10T14:56:08.721Z"
    ],
    "host.os.platform": [
      "ubuntu"
    ],
    "log.file.path": [
      "/var/lib/docker/containers/c89e914f3ae71d0cb840aa6e05b7aa155e990e4b10701687486fc0c397aaaaaa/c89e914f3ae71d0cb840aa6e05b7aa155e990e4b10701687486fc0c397aaaaaa-json.log"
    ],
    "data_stream.dataset": [
      "docker"
    ],
    "container.labels.updater_config-hash": [
      "79c9372e045956a79508042941c02c2f179cae9b729de97dccb10ae91a9aaaaa"
    ],
    "agent.ephemeral_id": [
      "abfdb1cf-bcbc-47d5-b921-65f6d10aaaaa"
    ],
    "container.labels.github": [
      "https://github.com/gotenberg/gotenberg"
    ],
    "event.dataset": [
      "docker"
    ],
    "container.labels.updater_version": [
      "2.13.3"
    ],
    "container.labels.website": [
      "https://gotenberg.dev"
    ]
  }
}

OK, in the end I did it like this with a Painless script:

  {
    "script": {
      "description": "Drop traefik_* and com_docker_* container labels (keeping com_docker_compose_service) so they are not mapped as fields",
      "source": "if (ctx.container?.labels instanceof Map) {\n    ctx.container.labels.keySet().removeIf(label ->\n        label ==~ /^traefik_.*/ ||\n        (label ==~ /^com_docker_.*/ && label != \"com_docker_compose_service\"));\n}"
    }
  },
]
// Ingest-pipeline script: strip noisy Docker labels from container.labels
// before the document is indexed, so they never become mapped fields.
//
// Removed: every label starting with "traefik_", and every label starting
//          with "com_docker_" except "com_docker_compose_service".
// Kept:    all other labels.
//
// The instanceof guard skips documents that carry no container.labels object
// (null-safe: `null instanceof Map` is false), instead of failing the pipeline.
if (ctx.container?.labels instanceof Map) {
    // keySet() is a live view of the map, so removeIf drops the matching
    // entries in place without needing a temporary list.
    ctx.container.labels.keySet().removeIf(label ->
        label ==~ /^traefik_.*/ ||
        // Use != (value comparison via equals). The identity operator !==
        // compares references and is effectively always true for a map key
        // versus a string literal, which would delete the label we keep.
        (label ==~ /^com_docker_.*/ && label != "com_docker_compose_service"));
}
1 Like

This topic was automatically closed 28 days after the last reply. New replies are no longer allowed.