Processor to replace numbers in URLs with a fixed value

Hello,
I have a request to replace the numbers in a URL path with a fixed value. So taking something like:

/api/sites/42951/shell/navigation

And replacing it with:

/api/sites/ID/shell/navigation

I currently have this as the processor to try to do that, but it isn't replacing the values:

    "gsub": {
      "field": "httpRequest.url.path",
      "pattern": "/[0-9]+/",
      "replacement": "/ID/",
      "target_field": "httpRequest.url.standardizedPath",
      "ignore_missing": true,
      "ignore_failure": true
    }

The number isn't being replaced as expected. Any tips?

Hi @Zachary_Joyner

I did the simulation with your processor and it worked. How is the data indexed?

POST /_ingest/pipeline/_simulate?verbose=true
{
  "pipeline": {
    "description": "_description",
    "processors": [
      {
        "gsub": {
          "field": "httpRequest.url.path",
          "pattern": "/[0-9]+/",
          "replacement": "/ID/",
          "target_field": "httpRequest.url.standardizedPath",
          "ignore_missing": true,
          "ignore_failure": true
        }
      }
    ]
  },
  "docs": [
    {
      "_index": "index",
      "_id": "id",
      "_source": {
        "httpRequest": {
          "url": {
            "path": "/api/sites/42951/shell/navigation"
          }
        }
      }
    }
  ]
}

Output:

"doc": {
            "_index": "index",
            "_id": "id",
            "_version": "-3",
            "_source": {
              "httpRequest": {
                "url": {
                  "path": "/api/sites/42951/shell/navigation",
                  "standardizedPath": "/api/sites/ID/shell/navigation"
                }
              }
            }

This is the config for the index template:

{
  "settings": {
    "index": {
      "lifecycle": {
        "name": "logs"
      },
      "codec": "best_compression",
      "routing": {
        "allocation": {
          "include": {
            "_tier_preference": "data_hot"
          }
        }
      },
      "mapping": {
        "total_fields": {
          "ignore_dynamic_beyond_limit": "true"
        },
        "ignore_malformed": "true"
      },
      "refresh_interval": "30s",
      "final_pipeline": "watchdog_macro_pipeline",
      "default_pipeline": "logs@default-pipeline",
      "number_of_routing_shards": "5"
    }
  },
  "mappings": {
    "dynamic_templates": [
      {
        "ecs_timestamp": {
          "match": "@timestamp",
          "mapping": {
            "ignore_malformed": false,
            "type": "date"
          }
        }
      },
      {
        "ecs_message_match_only_text": {
          "path_match": [
            "message",
            "*.message"
          ],
          "unmatch_mapping_type": "object",
          "mapping": {
            "type": "match_only_text"
          }
        }
      },
      {
        "ecs_ip": {
          "path_match": [
            "ip",
            "*.ip",
            "*_ip"
          ],
          "match_mapping_type": "string",
          "mapping": {
            "type": "ip"
          }
        }
      },
      {
        "ecs_wildcard": {
          "path_match": [
            "*.io.text",
            "*.message_id",
            "*registry.data.strings",
            "*url.path"
          ],
          "unmatch_mapping_type": "object",
          "mapping": {
            "type": "wildcard"
          }
        }
      },
      {
        "ecs_path_match_wildcard_and_match_only_text": {
          "path_match": [
            "*.body.content",
            "*url.full",
            "*url.original"
          ],
          "unmatch_mapping_type": "object",
          "mapping": {
            "fields": {
              "text": {
                "type": "match_only_text"
              }
            },
            "type": "wildcard"
          }
        }
      },
      {
        "ecs_match_wildcard_and_match_only_text": {
          "match": [
            "*command_line",
            "*stack_trace"
          ],
          "unmatch_mapping_type": "object",
          "mapping": {
            "fields": {
              "text": {
                "type": "match_only_text"
              }
            },
            "type": "wildcard"
          }
        }
      },
      {
        "ecs_path_match_keyword_and_match_only_text": {
          "path_match": [
            "*.title",
            "*.executable",
            "*.name",
            "*.working_directory",
            "*.full_name",
            "*file.path",
            "*file.target_path",
            "*os.full",
            "email.subject",
            "vulnerability.description",
            "user_agent.original"
          ],
          "unmatch_mapping_type": "object",
          "mapping": {
            "fields": {
              "text": {
                "type": "match_only_text"
              }
            },
            "type": "keyword"
          }
        }
      },
      {
        "ecs_date": {
          "path_match": [
            "*.timestamp",
            "*_timestamp",
            "*.not_after",
            "*.not_before",
            "*.accessed",
            "created",
            "*.created",
            "*.installed",
            "*.creation_date",
            "*.ctime",
            "*.mtime",
            "ingested",
            "*.ingested",
            "*.start",
            "*.end"
          ],
          "unmatch_mapping_type": "object",
          "mapping": {
            "type": "date"
          }
        }
      },
      {
        "ecs_path_match_float": {
          "path_match": [
            "*.score.*",
            "*_score*"
          ],
          "path_unmatch": "*.version",
          "unmatch_mapping_type": "object",
          "mapping": {
            "type": "float"
          }
        }
      },
      {
        "ecs_usage_double_scaled_float": {
          "path_match": "*.usage",
          "match_mapping_type": [
            "double",
            "long",
            "string"
          ],
          "mapping": {
            "scaling_factor": 1000,
            "type": "scaled_float"
          }
        }
      },
      {
        "ecs_geo_point": {
          "path_match": [
            "location",
            "*.location"
          ],
          "mapping": {
            "type": "geo_point"
          }
        }
      },
      {
        "ecs_flattened": {
          "path_match": [
            "*structured_data",
            "*exports",
            "*imports"
          ],
          "match_mapping_type": "object",
          "mapping": {
            "type": "flattened"
          }
        }
      },
      {
        "all_strings_to_keywords": {
          "match_mapping_type": "string",
          "mapping": {
            "ignore_above": 1024,
            "type": "keyword"
          }
        }
      }
    ],
    "date_detection": false,
    "properties": {
      "@timestamp": {
        "type": "date"
      },
      "agent": {
        "properties": {
          "id": {
            "type": "text",
            "fields": {
              "keyword": {
                "type": "keyword",
                "ignore_above": 256
              }
            }
          },
          "name": {
            "type": "text",
            "fields": {
              "keyword": {
                "type": "keyword",
                "ignore_above": 256
              }
            }
          },
          "type": {
            "type": "text",
            "fields": {
              "keyword": {
                "type": "keyword",
                "ignore_above": 256
              }
            }
          },
          "version": {
            "type": "text",
            "fields": {
              "keyword": {
                "type": "keyword",
                "ignore_above": 256
              }
            }
          }
        }
      },
      "data_stream": {
        "properties": {
          "dataset": {
            "type": "text",
            "fields": {
              "keyword": {
                "type": "keyword",
                "ignore_above": 256
              }
            }
          },
          "namespace": {
            "type": "text",
            "fields": {
              "keyword": {
                "type": "keyword",
                "ignore_above": 256
              }
            }
          },
          "type": {
            "type": "text",
            "fields": {
              "keyword": {
                "type": "keyword",
                "ignore_above": 256
              }
            }
          }
        }
      },
      "ecs": {
        "properties": {
          "version": {
            "type": "text",
            "fields": {
              "keyword": {
                "type": "keyword",
                "ignore_above": 256
              }
            }
          }
        }
      },
      "event": {
        "properties": {
          "dataset": {
            "type": "text",
            "fields": {
              "keyword": {
                "type": "keyword",
                "ignore_above": 256
              }
            }
          },
          "module": {
            "type": "text",
            "fields": {
              "keyword": {
                "type": "keyword",
                "ignore_above": 256
              }
            }
          }
        }
      },
      "googclient_deliveryattempt": {
        "type": "text",
        "fields": {
          "keyword": {
            "type": "keyword",
            "ignore_above": 256
          }
        }
      },
      "httpRequest": {
        "properties": {
          "cacheLookup": {
            "type": "boolean"
          },
          "latency": {
            "type": "double"
          },
          "referer": {
            "type": "text",
            "fields": {
              "keyword": {
                "type": "keyword",
                "ignore_above": 256
              }
            }
          },
          "remoteIp": {
            "type": "ip"
          },
          "requestMethod": {
            "type": "text",
            "fields": {
              "keyword": {
                "type": "keyword",
                "ignore_above": 256
              }
            }
          },
          "requestSize": {
            "type": "integer"
          },
          "requestUrl": {
            "type": "text",
            "fields": {
              "keyword": {
                "type": "keyword",
                "ignore_above": 4096
              }
            }
          },
          "responseSize": {
            "type": "integer"
          },
          "serverIp": {
            "type": "ip"
          },
          "status": {
            "type": "integer"
          },
          "userAgent": {
            "type": "text",
            "fields": {
              "keyword": {
                "type": "keyword",
                "ignore_above": 256
              }
            }
          }
        }
      },
      "insertId": {
        "type": "text",
        "fields": {
          "keyword": {
            "type": "keyword",
            "ignore_above": 256
          }
        }
      },
      "jsonPayload": {
        "properties": {
          "@type": {
            "type": "text",
            "fields": {
              "keyword": {
                "type": "keyword",
                "ignore_above": 256
              }
            }
          },
          "backendTargetProjectNumber": {
            "type": "text",
            "fields": {
              "keyword": {
                "type": "keyword",
                "ignore_above": 256
              }
            }
          },
          "cacheDecision": {
            "type": "text",
            "fields": {
              "keyword": {
                "type": "keyword",
                "ignore_above": 256
              }
            }
          },
          "enforcedSecurityPolicy": {
            "properties": {
              "configuredAction": {
                "type": "text",
                "fields": {
                  "keyword": {
                    "type": "keyword",
                    "ignore_above": 256
                  }
                }
              },
              "name": {
                "type": "text",
                "fields": {
                  "keyword": {
                    "type": "keyword",
                    "ignore_above": 256
                  }
                }
              },
              "outcome": {
                "type": "text",
                "fields": {
                  "keyword": {
                    "type": "keyword",
                    "ignore_above": 256
                  }
                }
              },
              "preconfiguredExprIds": {
                "type": "text",
                "fields": {
                  "keyword": {
                    "type": "keyword",
                    "ignore_above": 256
                  }
                }
              },
              "priority": {
                "type": "long"
              }
            }
          },
          "previewSecurityPolicy": {
            "properties": {
              "configuredAction": {
                "type": "text",
                "fields": {
                  "keyword": {
                    "type": "keyword",
                    "ignore_above": 256
                  }
                }
              },
              "name": {
                "type": "text",
                "fields": {
                  "keyword": {
                    "type": "keyword",
                    "ignore_above": 256
                  }
                }
              },
              "outcome": {
                "type": "text",
                "fields": {
                  "keyword": {
                    "type": "keyword",
                    "ignore_above": 256
                  }
                }
              },
              "preconfiguredExprIds": {
                "type": "text",
                "fields": {
                  "keyword": {
                    "type": "keyword",
                    "ignore_above": 256
                  }
                }
              },
              "priority": {
                "type": "long"
              }
            }
          },
          "remoteIp": {
            "type": "ip"
          },
          "securityPolicyRequestData": {
            "properties": {
              "remoteIpInfo": {
                "properties": {
                  "regionCode": {
                    "type": "text",
                    "fields": {
                      "keyword": {
                        "type": "keyword",
                        "ignore_above": 256
                      }
                    }
                  }
                }
              },
              "tlsJa3Fingerprint": {
                "type": "text",
                "fields": {
                  "keyword": {
                    "type": "keyword",
                    "ignore_above": 256
                  }
                }
              }
            }
          },
          "statusDetails": {
            "type": "text",
            "fields": {
              "keyword": {
                "type": "keyword",
                "ignore_above": 1024
              }
            }
          }
        }
      },
      "logName": {
        "type": "text",
        "fields": {
          "keyword": {
            "type": "keyword",
            "ignore_above": 256
          }
        }
      },
      "logging": {
        "properties": {
          "googleapis": {
            "properties": {
              "com/timestamp": {
                "type": "date"
              }
            }
          }
        }
      },
      "message": {
        "type": "text",
        "fields": {
          "keyword": {
            "type": "keyword",
            "ignore_above": 256
          }
        }
      },
      "receiveTimestamp": {
        "type": "date"
      },
      "resource": {
        "properties": {
          "labels": {
            "properties": {
              "backend_service_name": {
                "type": "text",
                "fields": {
                  "keyword": {
                    "type": "keyword",
                    "ignore_above": 256
                  }
                }
              },
              "forwarding_rule_name": {
                "type": "text",
                "fields": {
                  "keyword": {
                    "type": "keyword",
                    "ignore_above": 256
                  }
                }
              },
              "project_id": {
                "type": "text",
                "fields": {
                  "keyword": {
                    "type": "keyword",
                    "ignore_above": 256
                  }
                }
              },
              "target_proxy_name": {
                "type": "text",
                "fields": {
                  "keyword": {
                    "type": "keyword",
                    "ignore_above": 256
                  }
                }
              },
              "url_map_name": {
                "type": "text",
                "fields": {
                  "keyword": {
                    "type": "keyword",
                    "ignore_above": 256
                  }
                }
              },
              "zone": {
                "type": "text",
                "fields": {
                  "keyword": {
                    "type": "keyword",
                    "ignore_above": 256
                  }
                }
              }
            }
          },
          "type": {
            "type": "text",
            "fields": {
              "keyword": {
                "type": "keyword",
                "ignore_above": 256
              }
            }
          }
        }
      },
      "service": {
        "properties": {
          "type": {
            "type": "text",
            "fields": {
              "keyword": {
                "type": "keyword",
                "ignore_above": 256
              }
            }
          }
        }
      },
      "severity": {
        "type": "text",
        "fields": {
          "keyword": {
            "type": "keyword",
            "ignore_above": 256
          }
        }
      },
      "spanId": {
        "type": "text",
        "fields": {
          "keyword": {
            "type": "keyword",
            "ignore_above": 256
          }
        }
      },
      "timestamp": {
        "type": "date"
      },
      "trace": {
        "type": "text",
        "fields": {
          "keyword": {
            "type": "keyword",
            "ignore_above": 4096
          }
        }
      }
    }
  },
  "aliases": {},
  "lifecycle": {
    "enabled": true,
    "data_retention": "30d"
  }
}

This processor does work:

When I simulate the pipeline it gives me the same result you got, but when I look at the actual data I don't see the updated values.

From Elastic Search to Elasticsearch

I think I see the issue. When I test with this:

POST /_ingest/pipeline/_simulate?verbose=true
{
  "pipeline": {
    "description": "_description",
    "processors": [

  {
    "pipeline": {
      "description": "Clean up and break up the URL",
      "name": "uri_standarization"
    }
  }
]
  },
  "docs": [
    {
      "_index": "index",
      "_id": "id",
      "_source": {
        "httpRequest": {
          "url": {
            "path": "/api/sites/42951/shell/navigation"
          }
        }
      }
    }
  ]
}

I get this:

{
  "docs": [
    {
      "processor_results": [
        {
          "processor_type": "pipeline",
          "status": "success",
          "description": "Clean up and break up the URL"
        },
        {
          "processor_type": "uri_parts",
          "status": "success",
          "doc": {
            "_index": "index",
            "_version": "-3",
            "_id": "id",
            "_source": {
              "httpRequest": {
                "url": {
                  "path": "/api/sites/42951/shell/navigation"
                }
              }
            },
            "_ingest": {
              "pipeline": "uri_standarization",
              "timestamp": "2024-08-05T19:48:49.664437787Z"
            }
          }
        },
        {
          "processor_type": "gsub",
          "status": "success",
          "doc": {
            "_index": "index",
            "_version": "-3",
            "_id": "id",
            "_source": {
              "httpRequest": {
                "url": {
                  "path": "/api/sites/42951/shell/navigation",
                  "standardizedPath": "/api/sites/ID/shell/navigation"
                }
              }
            },
            "_ingest": {
              "pipeline": "uri_standarization",
              "timestamp": "2024-08-05T19:48:49.664437787Z"
            }
          }
        },
        {
          "processor_type": "gsub",
          "status": "success",
          "doc": {
            "_index": "index",
            "_version": "-3",
            "_id": "id",
            "_source": {
              "httpRequest": {
                "url": {
                  "path": "/api/sites/42951/shell/navigation",
                  "standardizedPath": "/api/sites/42951/shell/navigation"
                }
              }
            },
            "_ingest": {
              "pipeline": "uri_standarization",
              "timestamp": "2024-08-05T19:48:49.664437787Z"
            }
          }
        }
      ]
    }
  ]
}

Which implies that the second processor is overwriting the work of the first.

That was the issue. So I had two processors looking at

httpRequest.url.path

Which were both writing to:

httpRequest.url.standardizedPath

Which meant that the second one was overwriting the first one. I updated the second one to read from and write to standardizedPath, and it is working.

This topic was automatically closed 28 days after the last reply. New replies are no longer allowed.