Parsing URL with Logstash (using ECS fields)

Hi there
I've been working on a way to parse URLs into ECS fields, and it seems to be working well.
Hope it helps!

# Parse a URL into ECS-style url.* fields.
#
# "field" is a placeholder: replace it with the name of the event field that
# holds the URL. grok tries the patterns in order and stops at the first one
# that matches (break_on_match is true by default).
#
# NOTE(review): capture names such as url.scheme create top-level fields whose
# names contain literal dots rather than nested [url][scheme] fields - confirm
# that this is what the downstream consumer expects.
filter {
    grok {
        # One source field mapped to an array of patterns: the documented form
        # for trying several patterns against the same field.
        match => {
            "field" => [
                # match https://user:pwd@stuff.domain.com:8080/some/path?p1=v1&p2=v2#anchor
                # Port, path and query are each optional, so a URL such as
                # https://stuff.domain.com/some/path still yields url.domain and url.path.
                # The password stops at "/", "?" or "#" so that an "@" later in the
                # path or query is not mistaken for the userinfo delimiter.
                "%{URIPROTO:url.scheme}://(?:%{USER:url.username}:(?<url.password>[^@/?#]*)@)?(?:%{IPORHOST:url.domain}(?::%{POSINT:url.port})?)?(?:%{URIPATH:url.path}(?:%{URIPARAM:url.query})?)?",
                # match stuff.domain.com:8080/some/path?p1=v1&p2=v2#anchor
                # The port is still required here: without a scheme, it is what tells a
                # host apart from the first segment of a bare path. The query is optional.
                "%{IPORHOST:url.domain}(?::%{POSINT:url.port})(?:%{URIPATH:url.path}(?:%{URIPARAM:url.query})?)?",
                # match /some/path?p1=v1&p2=v2#anchor (the query is optional)
                "%{URIPATH:url.path}(?:%{URIPARAM:url.query})?"
            ]
        }
    }
    # Post-process the query string only when the URL parsed and actually had one;
    # otherwise the grok below would have nothing to match and would tag the event
    # as a parse failure even though the URL itself was parsed.
    if "_grokparsefailure" not in [tags] and [url.query] {
        grok {
            # URIPARAM also swallows the "#fragment", so split it off here and
            # drop the leading "?" from the query string.
            match => {
                "url.query" => "^\?(?<url.query>[A-Za-z0-9$.+!*'|(){},~@%&/=:;_?\-\[\]<>]*)(?:#(?:%{WORD:url.fragment}))?"
            }
            # Replace the raw capture instead of turning url.query into an array.
            overwrite => [ "url.query" ]
        }
        # Break "p1=v1&p2=v2" into one key/value pair per parameter.
        # NOTE(review): url.query stays a string while url.query.params holds the
        # parsed pairs; if these events are indexed into Elasticsearch this may
        # cause a value-vs-object mapping conflict - verify before shipping.
        kv {
            source => "url.query"
            field_split => "&"
            target => "url.query.params"
        }
    }
}

Here is a working example, which also illustrates matching one field against multiple patterns (break_on_match is true by default, so grok stops at the first pattern that matches):

input {
  # Emit a single synthetic event whose message is the sample URL to parse.
  generator {
    message => "https://user:pwd@stuff.domain.com:8080/some/path?p1=v1&p2=v2#anchor"
    count => 1
  }
}


# Parse the URL held in the message field into ECS-style url.* fields.
#
# grok tries the patterns in order and stops at the first one that matches
# (break_on_match is true by default).
#
# NOTE(review): capture names such as url.scheme create top-level fields whose
# names contain literal dots rather than nested [url][scheme] fields - confirm
# that this is what the downstream consumer expects.
filter {
  grok {
    match => {
      message => [
        # match https://user:pwd@stuff.domain.com:8080/some/path?p1=v1&p2=v2#anchor
        # Port, path and query are each optional, so a URL such as
        # https://stuff.domain.com/some/path still yields url.domain and url.path.
        # The password stops at "/", "?" or "#" so that an "@" later in the
        # path or query is not mistaken for the userinfo delimiter.
        "%{URIPROTO:url.scheme}://(?:%{USER:url.username}:(?<url.password>[^@/?#]*)@)?(?:%{IPORHOST:url.domain}(?::%{POSINT:url.port})?)?(?:%{URIPATH:url.path}(?:%{URIPARAM:url.query})?)?",
        # match stuff.domain.com:8080/some/path?p1=v1&p2=v2#anchor
        # The port is still required here: without a scheme, it is what tells a
        # host apart from the first segment of a bare path. The query is optional.
        "%{IPORHOST:url.domain}(?::%{POSINT:url.port})(?:%{URIPATH:url.path}(?:%{URIPARAM:url.query})?)?",
        # match /some/path?p1=v1&p2=v2#anchor (the query is optional)
        "%{URIPATH:url.path}(?:%{URIPARAM:url.query})?"
      ]
    }
  }
  # Post-process the query string only when the URL parsed and actually had one;
  # otherwise the grok below would have nothing to match and would tag the event
  # as a parse failure even though the URL itself was parsed.
  if "_grokparsefailure" not in [tags] and [url.query] {
    grok {
      # URIPARAM also swallows the "#fragment", so split it off here and
      # drop the leading "?" from the query string.
      match => {
        "url.query" => "^\?(?<url.query>[A-Za-z0-9$.+!*'|(){},~@%&/=:;_?\-\[\]<>]*)(?:#(?:%{WORD:url.fragment}))?"
      }
      # Replace the raw capture instead of turning url.query into an array.
      overwrite => [ "url.query" ]
    }
    # Break "p1=v1&p2=v2" into one key/value pair per parameter.
    # NOTE(review): url.query stays a string while url.query.params holds the
    # parsed pairs; if these events are indexed into Elasticsearch this may
    # cause a value-vs-object mapping conflict - verify before shipping.
    kv {
      source => "url.query"
      field_split => "&"
      target => "url.query.params"
    }
  }
}


output {
  # Print every event to the console using the json codec so the parsed
  # url.* fields can be inspected.
  stdout { codec => "json" }
}

This topic was automatically closed 28 days after the last reply. New replies are no longer allowed.