I'm trying to extract query parameters from a URL. The troublesome line from the log file I'm parsing looks something like this:
127.0.0.1 - - [09/May/2016:09:32:19 +0200] "GET /ps?attrib[vendor][]=GOK&attrib[vendor][0]=GOK HTTP/1.1" 200 12049 "-" "-"
The first occurrence of attrib produces a hash (as expected). However, the second occurrence leads to an exception:
IndexError: string not matched
My guess is that Logstash interprets the array index from the URL as a string, whereas the indexes are actually integers.
After days of googling and trying different configs, I've come to a dead end. Any idea how to make this work?
For debugging purposes:
logstash config
# Pipeline: read an Apache access log, parse each line into fields, and
# print the resulting events to stdout as JSON.
input {
  # Read the log file from the start; the sincedb (read-position file) is
  # pointed at /dev/null, so no position is kept between runs.
  file {
    path => "/var/log/apache2/some.log"
    sincedb_path => "/dev/null"
    start_position => "beginning"
  }
}

filter {
  # Split the raw access-log line into named fields. The part of the request
  # after "?" is captured as "query_string" for the filters below.
  grok {
    match => {
      "message" => '%{IPORHOST:clientip} %{USER:ident} %{USER:auth}\s?(%{NUMBER:seconds:int}\/%{NUMBER:microseconds:int})? \[%{HTTPDATE:timestamp}\] "%{WORD:verb} (%{WORD:schema}:)?[\S]+/(%{DATA:endpoint})\?%{DATA:query_string} HTTP/%{NUMBER:httpversion}" %{NUMBER:response:int} (?:-|%{NUMBER:bytes:int}) %{QS:referrer} %{QS:agent}(\s{1}(?:%{HOSTNAME:backend_used}|-) (?:%{NUMBER:backend_time_seconds:float}|-)s)?'
    }
  }

  # Decode percent-encoded characters in the query string before splitting it.
  urldecode {
    charset => "ISO-8859-1"
    field => "query_string"
  }

  # Turn "key=value" pairs separated by "&" into event fields.
  # NOTE(review): keys such as attrib[vendor][] and attrib[vendor][0] contain
  # square brackets, which Logstash presumably treats as nested field
  # references when the field is set -- likely related to the
  # "IndexError: string not matched" described above; confirm.
  kv {
    source => "query_string"
    field_split => "&"
    allow_duplicate_values => false
    recursive => true
  }

  # Parse the "timestamp" field captured by grok, using English month names.
  date {
    locale => en
    match => [ "timestamp", "dd/MMM/YYYY:HH:mm:ss Z" ]
  }

  # Look up geographic information for the client address.
  geoip {
    source => "clientip"
  }

  # Parse the user-agent string and store the result under "useragent".
  useragent {
    target => "useragent"
    source => "agent"
  }
}

output {
  # Print every event to standard output, encoded as JSON.
  stdout {
    codec => json
  }
}
custom dynamic template
{
  "template": "apache_elk_example",
  "settings": {
    "index.refresh_interval": "5s"
  },
  "mappings": {
    "_default_": {
      "numeric_detection": true,
      "dynamic_templates": [
        {
          "message_field": {
            "mapping": {
              "index": "analyzed",
              "omit_norms": true,
              "type": "string"
            },
            "match_mapping_type": "string",
            "match": "message"
          }
        },
        {
          "string_fields": {
            "mapping": {
              "index": "analyzed",
              "omit_norms": true,
              "type": "string",
              "fields": {
                "raw": {
                  "index": "not_analyzed",
                  "ignore_above": 256,
                  "type": "string"
                }
              }
            },
            "match_mapping_type": "string",
            "match": "*"
          }
        }
      ],
      "properties": {
        "geoip": {
          "dynamic": true,
          "properties": {
            "location": {
              "type": "geo_point"
            }
          },
          "type": "object"
        },
        "@version": {
          "index": "not_analyzed",
          "type": "string"
        }
      },
      "_all": {
        "enabled": true
      }
    }
  }
}