Provided Grok expressions do not match field value when the pattern field contains some regular expressions

Hi, I made a module for my IIS logs and I use grok to parse the message. Parsing fails when the pattern field contains some regular expressions, but the same pattern works in the Grok Debugger (X-Pack plugin).

Here is my ingest/default.json:

{
  "description": "Pipeline for parsing iis access logs. Requires the geoip and user_agent plugins.",
  "processors": [{
    "grok": {
      "field": "message",
      "patterns":[
        "\"%{DATA:iis.access.vhost}\" \"%{IPORHOST:iis.access.remote_ip}\\,*\\s*.*\" \"-\" %{DATA:iis.access.user_name} %{TIMESTAMP_ISO8601:iis.access.time} %{WORD:iis.access.method} %{DATA:iis.access.url} %{NOTSPACE:iis.access.query_string} \"HTTP\/%{NUMBER:iis.access.http_version}\" %{NUMBER:iis.access.response_code} %{NUMBER:iis.access.body_sent.bytes} \"%{DATA:iis.access.referrer}\" (?=\\-|\"%{DATA:iis.access.user_agent}\")"
        ],
      "ignore_missing": true
    }
  },
  {
  "set": {
    "field": "iis.access.agent",
    "value": "-",
	"override": false
  }
}
  ,
  {
    "remove":{
      "field": "message"
    }
  }, {
    "rename": {
      "field": "@timestamp",
      "target_field": "read_timestamp"
    }
  }, {
    "date": {
      "field": "iis.access.time",
      "target_field": "@timestamp",
      "formats": ["yyyy-MM-dd HH:mm:ss"]
    }
  }, {
    "remove": {
      "field": "iis.access.time"
    }
  }, {
    "user_agent": {
      "field": "iis.access.agent",
      "target_field": "iis.access.user_agent",
      "ignore_failure": true
    }
  }, {
    "remove": {
      "field": "iis.access.agent",
      "ignore_failure": true
    }
  }, {
    "geoip": {
      "field": "iis.access.remote_ip",
      "target_field": "iis.access.geoip"
    }
  }],
  "on_failure" : [{
    "set" : {
      "field" : "error.message",
      "value" : "{{ _ingest.on_failure_message }}"
    }
  }]
}

Here is the error from the Filebeat log:

2018-04-21T21:47:08.934+0800 ERROR pipeline/output.go:74 Failed to connect: Connection marked as failed because the onConnect callback failed: Error getting pipeline for fileset iis/access: Error JSON decoding the pipeline file: ingest/default.json: invalid character ',' in string escape code

So I escaped the JSON and the pipeline file now loads, but I get the error "Provided Grok expressions do not match field value".

"grok": {
      "field": "message",
      "patterns":[
        "\"%{DATA:iis.access.vhost}\" \"%{IPORHOST:iis.access.remote_ip}\\,*\\s*.*\" \"-\" %{DATA:iis.access.user_name} %{TIMESTAMP_ISO8601:iis.access.time} %{WORD:iis.access.method} %{DATA:iis.access.url} %{NOTSPACE:iis.access.query_string} \"HTTP\/%{NUMBER:iis.access.http_version}\" %{NUMBER:iis.access.response_code} %{NUMBER:iis.access.body_sent.bytes} \"%{DATA:iis.access.referrer}\" (?=\\-|\"%{DATA:iis.access.user_agent}\")"
        ],
      "ignore_missing": true
    }


Here is a sample line from my IIS log:
"wapapi.433.com" "113.16.249.49, 14.116.141.83" "-" - 2018-04-21 10:07:27.457 POST /app/v2/common/CheckDataIsUpdate - "HTTP/1.1" 200 560 "http://m.433.com/views/score/score_index.html?moduleid=score" "Mozilla/5.0 (Linux; U; Android 7.0; zh-CN; BLN-AL40 Build/HONORBLN-AL40) AppleWebKit/537.36 (KHTML, like Gecko) Version/4.0 Chrome/57.0.2987.108 UCBrowser/11.9.4.974 Mobile Safari/537.36"

I'm using Elasticsearch and Filebeat 6.2.2.
If anyone has any ideas, I would be very grateful!

There is a script inside the Beats repo which lets you test your pipeline using the Simulate API, similarly to the Dev Tools of Kibana. I tested your pipeline with the log you provided. The problem seems to be with the format of iis.access.time.

The script is under filebeat/scripts/tester. I built it and ran it like this:

$ ./scripts/tester/execute_pipeline --pipeline "pipeline.json" \
--log '"wapapi.433.com" "113.16.249.49, 14.116.141.83" "-" - 2018-04-21 10:07:27.457 POST /app/v2/common/CheckDataIsUpdate - "HTTP/1.1" 200 560 "http://m.433.co
m/views/score/score_index.html?moduleid=score" "Mozilla/5.0 (Linux; U; Android 7.0; zh-CN; BLN-AL40 Build/HONORBLN-AL40) AppleWebKit/537.36 (KHTML, like Gecko)
Version/4.0 Chrome/57.0.2987.108 UCBrowser/11.9.4.974 Mobile Safari/537.36"' \
--simulate.verbose \
--verbose

I got this error:

 {
   "error": {
     "caused_by": {
       "reason": "Invalid format: \"2018-04-21 10:07:27.457\" is malformed at \".457\"",
       "type": "illegal_argument_exception"
     },
     "reason": "unable to parse date [2018-04-21 10:07:27.457]",
     "root_cause": [
       {
         "reason": "unable to parse date [2018-04-21 10:07:27.457]",
         "type": "illegal_argument_exception"
       }
     ],
     "type": "illegal_argument_exception"
   }
 },
 {
   "doc": {
     "_id": "id",
     "_index": "index",
     "_ingest": {
       "on_failure_message": "Invalid format: \"2018-04-21 10:07:27.457\" is malformed at \".457\"",
       "on_failure_processor_tag": null,
       "on_failure_processor_type": "date",
       "timestamp": "2018-04-26T08:51:22.938Z"
     },
     "_source": {
       "error": {
         "message": "Invalid format: \\\"2018-04-21 10:07:27.457\\\" is malformed at \\\".457\\\""
       },
       "iis": {
         "access": {
           "agent": "-",
           "body_sent": {
             "bytes": "560"
           },
           "http_version": "1.1",
           "method": "POST",
           "query_string": "-",
           "referrer": "http://m.433.com/views/score/score_index.html?moduleid=score",
           "remote_ip": "113.16.249.49",
           "response_code": "200",
           "time": "2018-04-21 10:07:27.457",
           "url": "/app/v2/common/CheckDataIsUpdate",
           "user_agent": "Mozilla/5.0 (Linux; U; Android 7.0; zh-CN; BLN-AL40 Build/HONORBLN-AL40) AppleWebKit/537.36 (KHTML, like Gecko) Version/4.0 Chrome/57.0.2987.108 UCBrowser/11.9.4.974 Mobile Safari/537.36",
           "user_name": "-",
           "vhost": "wapapi.433.com"
         }
       },
       "read_timestamp": "2018-04-26T08:51:22.931Z"
     },
     "_type": "doc"
   }
 }

In the "formats" of the date processor, change "yyyy-MM-dd HH:mm:ss" to "yyyy-MM-dd HH:mm:ss.SSS", so that the millisecond part of the timestamp (".457") can be parsed.

Thanks, it works now.

This topic was automatically closed 28 days after the last reply. New replies are no longer allowed.