Filebeat: converting a string field to a number in Elasticsearch

  • I modified the default access pipeline of the Filebeat nginx module. The change is as follows: I added the request_time and upstream_response_time fields.
{  
"grok": {
        "field": "message",
        "patterns":[
          "\"?%{IP_LIST:nginx.access.remote_ip_list} - %{DATA:nginx.access.user_name} \\[%{HTTPDATE:nginx.access.time}\\] \"%{GREEDYDATA:nginx.access.info}\" %{NUMBER:nginx.access.response_code} %{NUMBER:nginx.access.body_sent.bytes} \"%{DATA:nginx.access.referrer}\" \"%{DATA:nginx.access.agent}\" %{NUMBER:nginx.access.request_time} %{NUMBER:nginx.access.upstream_response_time}"
        ],
        "pattern_definitions": {
          "IP_LIST": "%{IP}(\"?,?\\s*%{IP})*"
        },
        "ignore_missing": true
      }
   },  {
      "grok": {
        "field": "nginx.access.request_time",
        "patterns": [
            "%{NUMBER:nginx.access.request_time}"
        ],
        "ignore_missing": true
      }
    }, {
      "grok": {
        "field": "nginx.access.upstream_response_time",
        "patterns": [
            "%{NUMBER:nginx.access.upstream_response_time}",
            ""
        ],
        "ignore_missing": true
      }
    }
  • Result: Filebeat pushes the nginx log to Elasticsearch, automatically formatted as JSON.
  • Query that works:
[centos@liyuanjun-nginx-filebeat ~]$ curl -XGET "http://192.168.84.25:9200/filebeat-*/_search/?pretty" -H 'Content-Type: application/json' -d'
{
  "query": {
    "match": {
      "_index": "filebeat-*"
    }
  },
  "size": 1,
  "aggs": {
    "return_response_code_total":{
      "sum": {
        "field": "nginx.access.response_code"
      }
    }
  }
}'
{
  "took" : 0,
  "timed_out" : false,
  "_shards" : {
    "total" : 3,
    "successful" : 3,
    "skipped" : 0,
    "failed" : 0
  },
  "hits" : {
    "total" : 1,
    "max_score" : 1.0,
    "hits" : [
      {
        "_index" : "filebeat-6.4.0-2018.08.29",
        "_type" : "doc",
        "_id" : "kxOahGUBeM8H3-brlX7q",
        "_score" : 1.0,
        "_source" : {
          "offset" : 0,
          "nginx" : {
            "access" : {
              "response_code" : "200",
              "method" : "GET",
              "user_name" : "-",
              "http_version" : "1.1",
              "remote_ip_list" : [
                "172.16.4.13"
              ],
              "url" : "/app/api/schools/",
              "referrer" : "-",
              "request_time" : "0.318",
              "remote_ip" : "172.16.4.13",
              "upstream_response_time" : "0.318",
              "body_sent" : {
                "bytes" : "142"
              },
              "user_agent" : {
                "patch" : "3112",
                "major" : "60",
                "minor" : "0",
                "os" : "Ubuntu",
                "name" : "Chromium",
                "os_name" : "Ubuntu",
                "device" : "Other"
              }
            }
          },
          "prospector" : {
            "type" : "log"
          },
          "read_timestamp" : "2018-08-29T07:34:06.976Z",
          "source" : "/var/log/nginx/access.log",
          "message" : "172.16.4.13 - - [29/Aug/2018:07:34:00 +0000] \"GET /app/api/schools/ HTTP/1.1\" 200 142 \"-\" \"Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Ubuntu Chromium/60.0.3112.78 Chrome/60.0.3112.78 Safari/537.36\" 0.318 0.318 .",
          "fileset" : {
            "module" : "nginx",
            "name" : "access"
          },
          "input" : {
            "type" : "log"
          },
          "@timestamp" : "2018-08-29T07:34:00.000Z",
          "beat" : {
            "hostname" : "liyuanjun-nginx-filebeat.novalocal",
            "name" : "liyuanjun-nginx-filebeat.novalocal",
            "version" : "6.4.0"
          },
          "host" : {
            "name" : "liyuanjun-nginx-filebeat.novalocal"
          }
        }
      }
    ]
  },
  "aggregations" : {
    "return_response_code_total" : {
      "value" : 200.0
    }
  }
}
  • Query that fails:
[centos@liyuanjun-nginx-filebeat ~]$ curl -XGET "http://192.168.84.25:9200/filebeat-*/_search/?pretty" -H 'Content-Type: application/json' -d'
{
  "query": {
    "match": {
      "_index": "filebeat-*"
    }
  },
  "size": 1,
  "aggs": {
    "return_request_time_total":{
      "sum": {
        "field": "nginx.access.request_time"
      }
    }
  }
}'
{
  "error" : {
    "root_cause" : [
      {
        "type" : "illegal_argument_exception",
        "reason" : "Expected numeric type on field [nginx.access.request_time], but got [keyword]"
      }
    ],
    "type" : "search_phase_execution_exception",
    "reason" : "all shards failed",
    "phase" : "query",
    "grouped" : true,
    "failed_shards" : [
      {
        "shard" : 0,
        "index" : "filebeat-6.4.0-2018.08.29",
        "node" : "dpSfU-uHS5mhi34N86jLNA",
        "reason" : {
          "type" : "illegal_argument_exception",
          "reason" : "Expected numeric type on field [nginx.access.request_time], but got [keyword]"
        }
      }
    ],
    "caused_by" : {
      "type" : "illegal_argument_exception",
      "reason" : "Expected numeric type on field [nginx.access.request_time], but got [keyword]",
      "caused_by" : {
        "type" : "illegal_argument_exception",
        "reason" : "Expected numeric type on field [nginx.access.request_time], but got [keyword]"
      }
    }
  },
  "status" : 400
}

So, how do I change "nginx.access.request_time" to a numeric type?

See the grok documentation.

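The syntax for a 'capture' is %{SYNTAX:SEMANTIC:TYPE}. Because request_time is a decimal value such as 0.318, you can try writing %{NUMBER:nginx.access.request_time:float} (an int conversion would not fit a fractional value). Note that the field is already mapped as keyword in the existing index, so the numeric type will only take effect in a newly created index or after reindexing.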

Ingest Node also has a convert processor.

Thank you for your answer. I have solved this problem.

This topic was automatically closed 28 days after the last reply. New replies are no longer allowed.