Ingest pipeline for kv split value

Hi there, I'm trying to create an ingest pipeline that uses the KV ingest processor to split a message into fields, but the parsing is not working and I'm not sure where I'm going wrong. Any help would be appreciated.


  POST /_ingest/pipeline/_simulate
{
  "pipeline": {
    "description": "_description",
    "processors": [
      {
        "kv": {
          "field_split": " ",
          "value_split": "=",
          "field": "message",
          "ignore_failure": true,
          "trim_value": "\""
          
          
        }
      }
    ]
  },
  "docs": [
    {
      "_source": {
        "message": "timestamp=2020-05-20 19:05:54,574 log_level=INFO organization=Elements application=pubsub host=pubsub.weave.local task_id=elements-stg_pubsub-server.instance-1cbb361d-80d2-11ea-800d-b2a27d3831d8._app.1 environment=stg data_center=ch2f thread=pubsub-akka.actor.default-dispatcher-9 logger=connection remote ip /96.117.86.76:55788"
      }
    }
  ]
}

I have a similar issue with the following syntax:

POST /_ingest/pipeline/_simulate
{
  "pipeline": {
    "description": "_description",
    "processors": [
      {
        "kv": {
          "field_split": " ",
          "value_split": "=",
          "field": "message",
          "ignore_failure": true,
          "trim_value": "\""
          
          
        }
      }
    ]
  },
  "docs": [
    {
      "_source": {
        "message": "2020-05-28 15:04:07,998Z level=INFO, s=rio, hostname=manifest-agent-9c94864f5-qkq29, site=seat01, ISID=5291224573784928163, component=MA, sub-component="Schedule Reader", SegmentBundle="2020-05-28 15:03:47.060299 +0000 UTC - 2020-05-28 15:03:59.072299 +0000 UTC", SourceID=, ServiceZone=[0], rio_system_version=rio-1.10.15, msg="segment bundle rcv'd""
      }
    }
  ]
}


For the first syntax I was able to find a solution:

POST /_ingest/pipeline/_simulate
{
  "pipeline": {
    "description": "_description",
    "processors": [
      {
        "kv": {
          "field_split" : """\s(?![-_,:()\w ]+?(\s+|$))""",
          "value_split": "=",
          "field": "message",
          "ignore_failure": true,
          "trim_value": " "
        }
      }
    ]
  },
  "docs": [
    {
      "_source": {
        "message": "timestamp=2020-05-20 19:05:54,574 log_level=INFO organization=Elements application=pubsub host=pubsub.weave.local task_id=elements-stg_pubsub-server.instance-1cbb361d-80d2-11ea-800d-b2a27d3831d8._app.1 environment=stg data_center=ch2f thread=pubsub-akka.actor.default-dispatcher-9 logger=connection remote ip /96.117.86.76:55788"
      }
    }
  ]
}


Results


{
  "docs" : [
    {
      "doc" : {
        "_index" : "_index",
        "_type" : "_doc",
        "_id" : "_id",
        "_source" : {
          "logger" : "connection remote ip",
          "log_level" : "INFO",
          "task_id" : "elements-stg_pubsub-server.instance-1cbb361d-80d2-11ea-800d-b2a27d3831d8._app.1",
          "data_center" : "ch2f",
          "thread" : "pubsub-akka.actor.default-dispatcher-9",
          "message" : "timestamp=2020-05-20 19:05:54,574 log_level=INFO organization=Elements application=pubsub host=pubsub.weave.local task_id=elements-stg_pubsub-server.instance-1cbb361d-80d2-11ea-800d-b2a27d3831d8._app.1 environment=stg data_center=ch2f thread=pubsub-akka.actor.default-dispatcher-9 logger=connection remote ip /96.117.86.76:55788",
          "environment" : "stg",
          "application" : "pubsub",
          "organization" : "Elements",
          "host" : "pubsub.weave.local",
          "timestamp" : "2020-05-20 19:05:54,574"
        },
        "_ingest" : {
          "timestamp" : "2020-06-01T21:08:25.4942Z"
        }
      }
    }
  ]
}



I am able to parse the message with kv when the text does not contain msg=.

example:

POST _ingest/pipeline/_simulate
{
  "pipeline": {
    "description" : "Ravi Test",
    "processors": [
      {
        "grok": {
          "field": "message",
          "patterns": ["%{TIMESTAMP_ISO8601:date} %{GREEDYDATA:msgbody}"]
        },
          "kv": {
          "field_split" : """\s(?![-_,:()\w ]+?(\s+|$))""",
          "value_split": "=",
          "field": "msgbody",
          "ignore_failure": true,
          "trim_value": " "
        }
        
      }
    ]
  },
  "docs":[
    {
      "_source": {
        "message": 	"2020-06-02 18:35:29,582Z level=INFO, s=rio, hostname=mocklp-5cb7685fff-kc4vp, component=MockLP, site=qa-long, streamID=110, rio_system_version=rio-1.10.15,""
      }
    }
  ]
}

Results (this worked):

{
  "docs" : [
    {
      "doc" : {
        "_index" : "_index",
        "_type" : "_doc",
        "_id" : "_id",
        "_source" : {
          "date" : "2020-06-02 18:35:29,582Z",
          "msg" : "rio_system_version=rio-1.10.15,",
          "streamID" : "110,",
          "level" : "INFO,",
          "message" : "2020-06-02 18:35:29,582Z level=INFO, s=rio, hostname=mocklp-5cb7685fff-kc4vp, component=MockLP, site=qa-long, streamID=110, rio_system_version=rio-1.10.15,",
          "hostname" : "mocklp-5cb7685fff-kc4vp,",
          "component" : "MockLP,",
          "site" : "qa-long,",
          "s" : "rio,",
          "msgbody" : "level=INFO, s=rio, hostname=mocklp-5cb7685fff-kc4vp, component=MockLP, site=qa-long, streamID=110,"
        },
        "_ingest" : {
          "timestamp" : "2020-06-02T18:58:15.336239Z"
        }
      }
    }
  ]
}
POST _ingest/pipeline/_simulate
{
  "pipeline": {
    "description" : "Ravi Test",
    "processors": [
      {
        "grok": {
          "field": "message",
          "patterns": ["%{TIMESTAMP_ISO8601:date} %{GREEDYDATA:msgbody} %{GREEDYDATA:msg}"]
        },
          "kv": {
          "field_split" : """\s(?![-_,:()\w ]+?(\s+|$))""",
          "value_split": "=",
          "field": "msgbody",
          "ignore_failure": true,
          "trim_value": " "
        }
        
      }
    ]
  },
  "docs":[
    {
      "_source": {
        "message": 	"2020-06-02 18:35:29,582Z level=INFO, s=rio, hostname=mocklp-5cb7685fff-kc4vp, component=MockLP, site=qa-long, streamID=110, rio_system_version=rio-1.10.15, msg="Advanced MPD timeline, live point is now 2020-06-02 18:35:31.173858 +0000 UTC""
      }
    }
  ]
}


results

{
  "error" : {
    "root_cause" : [
      {
        "type" : "parse_exception",
        "reason" : "Failed to parse content to map"
      }
    ],
    "type" : "parse_exception",
    "reason" : "Failed to parse content to map",
    "caused_by" : {
      "type" : "json_parse_exception",
      "reason" : "Unexpected character ('A' (code 65)): was expecting comma to separate Object entries\n at [Source: org.elasticsearch.transport.netty4.ByteBufStreamInput@63b6139f; line: 24, column: 184]"
    }
  },
  "status" : 400
}

Still no solution. The double quotes (" ") are causing kv to fail to recognize the split fields; the syntax is successful if I remove the " " from:
msg="Advanced MPD timeline, live point is now 2020-06-02 18:35:31.173858 +0000 UTC"

This topic was automatically closed 28 days after the last reply. New replies are no longer allowed.