Help parsing ndjson as json using json filter

Hi, I am trying to use Winlogbeat to get a json file with Windows events. Winlogbeat produces ndjson, so I was trying to convert it to json like this:

When I run logstash with this configuration file, it does not save any output.json. The problem seems to be my ndjson does not contain an array like "message" from which I can parse the data. I tried:

input {
  file {
    path => ["c:/program files/winlogbeat/winlogbeat/data/*.ndjson"]
    start_position => "beginning"
    sincedb_path => "NUL"
  }
}

filter {
  json {
    source => "message"
  }
}

output {
  file {
    path => ["c:/program files/winlogbeat/winlogbeat/data/json/output.json"]
  }
}

And this doesn't output any file. I tried the following, as I don't have a message field:

filter {
  json { }
}

But Logstash threw an error saying # SETTING MISSING.
I also tried with source => "winlog", which is a valid field in the ndjson, and this works: I get a json file, but the content does not include everything I need, as that's just one of the fields.

Any ideas of how I can break this ndjson down to json? My data looks like this:

{
  "@timestamp": "2024-04-01T13:23:03.476Z",
  "@metadata": {
    "beat": "winlogbeat",
    "type": "_doc",
    "version": "8.13.0"
  },
  "host": {
    "mac": [
      "xx-xx-xx-xx-xx-xx"
    ],
    "name": "vm-demobox",
    "hostname": "vm-jpl",
    "architecture": "x86_64",
    "os": {
      "version": "10.0",
      "family": "windows",
      "name": "Windows Server 2019 Datacenter",
      "kernel": "10.0.17763.5576 (WinBuild.160101.0800)",
      "build": "17763.5576",
      "type": "windows",
      "platform": "windows"
    },
    "id": "xxxxxxx",
    "ip": [
      "xxx",
      "xxx"
    ]
  },
  "ecs": {
    "version": "8.0.0"
  },
  "agent": {
    "type": "winlogbeat",
    "version": "8.13.0",
    "ephemeral_id": "xxx",
    "id": "xxx",
    "name": "vm-demobox"
  },
  "cloud": {
    "account": {
      "id": "xxx"
    },
    "instance": {
      "id": "xxx",
      "name": "vm-demobox"
    },
    "provider": "azure",
    "machine": {
      "type": "Standard_B4ms"
    },
    "service": {
      "name": "Virtual Machines"
    },
    "region": "eastus"
  },
  "winlog": {
    "activity_id": "{d16ec0cc-8859-4444-9a61-9d0224418a88}",
    "event_id": "4625",
    "provider_guid": "{54849625-5478-4994-a5ba-3e3b0328c30d}",
    "api": "wineventlog",
    "channel": "Security",
    "keywords": [
      "Audit Failure"
    ],
    "task": "Logon",
    "process": {
      "pid": 792,
      "thread": {
        "id": 908
      }
    },
    "record_id": 118461,
    "event_data": {
      "FailureReason": "%%2313",
      "SubStatus": "0xc0000064",
      "WorkstationName": "-",
      "IpPort": "0",
      "LogonProcessName": "NtLmSsp ",
      "AuthenticationPackageName": "NTLM",
      "SubjectUserSid": "S-1-0-0",
      "SubjectDomainName": "-",
      "SubjectLogonId": "0x0",
      "ProcessId": "0x0",
      "ProcessName": "-",
      "IpAddress": "87.251.75.120",
      "Status": "0xc000006d",
      "SubjectUserName": "-",
      "TargetUserName": "OFFICE",
      "KeyLength": "0",
      "LmPackageName": "-",
      "TransmittedServices": "-",
      "LogonType": "3",
      "TargetUserSid": "S-1-0-0"
    },
    "computer_name": "vm-demobox",
    "opcode": "Info",
    "provider_name": "Microsoft-Windows-Security-Auditing"
  },
  "event": {
    "outcome": "failure",
    "action": "Logon",
    "created": "2024-04-01T13:23:05.348Z",
    "code": "4625",
    "kind": "event",
    "provider": "Microsoft-Windows-Security-Auditing"
  },
  "log": {
    "level": "information"
  },
  "message": "message text"
},
{
  "@timestamp": "2024-04-01T13:23:03.476Z",
  "@metadata": {
    "beat": "winlogbeat",
    "type": "_doc",
    "version": "8.13.0"
  },
  "host": {
    "mac": [
      "xx-xx-xx-xx-xx-xx"
    ],
    "name": "vm-demobox",
    "hostname": "vm-jpl",
    "architecture": "x86_64",
    "os": {
      "version": "10.0",
      "family": "windows",
      "name": "Windows Server 2019 Datacenter",
      "kernel": "10.0.17763.5576 (WinBuild.160101.0800)",
      "build": "17763.5576",
      "type": "windows",
      "platform": "windows"
    },
    "id": "xxxxxxx",
    "ip": [
      "xxx",
      "xxx"
    ]
  },
  "ecs": {
    "version": "8.0.0"
  },
  "agent": {
    "type": "winlogbeat",
    "version": "8.13.0",
    "ephemeral_id": "xxx",
    "id": "xxx",
    "name": "vm-demobox"
  },
  "cloud": {
    "account": {
      "id": "xxx"
    },
    "instance": {
      "id": "xxx",
      "name": "vm-demobox"
    },
    "provider": "azure",
    "machine": {
      "type": "Standard_B4ms"
    },
    "service": {
      "name": "Virtual Machines"
    },
    "region": "eastus"
  },
  "winlog": {
    "activity_id": "{d16ec0cc-8859-4444-9a61-9d0224418a88}",
    "event_id": "4625",
    "provider_guid": "{54849625-5478-4994-a5ba-3e3b0328c30d}",
    "api": "wineventlog",
    "channel": "Security",
    "keywords": [
      "Audit Failure"
    ],
    "task": "Logon",
    "process": {
      "pid": 792,
      "thread": {
        "id": 908
      }
    },
    "record_id": 118461,
    "event_data": {
      "FailureReason": "%%2313",
      "SubStatus": "0xc0000064",
      "WorkstationName": "-",
      "IpPort": "0",
      "LogonProcessName": "NtLmSsp ",
      "AuthenticationPackageName": "NTLM",
      "SubjectUserSid": "S-1-0-0",
      "SubjectDomainName": "-",
      "SubjectLogonId": "0x0",
      "ProcessId": "0x0",
      "ProcessName": "-",
      "IpAddress": "87.251.75.120",
      "Status": "0xc000006d",
      "SubjectUserName": "-",
      "TargetUserName": "OFFICE",
      "KeyLength": "0",
      "LmPackageName": "-",
      "TransmittedServices": "-",
      "LogonType": "3",
      "TargetUserSid": "S-1-0-0"
    },
    "computer_name": "vm-demobox",
    "opcode": "Info",
    "provider_name": "Microsoft-Windows-Security-Auditing"
  },
  "event": {
    "outcome": "failure",
    "action": "Logon",
    "created": "2024-04-01T13:23:05.348Z",
    "code": "4625",
    "kind": "event",
    "provider": "Microsoft-Windows-Security-Auditing"
  },
  "log": {
    "level": "information"
  },
  "message": "message text"
}

Thanks in advance!

What exactly do you want to parse? The log you shared is already parsed.

I'm not sure what the issue is here.

Maybe parsing isn’t the right word. I mean that I have an ndjson with multiple arrays like the one I pasted.
I need the file in json format though, I need a way to convert it…
Thanks!

An ndjson file is a file where each line is a json document.

Something like this:

{ json document }
{ json document }
{ json document }
{ json document }
{ json document }

If you have something like this:

[ { json document }, { json document }, { json document }, { json document }, { json document } ]

Then it is not an ndjson file.

Can you open your file in a text editor and share a screenshot of it to make it clear what kind of file you have?

For the sample above, this can help:

input {
  file {
   path => [ "c:/program files/winlogbeat/winlogbeat/data/*.ndjson" ]
   start_position => beginning
   sincedb_path => "NUL"
   codec => multiline
      {
            pattern => '^{'
            negate => true
            what => previous
            auto_flush_interval => 1
            multiline_tag => ""
      }
  }            
}
filter {
    mutate
    {
        gsub => [ 'message',"\r\n\s*",'']
        gsub => [ 'message',",\r",'']
        gsub => [ 'message',"}\r",'}']
    }
	
	json { source => "message"  }
  
     date { 
         match => ["[event][created]", "ISO8601"]
         target=> "[event][created]"
     }
	
}
output {
    stdout { }
}

Thanks! I think this is close to what I need. I tested this and it did output the content into the json file I specified; however, it was invalid JSON. The issue is that there should be a [ at the beginning and a ] at the end of the document (this is missing), plus there are no commas separating the objects (so a comma before each following {"@timestamp": would be required). I am trying to fix it with chatgpt for now, and it looks like I may need ruby for this... If you have any suggestions, that would be fantastic.

Here is a screenshot of the original file. The code I pasted above was formatted by ndjson formatter to make it more readable, as the file was very large

Not sure what is invalid, however it's working with your sample.

If it is the multiline JSON shown in your screenshot, then instead of codec => multiline you can use codec => json_lines { }. The mutate gsub replacements shouldn't be used in that case.

This is an ndjson file and logstash should have no issue parsing it.

It is not a multiline file.

What version of Logstash are you using? If you are using version 8.X the message field is probably replaced by the field event.original, so you would need to parse this field.

You can use this:

filter {
    json {
        source => "[event][original]"
    }
}

Or you can use codec => json in your file input.