Creating custom grok pattern

I was working with Logstash to structure the following types of logs:

2023-09-05 11:53:25 (152.32.73.6)-Logistics Request Approved: {"id":7355,"lr_number":"LR-M006108","lr_type":"2","lr_type_list":"1","lr_type_others":null,"lr_username":"XXXXX","expected_date":"2023-08-31","company":"XXXX","branch":null,"contact_person":"XXXXX \/ XXXXX","contact_number":"XXXXX","delivery_address":"XXXXXXX","city_id":"4689","email_address":"XXXXXX","po_number":null,"jira_ticket":null,"delivery_ticket":"1","delivery_instruction":null,"my_contact":"00000000000000000000","payment_method":"4","coll_contact_person":null,"coll_contact_number":null,"coll_department":"COLLECTIONS DEPARTMENT","is_external":"1","lr_current":"FFV","lr_status":"For Validation","dr_number":"230472, 230473,230474, 230475","si_number":null,"delivery_method":null,"delivery_reference":null,"delivery_driver":null,"created_at":"2023-08-30T06:12:32.000000Z","updated_at":"2023-09-05T03:53:25.000000Z","lr_userdept":"XXXXXXX","or_number":null,"location_id":1,"assignment_id":2,"am_assigned":"XXXXX","is_plant":"1","date_created":"2023-08-30 14:12:17","is_invoice":"2"}

and I wanted to extract the following data out of it

TIMESTAMP
ID
LR_NUMBER
LR_TYPE
LR_TYPE_LIST
LR_TYPE_OTHERS
LR_USERNAME
EXPECTED_DATE
COMPANY
BRANCH
CONTACT_PERSON
CONTACT_NUMBER
DELIVERY_ADDRESS
CITY_ID
EMAIL_ADDRESS
PO_NUMBER
JIRA_TICKET
DELIVERY_TICKET
DELIVERY_INSTRUCTION
MY_CONTACT
PAYMENT_METHOD
COLL_CONTACT_PERSON
COLL_CONTACT_NUMBER
COLL_DEPARTMENT
IS_EXTERNAL
LR_CURRENT
LR_STATUS
DR_NUMBER
LOCATION_ID
ASSIGNMENT_ID
IS_PLANT
DATE_CREATED
IS_INVOICE

However, I'm not able to figure out an appropriate filter for such a large log event. I've gone through Logstash grok documentation as well, but still couldn't extract the required fields.

Please suggest an approach to this and how to directly filter the following fields without creating extra fields like time and date.

Your help is appreciated!
Thank you.

I use these three pages to make writing Grok patterns easier:

grok debugger

Regular expressions

github grok cheat sheet

The first link has an example that uses your data.

This should work.

# Pipeline: read the logistics log, split each line into its prefix and JSON
# payload, expand the payload into event fields, and set @timestamp from the
# timestamp at the start of the log line.
input {
  file {
    path => "/path/logistic.log"
    start_position => "beginning"
    # /dev/null discards the read position, so the file is re-read from the
    # start on every restart. Handy for testing; in production point this at a
    # persistent sincedb file instead.
    sincedb_path => "/dev/null"
  }
}

filter {
  # Line layout: "<timestamp> (<ip>)-<description>: <json payload>".
  # The timestamp and the raw JSON text are stored under [@metadata], so they
  # are available to the filters below but are not emitted as extra fields.
  grok {
    match => { "message" => ["%{TIMESTAMP_ISO8601:[@metadata][timestamp]}%{SPACE}\(%{IP:ip}\)-%{DATA:log}: %{GREEDYDATA:[@metadata][logmsg]}"] }
  }

  # Expand the JSON payload into top-level event fields (id, lr_number, ...).
  json {
    source => "[@metadata][logmsg]"
  }

  date {
    match => ["[@metadata][timestamp]", "yyyy-MM-dd HH:mm:ss"]
    # The sample line is stamped 11:53:25 while its updated_at is 03:53:25Z, so
    # the log clock runs at UTC+08:00. "Etc/GMT-8" is that fixed offset (the
    # sign in Etc/ zone names is inverted). Replace it with the regional zone ID
    # of the server that writes the log if that is known.
    timezone => "Etc/GMT-8"
    target => "@timestamp"
  }
}

output {
  stdout { }
}

You should also convert fields created_at, updated_at... to the date format.

2 Likes

Thank you @Rios and @cperzrt10 . :grinning:
Can you please check if it's correct?

CUSTOM GROK PATTERN

%{TIMESTAMP_ISO8601:time}.*id\":%{NUMBER:ID},"lr_number\":\"(?<LR_NUMBER>[^")]*).,"lr_type\":\"(?<LR_TYPE>[^")]*).,"lr_type_list\":\"(?<LR_TYPE_LIST>[^")]*).,"lr_type_others\":(?<LR_TYPE_OTHERS>[^)].*),"lr_username\":"(?<LR_USERNAME>[^")]*).,"expected_date\":"(?<EXPECTED_DATE>[^")]*).,"company\":"(?<COMPANY>[^")]*).,"branch\":(?<BRANCH>[^)].*),"contact_person\":"(?<CONTACT_PERSON>[^")]*).,"contact_number\":"(?<CONTACT_NUMBER>[^")]*).,"delivery_address\":"(?<DELIVERY_ADDRESS>[^")]*).,"city_id\":"(?<CITY_ID>[^")]*).,"email_address\":"(?<EMAIL_ADDRESS>[^")]*).,"po_number\":(?<PO_NUMBER>[^)].*),"jira_ticket\":(?<JIRA_TICKET>[^)].*),"delivery_ticket\":"(?<DELIVERY_TICKET>[^")]*).,"delivery_instruction\":(?<DELIVERY_INSTRUCTION>[^)].*),"my_contact\":"(?<MY_CONTACT>[^")]*).,"payment_method\":"(?<PAYMENT_METHOD>[^")]*).,"coll_contact_person\":(?<COLL_CONTACT_PERSON>[^)].*),"coll_contact_number\":(?<COLL_CONTACT_NUMBER>[^)].*),"coll_department\":"(?<COLL_DEPARTMENT>[^")]*).,"is_external\":"(?<IS_EXTERNAL>[^")]*).,"lr_current\":"(?<LR_CURRENT>[^")]*).,"lr_status\":"(?<LR_STATUS>[^")]*).,"dr_number\":"(?<DR_NUMBER>[^")]*).,"si_number\":(?<SI_NUMBER>[^)].*),"delivery_method\":(?<DELIVERY_METHOD>[^)].*),"delivery_reference\":(?<DELIVERY_REFERENCE>[^)].*),"delivery_driver\":(?<DELIVERY_DRIVER>[^)].*),"created_at\":"%{TIMESTAMP_ISO8601:CREATED_AT}.,"updated_at\":"%{TIMESTAMP_ISO8601:UPDATED_AT}.,"lr_userdept\":"(?<LR_USERDEPT>[^")]*).,"or_number\":(?<OR_NUMBER>[^)].*),"location_id\":%{NUMBER:LOCATION_ID},"assignment_id\":%{NUMBER:ASSIGNMENT_ID},"am_assigned\":"(?<AM_ASSIGNED>[^")]*).,"is_plant\":"(?<IS_PLANT>[^")]*).,"date_created\":"%{TIMESTAMP_ISO8601:DATE_CREATED}.,"is_invoice\":"(?<IS_INVOICE>[^")]*)

OUTPUT

[
  {
    "time": "2023-09-05 11:53:25",
    "ID": 7355,
    "LR_NUMBER": "LR-M006108",
    "LR_TYPE": 2,
    "LR_TYPE_LIST": 1,
    "LR_TYPE_OTHERS": "null",
    "LR_USERNAME": "XXXXX",
    "EXPECTED_DATE": "2023-08-31",
    "COMPANY": "XXXX",
    "BRANCH": "null",
    "CONTACT_PERSON": "XXXXX \\/ XXXXX",
    "CONTACT_NUMBER": "XXXXX",
    "DELIVERY_ADDRESS": "XXXXXXX",
    "CITY_ID": 4689,
    "EMAIL_ADDRESS": "XXXXXX",
    "PO_NUMBER": "null",
    "JIRA_TICKET": "null",
    "DELIVERY_TICKET": 1,
    "DELIVERY_INSTRUCTION": "null",
    "MY_CONTACT": 0,
    "PAYMENT_METHOD": 4,
    "COLL_CONTACT_PERSON": "null",
    "COLL_CONTACT_NUMBER": "null",
    "COLL_DEPARTMENT": "COLLECTIONS DEPARTMENT",
    "IS_EXTERNAL": 1,
    "LR_CURRENT": "FFV",
    "LR_STATUS": "For Validation",
    "DR_NUMBER": "230472, 230473,230474, 230475",
    "SI_NUMBER": "null",
    "DELIVERY_METHOD": "null",
    "DELIVERY_REFERENCE": "null",
    "DELIVERY_DRIVER": "null",
    "CREATED_AT": "2023-08-30T06:12:32.000000Z",
    "UPDATED_AT": "2023-09-05T03:53:25.000000Z",
    "LR_USERDEPT": "XXXXXXX",
    "OR_NUMBER": "null",
    "LOCATION_ID": 1,
    "ASSIGNMENT_ID": 2,
    "AM_ASSIGNED": "XXXXX",
    "IS_PLANT": 1,
    "DATE_CREATED": "2023-08-30 14:12:17",
    "IS_INVOICE": 2
  }
]

Thank you so much for your help!

@Cruz, with your regex I get this result:

[
    {
      "time": "2023-09-05 11:53:25",
      "ID": 7355,
      "LR_NUMBER": "LR-M006108",
      "LR_TYPE": 2,
      "LR_TYPE_LIST": 1,
      "LR_TYPE_OTHERS": "null",
      "LR_USERNAME": "XXXXX",
      "EXPECTED_DATE": "2023-08-31",
      "COMPANY": "XXXX",
      "BRANCH": "null",
      "CONTACT_PERSON": "XXXXXX \\/XXXXX",
      "CONTACT_NUMBER": "XXXXXX",
      "DELIVERY_ADDRESS": "XXXXXXX",
      "CITY_ID": 4689,
      "EMAIL_ADDRESS": "XXXXXX",
      "PO_NUMBER": "null",
      "JIRA_TICKET": "null",
      "DELIVERY_TICKET": 1,
      "DELIVERY_INSTRUCTION": "null",
      "MY_CONTACT": 0,
      "PAYMENT_METHOD": 4,
      "COLL_CONTACT_PERSON": "null",
      "COLL_CONTACT_NUMBER": "null",
      "COLL_DEPARTMENT": "COLLECTIONS DEPARTMENT",
      "IS_EXTERNAL": 1,
      "LR_CURRENT": "FFV",
      "LR_STATUS": "For Validation",
      "DR_NUMBER": "230472, 230473,230474, 230475",
      "SI_NUMBER": "null",
      "DELIVERY_METHOD": "null",
      "DELIVERY_REFERENCE": "null",
      "DELIVERY_DRIVER": "null",
      "CREATED_AT": "2023-08-30T06:12:32.000000Z",
      "UPDATED_AT": "2023-09-05T03:53:25.000000Z",
      "LR_USERDEPT": "XXXXXXX",
      "OR_NUMBER": "null",
      "LOCATION_ID": 1,
      "ASSIGNMENT_ID": 2,
      "AM_ASSIGNED": "XXXXXX",
      "IS_PLANT": 1,
      "DATE_CREATED": "2023-08-30 14:12:17",
      "IS_INVOICE": 2
    }
]

I think it's ok.

But I think @Rios's solution is more efficient and easier to maintain.

This topic was automatically closed 28 days after the last reply. New replies are no longer allowed.