Hi, I am trying to process a CSV file with an ingest pipeline. Each line of the file looks like this:
Archive,Major Interval,DEV1,f328c29c-c695-11e5-addb-cc355a180000,CDBJVM,907047a8-5201-0000-0080-8e881e33280d,GetAdditionalAttributes,68aa61b2-5201-0000-0080-9c722b3eca55,,,,,2017-07-25,20:50:49.537407,2017-07-26 01:50:49.5374,2017-07-25,20:54:50.061352,2017-07-26 01:54:50.0613,0,0,0,0,0,0,0,0,9675,240516344,0,0,0,0,0,2,0,0,0,0,0,0,0,Anonymous
POST _ingest/pipeline/_simulate
{
  "pipeline": {
    "description": "Ingest flow statistics",
    "processors": [
      {
        "grok": {
          "field": "message",
          "patterns": [
            "%{DATA},%{DATA},%{DATA:broker},%{DATA},%{DATA:egname},%{DATA},%{DATA:flowname},%{DATA},%{DATA},%{DATA},%{DATA},%{DATA},%{DATA},%{DATA},%{DATA},%{DATA:enddate},%{DATA:endtime},%{DATA},%{DATA},%{DATA},%{DATA},%{DATA},%{DATA},%{DATA:cpu_time},%{DATA},%{DATA},%{DATA},%{DATA},%{DATA:input_messages},%{GREEDYDATA:extra}"
          ]
        }
      },
      {
        "convert": {
          "field": "cpu_time",
          "type": "integer"
        }
      },
      {
        "convert": {
          "field": "input_messages",
          "type": "integer"
        }
      },
      {
        "set": {
          "field": "timestamp",
          "value": "{{enddate}} {{endtime}}"
        }
      },
      {
        "date": {
          "field": "timestamp",
          "formats": [
            "yyyy-MM-dd HH:mm:ss.SSSSSS"
          ],
          "timezone": "America/Chicago"
        }
      },
      {
        "date_index_name": {
          "field": "@timestamp",
          "index_name_format": "yyyy.MM.dd",
          "index_name_prefix": "logstash-egstats-",
          "date_rounding": "d"
        }
      },
      {
        "remove": {
          "field": "timestamp"
        }
      },
      {
        "remove": {
          "field": "enddate"
        }
      },
      {
        "remove": {
          "field": "endtime"
        }
      },
      {
        "remove": {
          "field": "extra"
        }
      },
      {
        "set": {
          "field": "_type",
          "value": "egstats"
        }
      }
    ],
    "on_failure": [
      {
        "set": {
          "field": "_index",
          "value": "failed-{{ _type }}"
        }
      },
      {
        "set": {
          "field": "error",
          "value": "{{ _ingest.on_failure_message }}"
        }
      }
    ]
  },
  "docs": [
    {
      "_score": 1,
      "_source": {
        "input_type": "log",
        "message": "Archive,Major Interval,DEV1,f328c29c-c695-11e5-addb-cc355a180000,CDBJVM,907047a8-5201-0000-0080-8e881e33280d,GetAdditionalAttributes,68aa61b2-5201-0000-0080-9c722b3eca55,,,,,2017-07-25,20:50:49.537407,2017-07-26 01:50:49.5374,2017-07-25,20:54:50.061352,2017-07-26 01:54:50.0613,0,0,0,0,0,0,0,0,9675,240516344,0,0,0,0,0,2,0,0,0,0,0,0,0,Anonymous"
      }
    }
  ]
}
This works fine. But when I create the pipeline and feed the CSV file to it using filebeat, I get an error. I'm not sure why filebeat or my pipeline is not treating each CSV line as a single event.
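For reference, I create the pipeline like this before pointing filebeat at it; the body is trimmed here to just the grok processor, but the real request uses the full processors and on_failure arrays from the simulate call above, and the name matches the pipeline option in the filebeat config below:

PUT _ingest/pipeline/egstats
{
  "description": "Ingest flow statistics",
  "processors": [
    {
      "grok": {
        "field": "message",
        "patterns": [
          "%{DATA},%{DATA},%{DATA:broker},%{DATA},%{DATA:egname},%{DATA},%{DATA:flowname},%{DATA},%{DATA},%{DATA},%{DATA},%{DATA},%{DATA},%{DATA},%{DATA},%{DATA:enddate},%{DATA:endtime},%{DATA},%{DATA},%{DATA},%{DATA},%{DATA},%{DATA},%{DATA:cpu_time},%{DATA},%{DATA},%{DATA},%{DATA},%{DATA:input_messages},%{GREEDYDATA:extra}"
        ]
      }
    }
  ]
}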
Any help is appreciated.
Here's the error I get:
error:Provided Grok expressions do not match field value: [0,0,0,Anonymous]
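For what it's worth, running just that fragment through the same pipeline with the simulate API reproduces the grok failure (the fragment has only 4 of the 30 comma-separated fields the pattern expects), which makes me think filebeat is publishing a partial line as its own event:

POST _ingest/pipeline/egstats/_simulate
{
  "docs": [
    {
      "_source": {
        "message": "0,0,0,Anonymous"
      }
    }
  ]
}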
I'm also attaching the filebeat config file:
filebeat:
  prospectors:
    - input_type: log
      paths:
        - /tmp/*.csv
      encoding: plain
      fields_under_root: false
      exclude_lines: ["^Record"]
      scan_frequency: 10s
      harvester_buffer_size: 16384
      max_bytes: 10485760
      multiline.pattern: '^Anonymous'
      multiline.negate: true
      multiline.match: before
      pipeline: egstats
I also tried without the multiline options, but that didn't help.
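In case it helps, this is how I run filebeat while testing, with the publish debug selector enabled so the exact event bodies being shipped are logged to the console (the config path is just where I keep the file):

./filebeat -e -c /etc/filebeat/filebeat.yml -d "publish"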