Hi,
I'm using ELK-Forensics (https://github.com/cvandeplas/ELK-forensics) with a few minor changes:
input {
  # Receive events over a TCP listener; each event is labelled with the
  # type "l2tcsv" so the filter and output sections can select it.
  tcp {
    port => 18005
    type => "l2tcsv"
  }
}
filter {
  # Parse log2timeline/psort "l2tcsv" lines: split the CSV columns, build the
  # event timestamp, tag MACB flags, and extract paths / file names / URLs.
  if [type] == "l2tcsv" {
    csv {
      separator => ","
      quote_char => "ª" # workaround: don't use a quote character as " gives issues if the field contains a "
      columns => ["date","time","timezone","macb","source","sourcetype","eventtype","user","host","short","desc","version","filename","inode","notes","format","extra"]
    }

    if [date] == "date" {
      drop {} # drop the first line that contains the column names
    }

    # Marker line of the form "index_name,<value>": the value lands in the
    # second CSV column ("time") and is copied into the index_name field.
    # GREEDYDATA is used because an unanchored DATA (.*?) matches the empty
    # string, and grok discards empty captures, so the field was never set.
    # NOTE(review): only this marker event receives index_name; the other
    # events in the stream do not carry the field unless it is set elsewhere.
    if [date] == "index_name" {
      grok { match => { "time" => ["%{GREEDYDATA:index_name}"] } }
    }

    # Combine date, time and timezone into one string so it can be parsed.
    mutate { merge => ["date", "time"] }     # merge and join need to be in separate mutates
    mutate { merge => ["date", "timezone"] } # merge and join need to be in separate mutates
    mutate { join => ["date", " "] }         # merge and join need to be in separate mutates
    date {
      match => ["date", "MM/dd/YYYY HH:mm:ss z"]
    }

    # extract macb info: one tag per letter present in the macb column
    if ("M" in [macb]) { mutate { add_tag => ["modified"] } }
    if ("A" in [macb]) { mutate { add_tag => ["accessed"] } }
    if ("C" in [macb]) { mutate { add_tag => ["changed"] } }
    if ("B" in [macb]) { mutate { add_tag => ["birth"] } }

    # Extract filenames
    # break_on_match => false lets all three patterns run in sequence:
    # path from the source field, then file name from the path, then the
    # extension from the file name.
    if [source] == "FILE" {
      grok {
        break_on_match => false
        match => ["desc", "(:(?<extracted.path>/.*?))?$",
                  "extracted.path", "(?<extracted.filename>[^/]+?)?$",
                  "extracted.filename", "((\.(?<extracted.ext>[^./]+))?)?$"
        ]
      }
    }
    if [source] == "META" {
      grok {
        break_on_match => false
        match => ["filename", "(:(?<extracted.path>/.*?))?$",
                  "extracted.path", "(?<extracted.filename>[^/]+?)?$",
                  "extracted.filename", "((\.(?<extracted.ext>[^./]+))?)?$"
        ]
      }
    }

    # Extract urls
    # The URL ends at the first space or at the end of the field. The
    # alternation (?: |$) is required: inside a character class, "$" is a
    # literal dollar sign rather than an end-of-string anchor.
    if [source] == "WEBHIST" {
      grok { match => ["desc", "Location: (?<extracted.url>.*?)(?: |$)"] }
    }

    # Final clean-up: numeric columns become integers, the extension is
    # normalised to lower case, and the raw/intermediate fields are removed.
    mutate {
      convert => ["inode", "integer",
                  "version", "integer"]
      lowercase => ["extracted.ext"]
      remove_field => ["message", "short", "date", "time", "timezone"]
    }
  }
}
output {
  # Ship parsed l2tcsv events to Elasticsearch.
  if [type] == "l2tcsv" {
    elasticsearch {
      # Placeholder: replace with the actual list of Elasticsearch hosts.
      hosts => [array of ips]
      # The target index is taken from each event's index_name field.
      index => "%{index_name}"
    }
  }
}
The data I'm processing is Psort CSV output from Plaso (https://github.com/log2timeline/plaso/wiki). Before sending it to Logstash, I add the line I described earlier at the beginning of the document.
If there's any other information you need, please let me know.
Thank you for your help!