Hi everyone,
I'm working on log files and I have a little problem, this is an example of my logs :
192.168.0.1(localhost) (user1)
file1192.168.0.2(localhost) (user2)
file2
file3
file2 - read only
A new line is identified by an IP address, for each line I want to keep the following information :
- IP address
 - domain
 - user
 - file
 
For the first line, it isn't very complicated because I have only 1 file but for the second line I have a problem, I can get only the first file (file2). At the end, I'd like to have the following data into Elasticsearch :
192.168.0.1 localhost user1 file1
192.168.0.2 localhost user2 file2
192.168.0.2 localhost user2 file3
I don't want to keep the lines that don't begin by an IP address or that contain a dash '-', in my example I don't want to keep : file2 - read only.
This is the configuration file that I use for Logstash :
input {
    file {
        type => "my_dashboard"
        path => "/path-to-my-data/*"
        start_position => "beginning"
        sincedb_path => "since_db"
        codec => plain { charset => "ANSI_X3.4-1968"}
    }
}
filter {
    multiline {
        pattern => "%{IP:IP}"
        what => "next"
    }
    grok {
        match => {"message" => "%{IP:IP}\(%{HOSTNAME:domain}\)%{SPACE}\(%{USERNAME:user}\)%{SPACE}%{NOTSPACE:file}"}
    }
    mutate {
        remove_field => ["host","@version","path"]
    }
}
output {
    if "_grokparsefailure" in [tags] {
        file {
            path => "./grokparsefailure.log"
        }
    }
    else if [type] == "my_dashboard" {
        elasticsearch {
            hosts => "192.168.0.1:9200"
            index => "user_data"
            document_type => "user"
        }
    }
}
Thank you in advance for your help ![]()