Bad output logstash


(Erick Garcia Perez) #1

I'm trying to parse an apache_access log. When the file has several different entries, Logstash concatenates the whole file into a single message instead of separating the entries line by line, producing an event similar to this:

\n66.250.73.158 - - [06/Sep/2017:11:57:05 -0500] "GET /clubnupcial/index.php/tendencia/disenador/Vestidos-de-Novia/Max-Mara HTTP/1.1" 200 222254 "-" "Mozilla/5.0 (compatible; Googlebot/2.1; +http://www.google.com/bot.html)"\n6.250.73.158 - - [06/Sep/2017:11:57:05 -0500] "GET /clubnupcial/index.php/tendencia/disenador/Vestidos-de-Novia/Max-Mara HTTP/1.1" 200 222254 "-" "Mozilla/5.0 (compatible; Googlebot/2.1; +http://www.google.com/bot.html)"\n6.250.73.158 - - [06/Sep/2017:11:57:05 -0500] "GET /clubnupcial/index.php/tendencia/disenador/Vestidos-de-Novia/Max-Mara HTTP/1.1" 200 222254 "-" "Mozilla/5.0 (compatible; Googlebot/2.1; +http://www.google.com/bot.html)"\n6.250.73.158 - - [06/Sep/2017:11:57:05 -0500] "GET /clubnupcial/index.php/tendencia/disenador/Vestidos-de-Novia/Max-Mara HTTP/1.1" 200 222254 "-" "Mozilla/5.0 (compatible; Googlebot/2.1; +http://www.google.com/bot.html)"\n66.249.73.158 - - [06/Sep/2017:11:57:05 -0500] "GET /clubnupcial/index.php/tendencia/disenador/Vestidos-de-Novia/Max-Mara HTTP/1.1" 200 222254 "-" "Mozilla/5.0 (compatible; Googlebot/2.1; +http://www.google.com/bot.html)"\n660.249.73.158 - - [06/Sep/2017:11:57:05 -0500] "GET /clubnupcial/index.php/tendencia/disenador/Vestidos-de-Novia/Max-Mara HTTP/1.1" 200 222254 "-" "Mozilla/5.0 (compatible; Googlebot/2.1; +http://www.google.com/bot.html)"\n66.249.73.158 - - [06/Sep/2017:11:57:05 -0500] "GET /clubnupcial/index.php/tendencia/disenador/Vestidos-de-Novia/Max-Mara HTTP/1.1" 200 222254 "-" "Mozilla/5.0 (compatible; Googlebot/2.1; +http://www.google.com/bot.html)"\n66.249.73.158 - - [06/Sep/2017:11:57:05 -0500] "GET /clubnupcial/index.php/tendencia/disenador/Vestidos-de-Novia/Max-Mara HTTP/1.1" 200 222254 "-" "Mozilla/5.0 (compatible; Googlebot/2.1; +http://www.google.com/bot.html)"\n66.250.73.158 - - [06/Sep/2017:11:57:05 -0500] "GET /clubnupcial/index.php/tendencia/disenador/Vestidos-de-Novia/Max-Mara HTTP/1.1" 200 222254 "-" "Mozilla/5.0 (compatible; Googlebot/2.1; 
+http://www.google.com/bot.html)"\n6.250.73.158 - - [06/Sep/2017:11:57:05 -0500] "GET /clubnupcial/index.php/tendencia/disenador/Vestidos-de-Novia/Max-Mara HTTP/1.1" 200 222254 "-" "Mozilla/5.0 (compatible; Googlebot/2.1; +http://www.google.com/bot.html)"\n6.250.73.158 - - [06/Sep/2017:11:57:05 -0500] "GET /clubnupcial/index.php/tendencia/disenador/Vestidos-de-Novia/Max-Mara HTTP/1.1" 200 222254 "-" "Mozilla/5.0 (compatible; Googlebot/2.1; +http://www.google.com/bot.html)"\n6.250.73.158 - - [06/Sep/2017:11:57:05 -0500] "GET /clubnupcial/index.php/tendencia/disenador/Vestidos-de-Novia/Max-Mara HTTP/1.1" 200 222254 "-" "Mozilla/5.0 (compatible; Googlebot/2.1; +http://www.google.com/bot.html)"\n6.250.73.158 - - [06/Sep/2017:11:57:05 -0500] "GET /clubnupcial/index.php/tendencia/disenador/Vestidos-de-Novia/Max-Mara HTTP/1.1" 200 222254 "-" "Mozilla/5.0 (compatible; Googlebot/2.1; +http://www.google.com/bot.html)"\n6.250.73.158 - - [06/Sep/2017:11:57:05 -0500] "GET /clubnupcial/index.php/tendencia/disenador/Vestidos-de-Novia/Max-Mara HTTP/1.1" 200 222254 "-" "Mozilla/5.0 (compatible; Googlebot/2.1; +http://www.google.com/bot.html)"\n6.250.73.158 - - [06/Sep/2017:11:57:05 -0500] "GET /clubnupcial/index.php/tendencia/disenador/Vestidos-de-Novia/Max-Mara HTTP/1.1" 200 222254 "-" "Mozilla/5.0 (compatible; Googlebot/2.1; +http://www.google.com/bot.html)"\n6.250.73.158 - - [06/Sep/2017:11:57:05 -0500] "GET /clubnupcial/index.php/tendencia/disenador/Vestidos-de-Novia/Max-Mara HTTP/1.1" 200 222254 "-" "Mozilla/5.0 (compatible; Googlebot/2.1; +http://www.google.com/bot.html)"\n6.250.73.158 - - [06/Sep/2017:11:57:05 -0500] "GET /clubnupcial/index.php/tendencia/disenador/Vestidos-de-Novia/Max-Mara HTTP/1.1" 200 222254 "-" "Mozilla/5.0 (compatible; Googlebot/2.1;


(Nachiket) #2

Hi Erick,
Is this the file you want to parse? Does it not have separate lines for each log?
Did you try using the multiline filter?
What is your config? Are there any errors?


(Erick Garcia Perez) #3

Hi NerdSec, my configuration file is the following:
input {
file{
path => "/home/clusterelastic/cluster/nodo1/logstash-5.5.2/logsaleer/access"
type => "apache_access"
}
file{
path => "/home/clusterelastic/cluster/nodo1/logstash-5.5.2/logsaleer/error_log.log"
type => "apache_error"
}
file{
path => "/home/clusterelastic/cluster/nodo1/logstash-5.5.2/logsaleer/mysql_error.log"
type => "mysql"
}
file{
path => "/home/clusterelastic/cluster/nodo1/logstash-5.5.2/logsaleer/catalina.out"
type => "tomcat"
codec => multiline {
pattern => "(^%{MONTH} %{MONTHDAY}, 20%{YEAR} %{HOUR}:?%{MINUTE}(?::?%{SECOND}) (?:AM|PM))"
what => "previous"
}
}
#file{
#path => "/home/clusterelastic/cluster/nodo1/logstash-5.5.2/logsaleer/localhost_access_log.2017-08-21"
# type => "apache_access"
#}
}
filter {
multiline{ #Nothing will pass this filter unless it is a new event ( new [2014-03-02 1.... )
pattern => "^["
what => "previous"
negate=> true
}
if [type] in ["apache_access"]{
grok {
match => {
"message" => "%{COMBINEDAPACHELOG} %{IPORHOST:serverip} %{NUMBER:serverport} %{NUMBER:elapsed_millis} %{NOTSPACE:sessionid} %{QS:proxiedip} %{QS:loginame}"
}
overwrite => [ "message" ]
}
}
if [type] in ["apache_error","apache-error","error_log"] {
grok {
match => ["message", "[%{DATA:day} %{DATA:month} %{YEAR:year} : %{DATA:hour}:%{DATA:minute}:%{DATA:second}] [%{NOTSPACE:loglevel}] (?:[client %{IPORHOST:clientip}] ){0,1}%{GREEDYDATA:message}"]

  }

   date {
    match => [ "timestamp", "dd/MMM/yyyy:HH:mm:ss Z"]
}
}

}
filter {

# Deal with the multi-line MySQL/MariaDB log entries

if [type] in ["mysql","mysql_error"] {

   # Strip out MySQLs messy timestamps
   grok {
       match => [ 'message', "(?m)^%{NUMBER:date} *%{NOTSPACE:time} %{GREEDYDATA:message}" ]
     # add_field => { "mysql_time" => "%{date} %{time}" }
   }
   # Check if we have a severity field
   #if [message] =~ /^\[/ {
       #grok {
       #    match => [ 'message', "(?m)\[%{NOTSPACE:severity}\] %{GREEDYDATA:message}" ]
          
      # }
   #}
   # If the message contains WSREP:, it regards the galera cluster
   #if  [message] =~ /WSREP:/ {
    #   mutate {
     #      replace => [ "type", "Galera" ]
      # }
       #grok {
        #   match => [ 'message', "(?m)WSREP: %{GREEDYDATA:message}" ]
           
     #  }
   #}

}
}
filter {
if [type] == "tomcat" {
multiline {
pattern => "(^%{TOMCAT_DATESTAMP})|(^%{CATALINA_DATESTAMP})"
negate => true
what => "previous"
}
}
grok {
match => [ "message", "%{CATALINALOG}" ]
}
}
filter {
if [type] in ["apache_access"]{
grok {
match => {
"message" => "%{COMBINEDAPACHELOG} %{IPORHOST:serverip} %{NUMBER:serverport} %{NUMBER:elapsed_millis} %{NOTSPACE:sessionid} %{QS:proxiedip} %{QS:loginame}"
}
overwrite => [ "message" ]
}
}}
output {
elasticsearch {
hosts => ["localhost:9200"]
}
csv{
fields =>["request"]
path =>"/home/clusterelastic/cluster/csv-salida.csv"
}
stdout { codec => rubydebug }
}

My log entries:
19.1.1.2 - - [25/Aug/2017:00:38:48 -0500] "GET /uniformes/api/img/nuevosLogosEmpresa/d8efde9ccca02fcc3b3c7bceb25e40d8.png HTTP/1.1" 200 4068 "-" "Mozilla/5.0 (Linux; Android 4.4.4; SM-J100M Build/KTU84P) AppleWebKit/537.36 (KHTML, like Gecko) Version/4.0 Chrome/33.0.0.0 Mobile Safari/537.36"
19.1.1.2 - - [25/Aug/2017:00:38:48 -0500] "GET /uniformes/api/img/nuevosLogosEmpresa/d8efde9ccca02fcc3b3c7bceb25e40d8.png HTTP/1.1" 200 4068 "-" "Mozilla/5.0 (Linux; Android 4.4.4; SM-J100M Build/KTU84P) AppleWebKit/537.36 (KHTML, like Gecko) Version/4.0 Chrome/33.0.0.0 Mobile Safari/537.36"
190.1.1.2 - - [25/Aug/2017:00:38:48 -0500] "GET /uniformes/api/img/nuevosLogosEmpresa/d8efde9ccca02fcc3b3c7bceb25e40d8.png HTTP/1.1" 200 4068 "-" "Mozilla/5.0 (Linux; Android 4.4.4; SM-J100M Build/KTU84P) AppleWebKit/537.36 (KHTML, like Gecko) Version/4.0 Chrome/33.0.0.0 Mobile Safari/537.36"
190.1.1.2 - - [25/Aug/2017:00:38:48 -0500] "GET /uniformes/api/img/nuevosLogosEmpresa/d8efde9ccca02fcc3b3c7bceb25e40d8.png HTTP/1.1" 200 4068 "-" "Mozilla/5.0 (Linux; Android 4.4.4; SM-J100M Build/KTU84P) AppleWebKit/537.36 (KHTML, like Gecko) Version/4.0 Chrome/33.0.0.0 Mobile Safari/537.36"
190.1.1.2 - - [25/Aug/2017:00:38:48 -0500] "GET /uniformes/api/img/nuevosLogosEmpresa/d8efde9ccca02fcc3b3c7bceb25e40d8.png HTTP/1.1" 200 4068 "-" "Mozilla/5.0 (Linux; Android 4.4.4; SM-J100M Build/KTU84P) AppleWebKit/537.36 (KHTML, like Gecko) Version/4.0 Chrome/33.0.0.0 Mobile Safari/537.36"
Mobile Safari/537.36"
11.1.1.2 - - [25/Aug/2017:00:38:48 -0500] "GET /uniformes/api/img/nuevosLogosEmpresa/d8efde9ccca02fcc3b3c7bceb25e40d8.png HTTP/1.1" 200 4068 "-" "Mozilla/5.0 (Linux; Android 4.4.4; SM-J100M Build/KTU84P) AppleWebKit/537.36 (KHTML, like Gecko) Version/4.0 Chrome/33.0.0.0 Mobile Safari/537.36"
11.1.1.2 - - [25/Aug/2017:00:38:48 -0500] "GET /uniformes/api/img/nuevosLogosEmpresa/d8efde9ccca02fcc3b3c7bceb25e40d8.png HTTP/1.1" 200 4068 "-" "Mozilla/5.0 (Linux; Android 4.4.4; SM-J100M Build/KTU84P) AppleWebKit/537.36 (KHTML, like Gecko) Version/4.0 Chrome/33.0.0.0 Mobile Safari/537.36"
17.1.1.2 - - [25/Aug/2017:00:38:48 -0500] "GET /uniformes/api/img/nuevosLogosEmpresa/d8efde9ccca02fcc3b3c7bceb25e40d8.png HTTP/1.1" 200 4068 "-" "Mozilla/5.0 (Linux; Android 4.4.4; SM-J100M Build/KTU84P) AppleWebKit/537.36 (KHTML, like Gecko) Version/4.0 Chrome/33.0.0.0 Mobile Safari/537.36"


(Erick Garcia Perez) #4

The error is that it joins all the entries of the file into a single message and saves them as one record in Elasticsearch.


(system) #5

This topic was automatically closed 28 days after the last reply. New replies are no longer allowed.