Hi community.
Recently, in my environment, I have noticed that data ingestion through one of my pipelines is too slow. Before explaining my ELK setup, here is some information about my machine:
OS: Linux version 4.1.12-124.25.1 Red Hat
# Sum the "cpu cores" values reported in /proc/cpuinfo.
# NOTE(review): /proc/cpuinfo typically prints one "cpu cores" line per logical CPU,
# so this sum can overstate the physical core count - cross-check with `nproc` or `lscpu`.
grep "cpu cores" /proc/cpuinfo | awk -F: '{ num+=$2 } END{ print "cpu cores", num }'
“cpu cores 16.”
Currently, I have four pipelines running, defined in pipelines.yml:
import_logs_workmon_multline.conf --> beats in 5044
import_logs_EAIFRM.conf --> beats in 9601
import_logs_iis.conf --> beats in 9600
The slow one is import_logs_iis.conf; the others ingest in real time:
input {
  # Receive events from Beats shippers on port 9600.
  beats {
    port => 9600
    # No `type` option here: input options are not sprintf-expanded, so
    # type => "%{type}" would only stamp the literal text "%{type}" on events
    # that arrive without a type. The filter and output sections rely on the
    # [type] field sent by the shipper - confirm the shipper sets it.
  }
}
filter {
  ############## CSV RECORDS (type "servcel") ##############
  if [type] == "servcel" {
    # Split the comma-separated line into named columns.
    csv {
      columns => ["CodDistribuidorSAP","Origem","Telefone","UF","CanalRecarga","Valor","NSUServCel","NSUOrigem","logdate1","logdate2","Operacao","logdate3","Intervalo","EstadoTransacao","CodResposta","QtsSondas","QtdPedInlusao","Integrador"]
      separator => ","
    }

    # Parse the three raw timestamps (yyyyMMddHHmmss, zone Etc/GMT+3) into date fields.
    date {
      timezone => "Etc/GMT+3"
      match => ["logdate1","yyyyMMddHHmmss"]
      target => "DataServCel"
    }
    date {
      timezone => "Etc/GMT+3"
      match => ["logdate2","yyyyMMddHHmmss"]
      target => "DataOrigem"
    }
    date {
      timezone => "Etc/GMT+3"
      match => ["logdate3","yyyyMMddHHmmss"]
      target => "DataOperacao"
    }

    # Numeric columns are stored as integers.
    mutate {
      convert => {
        "CodDistribuidorSAP" => "integer"
        "Telefone" => "integer"
        "Operacao" => "integer"
        "Intervalo" => "integer"
        "EstadoTransacao" => "integer"
        "QtsSondas" => "integer"
        "QtdPedInlusao" => "integer"
      }
    }

    # Derived fields; the raw timestamp strings are no longer needed once parsed.
    # NOTE(review): "Valor_int" is a plain copy of "Valor" and is never converted
    # to a number - confirm whether a convert step is intended.
    mutate {
      add_field => {
        "status_recarga" => "%{EstadoTransacao}%{CodResposta}%{Operacao}"
        "Valor_int" => "%{Valor}"
      }
      remove_field => ["logdate1", "logdate2", "logdate3"]
    }
  } else {
    ############## IIS ACCESS LOG LINES ##############
    # Discard "#" header/comment lines first, so dissect is never run on
    # lines that cannot match the mapping.
    if [message] =~ "^#" {
      drop { }
    }

    # Split the space-delimited line into its fields; date and time are
    # appended into a single log_timestamp field.
    dissect {
      mapping => {
        message => "%{log_timestamp} %{+log_timestamp} %{S-SiteName} %{S-ComputerName} %{S-IP} %{CS-Method} %{CS-URI-Stem} %{CS-URI-Query} %{S-Port} %{CS-Username} %{C-IP} %{CS-Version} %{CS-UserAgent} %{CS-Cookie} %{CS-Referer} %{CS-Host} %{SC-Status} %{SC-SubStatus} %{SC-Win32-Status} %{SC-Bytes} %{CS-Bytes} %{TimeTaken}"
      }
    }

    # Use the log line's own timestamp (UTC) as the event time.
    date {
      timezone => "UTC"
      #timezone => "Etc/GMT+3"
      match => [ "log_timestamp", "yyyy-MM-dd HH:mm:ss" ]
      target => "@timestamp"
    }

    mutate {
      convert => { "TimeTaken" => "integer" }
    }

    ############## BUILD THE USER-AGENT FIELD ################
    # Parse only the dissected user-agent column, not the whole line: the
    # parser scans far less text, cannot match text from other columns (URI,
    # referer, ...), and repeated agents can be served from the plugin's cache
    # instead of being re-parsed for every unique log line.
    useragent {
      source => "CS-UserAgent"
      target => "ua"
    }

    ############## CONCATENATE BROWSER NAME AND MAJOR VERSION INTO A SINGLE FIELD ##############
    # (The former `split` on "ua" was dropped: "ua" is an object, not a string,
    # so splitting it did nothing.)
    mutate {
      add_field => {"Navegador" => "%{[ua][name]}-%{[ua][major]}"}
    }

    if [C-IP] =~ "^10\." {
      ######################## PRIVATE (10.x) IPs: DICTIONARY LOOKUPS #####################################
      # NOTE(review): each of these four lookups tests C-IP against dictionary
      # keys as regular expressions, which is costly per event - consider exact
      # keys or a single dictionary if the data allows.
      # NOTE(review): the dictionary paths are Windows-style - confirm they are
      # valid on the host that runs Logstash.
      translate {
        regex => true
        override => true
        field => "[C-IP]"
        destination => "geoip.cordinates.iso3166"
        dictionary_path => "D:\ELK\logstash-7.1.1\dictionary\geoip.cordinates.iso3166.yml"
      }
      translate {
        regex => true
        override => true
        field => "[C-IP]"
        destination => "[geoip][region_name]"
        dictionary_path => "D:\ELK\logstash-7.1.1\dictionary\geoip.region.name.yml"
      }
      translate {
        regex => true
        override => true
        field => "[C-IP]"
        destination => "[geoip][region_code]"
        dictionary_path => "D:\ELK\logstash-7.1.1\dictionary\geoip.region.code.yml"
      }
      translate {
        regex => true
        override => true
        field => "[C-IP]"
        destination => "[geoip][city_name]"
        dictionary_path => "D:\ELK\logstash-7.1.1\dictionary\geoip.city.name.yml"
      }
    } else {
      ######################## PUBLIC IPs: GEOIP DATABASE #####################################
      # add_field is attached to the geoip filter so the combined code is only
      # written after a successful lookup, never as unresolved "%{...}" text
      # when the lookup fails.
      # NOTE(review): the database path is Windows-style - confirm it is valid
      # on the host that runs Logstash.
      geoip {
        source => "C-IP"
        database => "D:\ELK\logstash-7.1.1\geoip\GeoLite2-City.mmdb"
        target => "geoip"
        add_field => {"geoip.cordinates.iso3166" => "%{[geoip][country_code3]}-%{[geoip][region_code]}"}
      }
    }
  }
}
output {
  # Send every event to Elasticsearch on the loopback address, writing to
  # one index per [type] value per day.
  elasticsearch {
    index => "%{type}-%{+YYYY.MM.dd}"
    hosts => ["127.0.0.1:9200"]
  }
}
A lot of data arrives from the IIS logs and from a CSV file.
To try to fix the slow processing, I changed logstash.yml to use 16 workers, but I haven't seen any improvement yet.
logstash.yml:
# Bind address for the Logstash HTTP API.
http.host: "10.58.21.11"
# Keep the safe shutdown behaviour (do not force an exit).
pipeline.unsafe_shutdown: false
# Number of pipeline worker threads.
# NOTE(review): per the Logstash settings documentation the default already
# equals the host's CPU core count, so 16 may not change anything here - confirm.
pipeline.workers: 16
Do you have any tips to improve this situation?