Hello everyone,
I'm working with the ELK stack in Docker. I was able to run Elasticsearch, Kibana, Logstash, and Filebeat as Docker containers through a docker-compose file. Ingestion appears to work when I use simple pipelines, but as soon as I move to more complex pipelines I start getting errors.
This is my docker-compose file:
version: "2.1"
services:
  # The environment variable "ELASTIC_VERSION" is used throughout this file to
  # specify the version of the images to run. The default is set in the
  # '.env' file in this folder. It can be overridden with any normal
  # technique for setting environment variables, for example:
  #   ELASTIC_VERSION=5.5.1 docker-compose up
  # Additionally, the user can control:
  #   * the total memory assigned to the ES container through the variable ES_MEM_LIMIT, e.g. ES_MEM_LIMIT=2g
  #   * the memory assigned to the ES JVM through the variable ES_JVM_HEAP, e.g. ES_JVM_HEAP=1024m
  #   * the password used for the elastic, logstash_system and kibana accounts through the variable ES_PASSWORD
  #   * the MySQL root password through the variable MYSQL_ROOT_PASSWORD
  #   * the default index pattern used in Kibana via the variable DEFAULT_INDEX_PATTERN
  # REF: https://docs.docker.com/compose/compose-file/#variable-substitution
  elasticsearch:
    container_name: elasticsearch
    hostname: elasticsearch
    image: "docker.elastic.co/elasticsearch/elasticsearch:${ELASTIC_VERSION}"
    environment:
      - http.host=0.0.0.0
      - transport.host=127.0.0.1
      - bootstrap.memory_lock=true
      - "ES_JAVA_OPTS=-Xms${ES_JVM_HEAP} -Xmx${ES_JVM_HEAP}"
    mem_limit: ${ES_MEM_LIMIT}
    ulimits:
      memlock:
        soft: -1
        hard: -1
    volumes:
      - ./config/elasticsearch/elasticsearch.yml:/usr/share/elasticsearch/elasticsearch.yml
      - esdata:/usr/share/elasticsearch/data
    #Port 9200 is available on the host
    ports: ['9200:9200']
    #Healthcheck to confirm availability of ES. Other containers wait on this.
    healthcheck:
      test: ["CMD", "curl", "-s", "-f", "-u", "elastic:${ES_PASSWORD}", "http://localhost:9200/_cat/health"]
    #Internal network for the containers
    networks: ['stack']
  kibana:
    container_name: kibana
    hostname: kibana
    image: "docker.elastic.co/kibana/kibana:${ELASTIC_VERSION}"
    volumes:
      - ./config/kibana/kibana.yml:/usr/share/kibana/kibana.yml
    #Port 5601 accessible on the host
    ports: ['5601:5601']
    networks: ['stack']
    #We don't start Kibana until the ES instance is ready
    depends_on: ['elasticsearch']
    environment:
      - "ELASTICSEARCH_PASSWORD=${ES_PASSWORD}"
    healthcheck:
      test: ["CMD", "curl", "-s", "-f", "http://localhost:5601/login"]
      retries: 6
  #Logstash container
  logstash:
    container_name: logstash
    hostname: logstash
    image: "docker.elastic.co/logstash/logstash:${ELASTIC_VERSION}"
    volumes:
      - ./config/logstash/logstash.yml:/usr/share/logstash/config/logstash.yml:ro
      - ./config/logstash/pipeline:/usr/share/logstash/pipeline/
      - ./.kitchen/logs:/logs
    #Port 5044 accessible on the host
    ports: ['5044:5044']
    networks: ['stack']
    depends_on:
      elasticsearch: { condition: service_healthy }
    healthcheck:
      test: ["CMD", "curl", "-f", "http://localhost:9600"]
      retries: 10
      interval: 10s
  #Filebeat container
  filebeat:
    container_name: filebeat
    hostname: filebeat
    user: root
    image: "docker.elastic.co/beats/filebeat:${ELASTIC_VERSION}"
    volumes:
      #Mount the Filebeat configuration so users can make edits
      - ./config/beats/filebeat/filebeat.yml:/usr/share/filebeat/filebeat.yml
      #Mount the prospectors directory. Users can in turn add prospectors to this directory and they will be dynamically loaded
      #- ./config/beats/filebeat/prospectors.d/:/usr/share/filebeat/prospectors.d/
      #Mount the test volume
      - ./.kitchen/logs/:/usr/share/filebeat/logs/
      #Named volume fbdata. This is used to persist the registry file between restarts, so as to avoid data duplication
      - fbdata:/usr/share/filebeat/data/
    networks: ['stack']
    command: filebeat -e -strict.perms=false
    restart: on-failure
    depends_on:
      #Wait for these services to come up. This ensures the logs are available and Logstash exists for indexing
      logstash: { condition: service_healthy }
  #Configure Stack container. This short-lived container configures the stack once Elasticsearch is available.
  #More specifically, using a script it sets passwords and sets a default index pattern.
  configure_stack:
    container_name: configure_stack
    image: docker.elastic.co/beats/metricbeat:${ELASTIC_VERSION}
    volumes: ['./init/configure-stack.sh:/usr/local/bin/configure-stack.sh:ro']
    command: ['/bin/bash', '-c', 'cat /usr/local/bin/configure-stack.sh | tr -d "\r" | bash']
    networks: ['stack']
    environment: ['ELASTIC_VERSION=${ELASTIC_VERSION}', 'ES_PASSWORD=${ES_PASSWORD}', 'DEFAULT_INDEX_PATTERN=${DEFAULT_INDEX_PATTERN}']
    depends_on: ['elasticsearch', 'kibana']
volumes:
  #ES data
  esdata:
    driver: local
  #Filebeat data, i.e. the registry file
  fbdata:
    driver: local

networks: {stack: {}}
And this is the pipeline (.conf file) I'm trying to run with the Logstash Docker container:
input {
  beats {
    port => "5044"
  }
}

filter {
  grok {
    match => { "message" => "%{TIMESTAMP_ISO8601:logdate1} %{LOGLEVEL} %{DATA:thread_name} %{SPACE} %{DATA:class_name}:%{GREEDYDATA}" }
  }

  if ("_grokparsefailure" in [tags]) {
    drop {}
  }
  else {
    ruby {
      code => "event.set('logdate2', event.get('logdate1'))"
    }
    date {
      match => ["logdate2", "ISO8601"]
      target => "logdate2"
    }
    date_formatter {
      source => "logdate2"
      target => "formatted_date1"
      pattern => "yyyy-MM-dd"
    }
    date {
      match => ["logdate1", "ISO8601", "YYYY-MM-dd HH:mm:ss,SSS Z"]
      target => "@timestamp"
    }
  }
  grok {
    match => ["[log][file][path]", "%{GREEDYDATA:path}/%{GREEDYDATA:metadata}"]
  }
  if ("_grokparsefailure" in [tags]) {
    mutate {
      add_tag => ["error_linux"]
      remove_tag => ["_grokparsefailure"]
    }
  }
  else {
  }

  grok {
    match => ["[log][file][path]", "%{GREEDYDATA:path}\\%{GREEDYDATA:metadata}"]
  }
  if ("_grokparsefailure" in [tags]) {
    mutate {
      add_tag => ["error_windows"]
      remove_tag => ["_grokparsefailure"]
    }
  }
  else {
  }

  mutate {
    remove_field => "[log][file][path]"
  }
  grok {
    match => { "metadata" => "%{GREEDYDATA:process_name}-%{NONNEGINT:run_ID}-%{WORD:output_type}(?<message_metadata>%{NUMBER}-%{GREEDYDATA})" }
  }
  if ("_grokparsefailure" in [tags]) {
    mutate {
      #add_tag => ["error3"]
      remove_tag => ["_grokparsefailure"]
    }
  }
  else {
  }

  grok {
    match => { "metadata" => "%{GREEDYDATA:process_name}-%{NONNEGINT:run_ID}-%{WORD:output_type} %{SPACE}-%{GREEDYDATA}" }
  }
  if ("_grokparsefailure" in [tags]) {
    mutate {
      #add_tag => ["error4"]
      remove_tag => ["_grokparsefailure"]
    }
  }
  else {
    mutate {
      #remove_tag => ["error3"]
    }
  }

  mutate {
    remove_field => ["[agent][type]", "[agent][version]", "[ecs][version]", "[host][architecture]", "[host][containerized]", "[host][hostname]", "[host][id]", "[host][name]", "[host][os][codename]", "[host][os][family]", "[host][os][kernel]", "[host][os][name]", "[host][os][platform]", "[host][os][version]", "[input][type]", "[log][offset]", "[agent][ephemeral_id]", "[agent][hostname]", "[agent][id]"]
  }
}
output {
  elasticsearch {
    hosts => ['elasticsearch']
    user => 'elastic'
    password => 'changeme'
    index => ["hs_index_full"]
    document_type => "default"
  }
  #pipeline { send_to => ["part2"] }
  #pipeline { send_to => ["part3"] }
}
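The two commented-out pipeline outputs are where I eventually want to forward events to other pipelines ("part2", "part3"). From what I understand of pipeline-to-pipeline communication, the receiving pipeline would have to start with a pipeline input bound to that address, something like the sketch below (file name and index name are just placeholders I made up, and I haven't got this running yet):

  # part2.conf - a separate pipeline that receives events from the main one
  input {
    pipeline {
      address => "part2"
    }
  }
  filter {
    # part2-specific filters would go here
  }
  output {
    elasticsearch {
      hosts => ['elasticsearch']
      user => 'elastic'
      password => 'changeme'
      index => "hs_index_part2"
    }
  }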
So, my questions are the following:
-
How can I set up multiple pipelines in the dockerized ELK stack through the docker-compose file? Basically, how do the mounted paths work in the multi-pipeline case (pipelines.yml, the pipeline directory, and so on), and what should I take into account to do it properly? I have been trying this all week; a sketch of what I have been attempting is shown below.
-
Why does the pipeline above not run properly in the Docker ELK stack, when it works perfectly in a regular (non-Docker) ELK installation? I suspect a missing plugin may be stopping it from working. If so, how can I add a Logstash or Kibana plugin when using Docker? (See the Dockerfile sketch further down for what I have been considering.)
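For reference, this is roughly the multi-pipeline layout I have been trying. The pipeline ids and file names (main.conf, part2.conf, part3.conf) are placeholders I made up; the only part I took from the docs is that pipelines.yml lives in /usr/share/logstash/config/ inside the container:

  # config/logstash/pipelines.yml (to be mounted into the container)
  - pipeline.id: main
    path.config: "/usr/share/logstash/pipeline/main.conf"
  - pipeline.id: part2
    path.config: "/usr/share/logstash/pipeline/part2.conf"
  - pipeline.id: part3
    path.config: "/usr/share/logstash/pipeline/part3.conf"

and the logstash service in docker-compose with the extra mount:

  logstash:
    container_name: logstash
    hostname: logstash
    image: "docker.elastic.co/logstash/logstash:${ELASTIC_VERSION}"
    volumes:
      - ./config/logstash/logstash.yml:/usr/share/logstash/config/logstash.yml:ro
      - ./config/logstash/pipelines.yml:/usr/share/logstash/config/pipelines.yml:ro
      - ./config/logstash/pipeline:/usr/share/logstash/pipeline/
      - ./.kitchen/logs:/logs

What I'm not sure about is whether mounting pipelines.yml like this is enough, or whether something in logstash.yml also has to change.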
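And for the plugin question, this is the kind of custom image I assume I would need if, for example, the date_formatter filter I use above turns out not to be bundled with the official image (I believe the corresponding plugin is called logstash-filter-date_formatter, but I'm not certain):

  # Dockerfile: custom Logstash image with an extra filter plugin installed (my assumption)
  ARG ELASTIC_VERSION
  FROM docker.elastic.co/logstash/logstash:${ELASTIC_VERSION}
  RUN bin/logstash-plugin install logstash-filter-date_formatter

and in docker-compose I would replace the image: line of the logstash service with something like:

  logstash:
    build:
      context: ./config/logstash
      args: ['ELASTIC_VERSION=${ELASTIC_VERSION}']

Is that the right approach, or is there a recommended way to add plugins to the official images?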
(Please don't just point me to the official Elastic documentation; I have already been trying to follow it and I don't find it very clear on these points.) I would really appreciate any help; I have spent a lot of time on this.
Thanks so much,
JUAN DAVID BRICENO GUERRERO
MASTER STUDENT IN SUSTAINABLE INDUSTRIAL ENGINEERING