Hi there,
I would like to ignore inserting entry in Elasticsearch index if the parsed log pattern does not contain the specific string jenkins_build_number
Here is my filebeat and logstash configs.
filebeat.yml
#======================== Filebeat inputs ==========================
filebeat.inputs:
- type: log
enabled: true
paths:
- jenkins.log
exclude_files: ['.gz$']
multiline.pattern: '^[a-zA-Z]+\s[0-9]{1,2},\s[0-9]{4}\s[0-9]{1,2}:[0-9]{1,2}:[0-9]{1,2}\s(?:AM|am|PM|pm)'
multiline.negate: true
multiline.match: after
fields:
type: jenkins-server
fields_under_root: true
#========================== Outputs ================================
#------------------------- Logstash output -------------------------
output.logstash:
# The Logstash hosts
hosts: ["XXXXX"]
bulk_max_size: 200
#======================== Processors ==============================
# Configure processors to enhance or manipulate events generated by the beat.
processors:
- add_host_metadata: ~
# - add_cloud_metadata: ~
logstash - pipeline.conf
input {
beats {
port => "9601"
}
}
filter {
if [type] == "jenkins-server" {
# set all messages from the jenkins log as type 'jenkins' and add the @message field.
mutate {
add_field => ["@message_type", "jenkins"]
add_field => ["@message", "%{message}"]
}
}
}
# now that we have possibly-multiline events, we can clean them up.
filter {
# munge the possibly-multiline messages into a single string
mutate {
join => ["@message", "\n"]
}
# split @message into __date and __msg, and overwrite the @timestamp value.
grok {
match => [ "@message", "^(?<__date>%{MONTH} %{MONTHDAY}, %{YEAR} %{TIME} (AM|PM)) (?<__msg>.+)" ]
}
date {
match => [ "__date", "MMM dd, YYYY HH:mm:ss a"]
}
# ...now some patterns to categorize specific event types...
# parse build completion messages, adding the jenkins_* fields and the 'build' tag
grok {
match => [ "@message", "(?<jenkins_job>\S+) #(?<jenkins_build_number>\d+) (?<__msg>.+): (?<jenkins_build_status>\w+)" ]
tag_on_failure => []
overwrite => true
add_tag => ['build']
}
# convert build number from string to integer
mutate {
convert => ["jenkins_build_number", "integer"]
}
# remove any empty fields
ruby {
code => "event.to_hash.delete_if {|field, value| value == '' }"
}
# tag messages that come from the git SCM plugin (and associated classes)
grok {
match => [ "@message", "\.git\."]
tag_on_failure => []
add_tag => ['git']
}
# if we have extracted a short message string, replace @message with it now
if [__msg] {
mutate {
replace => ["@message","%{__msg}"]
}
}
# convert @message back into an array of lines
mutate {
split => ["@message", "\n"]
}
}
# clean-up temporary fields and unwanted tags.
filter {
mutate {
remove_field => [
"message",
"__msg",
"__date",
"dumps1",
"plugin_command"
]
remove_tag => [
"multiline",
"_grokparsefailure"
]
}
}
# send it on to the elasticsearch
output {
elasticsearch {
hosts => ["XXXXXXXXXXXXXXXX"]
# username & password to connect to elaticsearch
user => "XXX"
password => "XXX"
action => "index"
index => "jenkins-%{+YYYY.MM.dd}"
}
# use this if you want to verify logs are being sent to elasticsearch or not
stdout { codec => rubydebug }
}
Output:
The highlighted part in the image has 2 records without jenkins_build_number and status, so I would like to remove or ignore before sending it Elasticsearch.
Thanks,