I have 1.5 TB of storage, and from the indices I can see that almost 150 GB is being generated per day. How can I reduce that? The payload is generating a huge amount of data. Is there any way to reduce the space utilization in Elasticsearch?
[fuseadmin@a0110pcsgmon02 logstash-5.5.0]$ cat logstash.conf
# Event source: a single Beats listener on port 5044, with the input-level
# "type" option set to "beats".
input {
  beats {
    port => 5044
    type => "beats"
  }
}
filter {
if [type] == "log" { #Grok to get SourcesystemID
# Primary pattern: the word that directly follows "SourceSystemID:".
grok {
  match => { "message" => "(?<=SourceSystemID:)%{WORD:sourceSystemID}" }
}
# Fallback when the primary pattern set nothing: take the text between
# "ChannelID:" and the next "|".
if ![sourceSystemID] {
  grok {
    match => { "message" => "(?<=ChannelID:)%{DATA:sourceSystemID}(?>\|)" }
  }
  # Neither marker yielded a value, so the event is discarded.
  if ![sourceSystemID] {
    drop {}
  }
}
# Container name: everything after "ContainerName:" up to the end of the message.
grok {
  match => { "message" => "(?<=ContainerName:)%{GREEDYDATA:containerName}" }
}
# Invocation point: the word that directly follows "LogPoint:".
grok {
  match => { "message" => "(?<=LogPoint:)%{WORD:logPoint}" }
}
if ![logPoint] {
  # Fallback when "LogPoint:" was not found: take the text between
  # "InvocationPoint:" and the next "|".
  # The pipe must be escaped. An unescaped "|" inside the group is an empty
  # alternation, which lets the lazy %{DATA} match zero characters so that
  # logPoint is never populated.
  grok {
    match => {
      "message" => "(?<=InvocationPoint:)%{DATA:logPoint}(?>\|)"
    }
  }
}
#Grok to get LogTimestamp
# ISO8601 timestamp that directly follows "LogTimestamp:".
grok {
  match => { "message" => "(?<=LogTimestamp:)%{TIMESTAMP_ISO8601:logTimestamp}" }
}
if ![logTimestamp] {
  # Fallback for lines without a "LogTimestamp:" marker. Parses the
  # pipe-delimited layout:
  #   timestamp | level | thread | serviceNameOld | bundle | logdetails
  # Every "|" separator is escaped so it matches a literal pipe. Left
  # unescaped, the pipes act as regex alternation: the pattern degenerates
  # into six independent alternatives and logdetails is not captured.
  grok {
    match => {
      "message" => "%{TIMESTAMP_ISO8601:logTimestamp}%{SPACE}\|%{SPACE}%{LOGLEVEL:level}%{SPACE}\|%{SPACE}%{DATA:thread}%{SPACE}\|%{SPACE}%{DATA:serviceNameOld}%{SPACE}\|%{SPACE}%{DATA:bundle}%{SPACE}\|%{SPACE}%{GREEDYDATA:logdetails}"
    }
  }
  #hardcoded to get if the log is first or last entry
  # The first word of logdetails is compared later against
  # "Incoming_Request" / "Outbound" to tag start and end entries.
  grok {
    match => { "logdetails" => "%{WORD:first_word}" }
  }
}
# GUID: text between "GUID:" and the next "|".
grok {
  match => { "message" => "(?<=GUID:)%{DATA:GUID}(?>\|)" }
}
# Service name: text between "ServiceName:" and the next "|".
grok {
  match => { "message" => "(?<=ServiceName:)%{DATA:serviceName}(?>\|)" }
}
# Server name: the IP address that directly follows "ManagedServer:".
grok {
  match => { "message" => "(?<=ManagedServer:)%{IP:managedServer}" }
}
# Error code: text between "ErrorCode:" and the next "|".
grok {
  match => { "message" => "(?<=ErrorCode:)%{DATA:errorCode}(?>\|)" }
}
# Parse logTimestamp as ISO8601 with the date filter.
date {
  match => ["logTimestamp", "ISO8601"]
}
# ReferenceID (added on 16th Apr 2018 by Rudrajit): text between
# "ReferenceID:" and the next "|".
grok {
  match => { "message" => "(?<=ReferenceID:)%{DATA:ReferenceID}(?>\|)" }
}
# TargetService (added on 16th Apr 2018 by Rudrajit): text between
# "TargetService:" and the next "|".
grok {
  match => { "message" => "(?<=TargetService:)%{DATA:TargetService}(?>\|)" }
}
#tag the log entry with first or last, drop other entry
# Field-existence tests are used instead of comparisons against "".
# A field that was never set still compares as != "", which would send
# every event down the error_log branch and leave start_log / end_log
# unreachable.
if [logTimestamp] {
  if [errorCode] {
    # An error code was extracted from the message.
    mutate {
      add_tag => ["error_log"]
    }
  } else if [first_word] == "Incoming_Request" {
    mutate {
      add_tag => ["start_log"]
    }
  } else if [first_word] == "Outbound" {
    mutate {
      add_tag => ["end_log"]
    }
  } else if [logPoint] == "InboundReq" {
    mutate {
      add_tag => ["start_log"]
    }
  } else if [logPoint] == "InboundResp" {
    mutate {
      add_tag => ["end_log"]
    }
  } else {
    # drop {}
  }
} else {
  # drop {}
}
# Response-time calculation: the elapsed filter is keyed on the GUID field,
# with "start_log" as the start tag and "end_log" as the end tag.
# new_event_on_match is turned off.
elapsed {
  unique_id_field    => "GUID"
  start_tag          => "start_log"
  end_tag            => "end_log"
  new_event_on_match => false
}
Please format your code, logs or configuration files using </> icon as explained in this guide and not the citation button. It will make your post more readable.
Or use markdown style like:
```
CODE
```
There's a live preview panel for exactly this reason.
Lots of people read these forums, and many of them will simply skip over a post that is difficult to read, because it's just too large an investment of their time to try and follow a wall of badly formatted text.
If your goal is to get an answer to your questions, it's in your interest to make it as easy to read and understand as possible.
Please update your post.
I'm not going to answer more in this thread as @Christian_Dahlqvist already helped you in the other thread. Let's keep the rest of the discussion there.
Apache, Apache Lucene, Apache Hadoop, Hadoop, HDFS and the yellow elephant
logo are trademarks of the
Apache Software Foundation
in the United States and/or other countries.