Hello,
I have gone through most of the documentation about how to reindex Elasticsearch data. However, I am still not quite satisfied with the result I am getting.
Question: sometimes we would like to change the Logstash grok patterns and create new fields in an index. I noticed that we can add new fields by changing the mapping. But how do we add a field with a pattern, so that its value is generated by matching against the data, the way Logstash does?
Since I could not find a way to add fields with a pattern in the mapping, I ended up using the Logstash elasticsearch plugin to reindex the indices. It kind of works, but I am wondering whether there is a better way to do it.
2nd question: some docs show as deleted after reindexing with the Logstash elasticsearch plugin. Why?
curl 'localhost:9200/_cat/indices?v' | grep syslog
health status index                pri rep docs.count docs.deleted store.size pri.store.size
yellow open   syslog-2016.11.08      5   1    2852206       858040      1.2gb          1.2gb
yellow open   syslog-v2-2016.07.15   5   1          1            0        9kb            9kb
yellow open   new-syslog             5   1    1230000        90556    569.3mb        569.3mb
new-syslog is the new index generated by the Logstash elasticsearch plugin, and 90556 docs are reported as deleted.
Below is the reindexing conf file for your information:
input {
  # Read every document back out of the existing daily syslog index.
  elasticsearch {
    hosts   => [ "localhost:9200" ]
    index   => "syslog-2016.11.08"

    # Scroll through the index in pages of 1000 hits, keeping each
    # scroll context alive for five minutes between fetches.
    scan    => true
    size    => 1000
    scroll  => "5m"

    # Expose the source document's metadata under [@metadata] so the
    # output section can reuse the original _type and _id.
    docinfo => true
  }
}
filter {
  # Strip every field produced by the previous pipeline run; only the raw
  # "message" is kept, and the grok filters below regenerate the rest.
  # NOTE(review): "@timestamp" is removed here and nothing below restores it
  # (there is no date filter) -- confirm the new index does not need the
  # original event time.
  mutate {
    remove_field => [
      "datastore_latency_to", "protocol", "@timestamp", "src_int", "pid",
      "message_system_info", "syslog_facility", "dst_int", "message_thread_id",
      "message_service_info", "port", "syslog_facility_code", "syslog_message",
      "tags", "ASA_type", "device_status", "syslog_timestamp", "program",
      "@source_host", "vmware_warning_msg", "message-body", "type",
      "message_service", "syslog_severity_code", "datastore_latency_from",
      "host", "syslog_program", "@message", "syslog_hostname",
      "syslog_severity", "syslog_level", "message_opID", "device_naa", "acl",
      "action", "syslog_pri", "@version"
    ]
  }

  # Only Cisco ASA firewall lines are re-parsed.
  if [message] =~ "%ASA-" {
    # Split off the syslog header; the remainder lands in "cisco_message".
    grok {
      match   => { "message" => "%{CISCO_TAGGED_SYSLOG} %{GREEDYDATA:cisco_message}" }
      add_tag => ["cisco-fw"]
    }

    # Decode the syslog priority captured by the grok above.
    syslog_pri { }

    # Try each known ASA message layout in order; grok stops at the first
    # pattern that matches.
    grok {
      match => {
        "cisco_message" => [
          "%{CISCOFW106001}",
          "%{CISCOFW106006_106007_106010}",
          "%{CISCOFW106014}",
          "%{CISCOFW106015}",
          "%{CISCOFW106021}",
          "%{CISCOFW106023}",
          "%{CISCOFW106100}",
          "%{CISCOFW110002}",
          "%{CISCOFW302010}",
          "%{CISCOFW302013_302014_302015_302016}",
          "%{CISCOFW302020_302021}",
          "%{CISCOFW305011}",
          "%{CISCOFW313001_313004_313008}",
          "%{CISCOFW313005}",
          "%{CISCOFW402117}",
          "%{CISCOFW402119}",
          "%{CISCOFW419001}",
          "%{CISCOFW419002}",
          "%{CISCOFW500004}",
          "%{CISCOFW602303_602304}",
          "%{CISCOFW710001_710002_710003_710005_710006}",
          "%{CISCOFW713172}",
          "%{CISCOFW733100}",
          "%{GREEDYDATA:cisco_message}"
        ]
      }
      # The catch-all pattern captures into the field it is matching against.
      # Without "overwrite", grok appends to the existing value and turns
      # "cisco_message" into an array holding the same text twice.
      overwrite => ["cisco_message"]
    }
  }
}
output {
  # Write the re-parsed events into the replacement index.
  elasticsearch {
    hosts         => ["localhost:9200"]
    index         => "new-syslog"

    # Keep each document's original type and id from the source index.
    # NOTE(review): because the id is reused, indexing the same source
    # document again replaces the earlier copy instead of adding a new one;
    # Elasticsearch reports replaced copies under docs.deleted until segments
    # are merged -- likely the source of the non-zero count, confirm.
    document_type => "%{[@metadata][_type]}"
    document_id   => "%{[@metadata][_id]}"
  }
}
Thanks and regards,
Roger