cluster.name: Search
node.name: fs-master
path.data: E:\LandingZone\Elastic\fs-master\Data
path.logs: D:\APPS\ELK8.0.0\elasticsearch-8.0.0\logs
network.host: 7.**.**.*3 (the IP address of the server where the master node is running)
http.port: 9200
discovery.seed_hosts: ["7.**.**.*1", "7.**.**.*2", "7.**.**.*3"] (IP addresses of all three nodes)
cluster.initial_master_nodes: ["7.**.**.*3"] (the IP address of the server where the master node is running)
xpack.security.enabled: false
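For comparison, below is a minimal sketch of what the corresponding elasticsearch.yml on one of the other two nodes might look like; the node name, data path, and network.host value are assumptions for illustration, and only the seed hosts and initial master list mirror the file above.

cluster.name: Search
node.name: fs-data-1                      # assumed name for a second node
path.data: E:\LandingZone\Elastic\fs-data-1\Data   # assumed data path
path.logs: D:\APPS\ELK8.0.0\elasticsearch-8.0.0\logs
network.host: 7.**.**.*1                  # this node's own IP
http.port: 9200
discovery.seed_hosts: ["7.**.**.*1", "7.**.**.*2", "7.**.**.*3"]
cluster.initial_master_nodes: ["7.**.**.*3"]
xpack.security.enabled: false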
Here is the Logstash config file I use for data ingestion.
input {
  stdin {
    codec => line {
      charset => "UTF-8"
    }
  }
}
filter {
  # The fingerprint filter creates a unique identifier that is used as the document id.
  # It creates a hash of the message content that serves as a unique id/key for each Elasticsearch entry.
  fingerprint {
    source => "message"
    target => "[@metadata][fingerprint]"
    method => "SHA1"
    # For the key we use the name of the index followed by the unique string on the first line of the csv data file.
    key => "traveller_no_dups"
    base64encode => true
  }
  # Defines all the fields in the CSV file in the order they appear.
  csv {
    separator => ","
    columns => [
      "SURNAME",
      "FIRST_NAME",
      "MIDDLE_NAME",
      "BIRTHDATE"
    ]
  }
  # Add a new DOB field that will hold the BIRTHDATE content.
  mutate {
    add_field => { "DOB" => "%{BIRTHDATE}" }
  }
  # Process the birthdate as DOB. Convert the birthdate into a date value.
  date {
    match => [ "DOB", "yyyyMMdd" ]
    target => "DOB"
  }
  # Remove all fields we don't need anymore.
  mutate {
    remove_field => [ "BIRTHDATE" ]
  }
}
output {
  elasticsearch {
    action => "index"
    hosts => "localhost:9200"
    index => "traveller_no_dups"
    document_id => "%{[@metadata][fingerprint]}"
  }
  stdout { codec => rubydebug }
  # stdout {}
}
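Since the input is stdin, the CSV data is presumably piped into Logstash when the pipeline is started. An alternative would be a file input; a minimal sketch is below (the path and sincedb_path are placeholders for illustration, not my actual values):

input {
  file {
    path => "E:/LandingZone/traveller.csv"   # placeholder path to the CSV file
    start_position => "beginning"            # read the file from the start on first run
    sincedb_path => "NUL"                    # Windows: do not persist the read position between runs
    codec => line {
      charset => "UTF-8"
    }
  }
}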