Hi,
I have experience with indexing csv-files into elasticsearch but not with local xml-files. I am following this tutorial which explains how to index xml-data from a website with Logstash.
My environment does not have access to the internet so I've download the xml-file. I copy/pasted the conf, edited the conf, edited and posted the mapping and run the conf in Logstash. Unfortunately nothing happens. Logstash starts the pipeline and is waiting to pick up an xml-file but it does not collect the data even though the dataset is available at the given location. Any idea what might go wrong?
This is the conf:
input
{
file
{
path => "/home/DSAdmin/stations.xml"
start_position => "beginning"
ignore_older => 0
sincedb_path => "/dev/null"
}
}
filter {
## interpret the message payload as XML
xml {
source => "message"
target => "parsed"
}
## Split out each "station" record in the XML into a different event
split {
field => "[parsed][station]"
add_field => {
## generate a unique id for the station # X the sensor time to prevent duplicates
id => "%{[parsed][station][id]}-%{[parsed][station][lastCommWithServer]}"
stationName => "%{[parsed][station][name]}"
lastCommWithServer => "%{[parsed][station][lastCommWithServer]}"
lat => "%{[parsed][station][lat]}"
long => "%{[parsed][station][long]}"
numBikes => "%{[parsed][station][nbBikes]}"
numEmptyDocks => "%{[parsed][station][nbEmptyDocks]}"
}
}
mutate {
## Convert the numeric fileds to the appropriate data type from strings
convert => {
"numBikes" => "integer"
"numEmptyDocks" => "integer"
"lat" => "float"
"long" => "float"
}
## put the geospatial value in the correct [ longitude, latitude ] format
add_field => { "location" => [ "%{[long]}", "%{[lat]}" ]}
## get rid of the extra fields we don't need
remove_field => [ "message", "parsed", "lat", "long", "host", "http_poller_metadata"]
}
## use the embedded Unix timestamp
date {
match => ["lastCommWithServer", "UNIX_MS"]
remove_field => ["lastCommWithServer"]
}
}
output {
elasticsearch {
action => "index"
hosts => "localhost"
index => "bikestatus-dc-%{+YYYY.MM.dd}"
document_type => "bikestatus"
document_id => "%{[id]}"
}
stdout {}
}
Mapping:
PUT _template/bikestatus
{
"template": "bikestatus-*",
"settings": {
"number_of_shards": 1,
"number_of_replicas": 0
},
"mappings": {
"_default_": {
"dynamic_templates": [
{
"string_fields": {
"mapping": {
"index": "not_analyzed",
"omit_norms": true,
"type": "string",
"doc_values": true
},
"match_mapping_type": "string",
"match": "*"
}
}
],
"_all": {
"enabled": false
},
"properties": {
"@timestamp": {
"type": "date",
"format": "dateOptionalTime",
"doc_values": true
},
"location": {
"type": "geo_point",
"geohash": true,
"fielddata" : {
"format" : "compressed",
"precision" : "20m"
}
},
"numBikes": { "type": "integer","doc_values": true },
"numEmptyDocks": { "type": "integer","doc_values": true }
}
}
}
}