I'm working with ELK 6.7.0 on docker with official images. This is my conf file:
input {
file {
path => "/usr/share/logstash/logs/*.xml"
type => "xml"
sincedb_path => "/dev/null"
codec => multiline {
pattern => "<root>"
negate => "true"
what => "previous"
}
}
}
filter {
xml {
source => "message"
store_xml => false
xpath => [
"/root/ChainId/text()", "ChainId",
"/root/SubChainId/text()", "SubChainId",
"/root/StoreId/text()", "StoreId",
"/root/BikoretNo/text()", "BikoretNo",
"/root/DllVerNo/text()", "DllVerNo"
]
}
}
output {
elasticsearch {
hosts => "elasticsearch:9200"
index => "xml_index"
}
stdout {
codec => rubydebug
}
}
My XML file is:
<?xml version="1.0" encoding="UTF-8"?>
<root>
<ChainId>7290027600007</ChainId>
<SubChainId>001</SubChainId>
<StoreId>001</StoreId>
<BikoretNo>9</BikoretNo>
<DllVerNo>8.0.1.3</DllVerNo>
</root>
I'm trying to parse incoming XML files, but when a new file is created on the path folder logstash parsing it as following:
logstash_1 | { logstash_1 | "path" => "/usr/share/logstash/logs/example10.xml", logstash_1 | "@version" => "1", logstash_1 | "message" => "<?xml version=\"1.0\" encoding=\"UTF-8\"?>", logstash_1 | "type" => "xml", logstash_1 | "@timestamp" => 2019-04-02T04:42:59.248Z, logstash_1 | "host" => "a4f1bf64a3d5" logstash_1 | }
However, When I reload my conf file Logstash surprisingly is parsing my XML successfully:
logstash_1 | { logstash_1 | "StoreId" => [ logstash_1 | [0] "001" logstash_1 | ], logstash_1 | "message" => "<root>\n <ChainId>7290027600007</ChainId>\n <SubChainId>001</SubChainId>\n <StoreId>001</StoreId>\n <BikoretNo>9</BikoretNo>\n <DllVerNo>8.0.1.3</DllVerNo>", logstash_1 | "DllVerNo" => [ logstash_1 | [0] "8.0.1.3" logstash_1 | ], logstash_1 | "type" => "xml", logstash_1 | "SubChainId" => [ logstash_1 | [0] "001" logstash_1 | ], logstash_1 | "BikoretNo" => [ logstash_1 | [0] "9" logstash_1 | ], logstash_1 | "path" => "/usr/share/logstash/logs/example10.xml", logstash_1 | "@version" => "1", logstash_1 | "ChainId" => [ logstash_1 | [0] "7290027600007" logstash_1 | ], logstash_1 | "tags" => [ logstash_1 | [0] "multiline" logstash_1 | ], logstash_1 | "@timestamp" => 2019-04-02T04:43:18.439Z, logstash_1 | "host" => "a4f1bf64a3d5" logstash_1 | } logstash_1 | { logstash_1 | "StoreId" => [ logstash_1 | [0] "001" logstash_1 | ], logstash_1 | "message" => "<root>\n <ChainId>7290027600007</ChainId>\n <SubChainId>001</SubChainId>\n <StoreId>001</StoreId>\n <BikoretNo>9</BikoretNo>\n <DllVerNo>8.0.1.3</DllVerNo>", logstash_1 | "DllVerNo" => [ logstash_1 | [0] "8.0.1.3" logstash_1 | ], logstash_1 | "type" => "xml", logstash_1 | "SubChainId" => [ logstash_1 | [0] "001" logstash_1 | ], logstash_1 | "BikoretNo" => [ logstash_1 | [0] "9" logstash_1 | ], logstash_1 | "path" => "/usr/share/logstash/logs/example11.xml", logstash_1 | "@version" => "1", logstash_1 | "ChainId" => [ logstash_1 | [0] "7290027600007" logstash_1 | ], logstash_1 | "tags" => [ logstash_1 | [0] "multiline" logstash_1 | ], logstash_1 | "@timestamp" => 2019-04-02T04:43:18.440Z, logstash_1 | "host" => "a4f1bf64a3d5" logstash_1 | }
The message field in both events is different parts of the file and seems like Logstash is splitting the file before and after the pattern. Even so, not clear why it doing it just on conf reload.