Hi all
I need to parse all XML elements, at every level of the hierarchy, in millions of XML files with Logstash (a sample is pasted below) and store them in separate JSON fields in Elasticsearch. Each XML file should end up as one JSON document in ES. I do not want to use XPath and list all possible fields, because I cannot know them in advance and they will change over time. The nesting levels of the elements can also change from file to file. E.g., in the sample below there are 5 elements: <AV9APIDATA>, <TRADE>, <INSTSPECIFIER>, <ANNOTATIONS> and <NOTE>, where <TRADE> is a child of <AV9APIDATA>, and <INSTSPECIFIER> and <ANNOTATIONS> are children of <TRADE>. Likewise, <NOTE> is a child of <ANNOTATIONS>. It can happen that a file comes without <TRADE> and with only <ANNOTATIONS>, and vice versa.
So far I have not found a smart way to parse all elements, whether or not there are child elements, and whether there are 250 fields or only 3.
Any suggestions for a smart filter configuration in this case?
Thx a lot and Kind regards!
<?xml version="1.0" encoding="utf-16"?>
<!-- Sample document: root AV9APIDATA (default namespace "av9api-platform-com")
     containing a single TRADE element. -->
<AV9APIDATA xmlns="av9api-platform-com">
  <!-- TRADE: all trade data is carried as attributes; its children are
       INSTSPECIFIER and ANNOTATIONS. -->
  <TRADE EngineID="2"
         TradeID="0123456"
         RouteID="012"
         RouteName="House"
         Action="Update"
         DateTime="2024-06-05T08:02:47.624Z"
         DateTimeNanoSecondsPart="0"
         Price="11.000"
         Volume="22"
         AggressorCompany="CmpName"
         AggressorCompanyID="0"
         AggressorTrader=""
         AggressorTraderID="0"
         AggressorUser=""
         AggressorUserID="0"
         AggressorAction="Sell"
         AggressorBroker="BrokerName"
         AggressorBrokerID="3"
         InitiatorCompany="someOtherCmpName"
         InitiatorCompanyID="023"
         InitiatorTrader="Trader Name"
         InitiatorTraderID="0987"
         InitiatorUser="User Name"
         InitiatorUserID="9876"
         InitiatorAction="Buy"
         InitiatorBroker="Another Broker Name"
         InitiatorBrokerID="123"
         LastUpdate="2024-06-05T08:03:04.626Z"
         LastUpdateNanoSecondsPart="0"
         ForeignLastUpdate="2024-06-05T08:03:04.626Z"
         ManualDeal="false"
         VoiceDeal="false"
         InitSleeve="false"
         AggSleeve="false"
         PNC="false"
         ClearingStatus="Refused"
         ClearingID="0"
         InitiatorOwnedSpread="false"
         AggressorOwnedSpread="false"
         UnderInvestigation="false"
         ClearingHouse="Name of Clearinghouse"
         JTT="false"
         FromBrokenSpread="false"
         OtcGiveUp="false"
         ExecutionVenueID="SOME_STRING_VALUE"
         ForeignContractID="SOME_VALUE|Value|1234|5|6789|0"
         InitiatorTradingCapacity="SOMETHING"
         InitiatorDecisionMaker="01235"
         InitiatorExecutionMaker="9876"
         InitiatorDerivativeIndicator="false"
         InitiatorDEA="false"
         InitiatorLiquidityProvision="false"
         ProductClassification="Productname"
         IsMarketData="true"
         IsOwnData="true"
         VenueEntity="This is a text here">
    <!-- INSTSPECIFIER: empty element, attributes only. -->
    <INSTSPECIFIER InstID="9874563"
                   InstName="Inst Name String"
                   FirstSequenceID="1234569"
                   SeqSpan="Single"
                   FirstSequenceItemID="0123"
                   SecondSequenceItemID="0"
                   FirstSequenceItemName="Thu 06/06/24"
                   SecondSequenceItemName=""
                   TermFormatID="0123987456"
                   ExternalInstID="7893125"/>
    <!-- ANNOTATIONS: list of NOTE elements; each NOTE has a Label attribute
         and carries its value as text content (kept on one line so no
         whitespace is added to the value). -->
    <ANNOTATIONS>
      <NOTE Label="CurrencyIsoCode">CHF</NOTE>
      <NOTE Label="ExecutionDT">2024-06-05T08:02:51.733</NOTE>
      <NOTE Label="ExecutionWorkflow">Some-Text</NOTE>
      <NOTE Label="NegotiationStatus">Another-Text</NOTE>
      <NOTE Label="Unit">WM</NOTE>
      <NOTE Label="UnitGUID">aa11b313-abc6-0123-4567-987c1e2f7q0t</NOTE>
      <NOTE Label="UnitID">98</NOTE>
      <NOTE Label="CPTY_Calendar">CH</NOTE>
      <NOTE Label="CPTY_CalendarID">01234f53-abc0-11e2-d98c-1f7123abc9g4</NOTE>
      <NOTE Label="CPTY_CurrencyScale">1.00</NOTE>
      <NOTE Label="CPTY_DealCreationDT">2024-06-05T08:02:47.624</NOTE>
      <NOTE Label="CPTY_Execution">Some note here</NOTE>
      <NOTE Label="CPTY_ExecutionVenue">Another note here</NOTE>
      <NOTE Label="CPTY_NegotiationBroker">This is a text field</NOTE>
      <NOTE Label="CPTY_NegotiationBrokerID">123456</NOTE>
      <NOTE Label="CPTY_SEGMENT_MIC">HELLO</NOTE>
      <NOTE Label="CPTY_UTI">9595989845645645454564QQ4545454545212112552</NOTE>
    </ANNOTATIONS>
  </TRADE>
</AV9APIDATA>