Search a string using logstash

Hi,

We have a requirement to parse an XML document (XML2) that is embedded as an escaped string inside another XML document (XML1). I was able to parse XML1 using the xml filter in Logstash. Please help me parse XML2, or search it for a particular element, and get its value.

Sharing the XML and logstash filter:

XML

<tns:ActivityInput xmlns:tns="http://www.tibco.com/namespaces/tnt/plugins/jms+32553768-9d2e-4ed0-90ae-0a1e20803547+input" xmlns:tns2="http://www.ericsson.com/tibco/schema/Logger">
  <Body>
    <tns2:Logger_Request>
      <tns2:conversationId>TIB-22b0be25-9e87-49ba-a3c0-1b617c82212a</tns2:conversationId>
      <tns2:correlationId>4618bdb4-36fb-4a37-b161-7fb07aeaedd6</tns2:correlationId>
      <tns2:eventId>CUST0000000023</tns2:eventId>
      <tns2:logTimestamp>2021-02-17T17:49:39.828+05:30</tns2:logTimestamp>
      <tns2:type>START</tns2:type>
      <tns2:businessReferenceId>228526</tns2:businessReferenceId>
      <tns2:systemConsumer>DFE</tns2:systemConsumer>
      <tns2:systemDestination>EB</tns2:systemDestination>
      <tns2:serviceName>SubmitPayment</tns2:serviceName>
      <tns2:operationName>POST</tns2:operationName>
      <tns2:payload>&lt;?xml version="1.0" encoding="UTF-8"?&gt;&lt;CustomerPayment xmlns="http://xmlns.ericsson.com/CDM/Payment" xmlns:ns1="http://xmlns.ericsson.com/CDM/Base"&gt;&lt;ns1:customerId&gt;CUST0000000023&lt;/ns1:customerId&gt;&lt;ns1:billingAccountId&gt;BA0000000044&lt;/ns1:billingAccountId&gt;&lt;transactionType&gt;DEPOSIT12&lt;/transactionType&gt;&lt;transactionRefNum&gt;228526&lt;/transactionRefNum&gt;&lt;Amount&gt;&lt;ns1:amount&gt;1.0E16&lt;/ns1:amount&gt;&lt;ns1:units&gt;&lt;ns1:currencyCode&gt;IDR&lt;/ns1:currencyCode&gt;&lt;/ns1:units&gt;&lt;/Amount&gt;&lt;referenceDate&gt;2020-12-21&lt;/referenceDate&gt;&lt;entryDate&gt;2021-02-11&lt;/entryDate&gt;&lt;/CustomerPayment&gt;</tns2:payload>
      <tns2:Log-Level>INFO</tns2:Log-Level>
      <tns2:appSpace>BWEclipseAppSpace</tns2:appSpace>
      <tns2:appNode>BWEclipseAppNode</tns2:appNode>
      <tns2:engine>Main</tns2:engine>
      <tns2:appModule>GW.SubmitPayment</tns2:appModule>
    </tns2:Logger_Request>
  </Body>
</tns:ActivityInput>

Current logstash filter.

filter
{
 xml {
        remove_namespaces => "true"
        source => "message"
        store_xml => "false"
        target => "doc"
        xpath =>  [
                   "//conversationId/text()","conversationId",
                   "//eventId/text()","eventId",
                   "//correlationId/text()","correlationId",
                   "//systemConsumer/text()","systemConsumer",
                   "//systemDestination/text()","systemDestination",
                   "//serviceName/text()","serviceName",
                   "//payload/text()","payload"
        ]

    }
}

I want to extract any element inside the `payload` element of the XML shared above.

Please help

Thanks,
Rabin

Hello Rabin,

You could add an additional xml filter like this:

 xml {
        remove_namespaces => "true"
        source => "[doc][payload]"
        store_xml => "false"
        target => "doc2"
        xpath =>  [
                   "//customerId/text()","customerId"
        ]

    }

Best regards
Wolfram

Hi,

When I tried to add another filter, Logstash did not start. Am I missing something?

filter
{
 xml {
        remove_namespaces => "true"
        source => "message"
        store_xml => "false"
        target => "doc"
        xpath =>  [
                   "//conversationId/text()","conversationId",
                   "//eventId/text()","eventId",
                   "//correlationId/text()","correlationId",
                   "//systemConsumer/text()","systemConsumer",
                   "//systemDestination/text()","systemDestination",
                   "//serviceName/text()","serviceName",
                   "//payload/text()","payload"
        ]

    }


 xml{
        remove_namespaces => "true"
        source => "[doc][payload]"
        store_xml => "false"
        target => "doc2"
        xpath =>  [
                   "//customerId/text()","customerId"
        ]
}

}

logstash error message

[2021-03-01T11:43:00,561][ERROR][logstash.javapipeline    ][main] Pipeline error {:pipeline_id=>"main", :exception=>#<NoMethodError: undefined method `[]' for false:FalseClass>, :backtrace=>["/home/elastic/softwares/logstash-7.11.1/vendor/bundle/jruby/2.5.0/gems/logstash-input-jms-3.1.2-java/lib/logstash/inputs/jms.rb:197:in `jms_config_from_yaml'", "/home/elastic/softwares/logstash-7.11.1/vendor/bundle/jruby/2.5.0/gems/logstash-input-jms-3.1.2-java/lib/logstash/inputs/jms.rb:155:in `jms_config'", "/home/elastic/softwares/logstash-7.11.1/vendor/bundle/jruby/2.5.0/gems/logstash-input-jms-3.1.2-java/lib/logstash/inputs/jms.rb:141:in `register'", "/home/elastic/softwares/logstash-7.11.1/logstash-core/lib/logstash/java_pipeline.rb:228:in `block in register_plugins'", "org/jruby/RubyArray.java:1809:in `each'", "/home/elastic/softwares/logstash-7.11.1/logstash-core/lib/logstash/java_pipeline.rb:227:in `register_plugins'", "/home/elastic/softwares/logstash-7.11.1/logs

Based on the error message I would expect a problem with the JMS input:

:backtrace=>[".../logstash/inputs/jms.rb:197:in `jms_config_from_yaml'"...

Hi,

You are correct, my ems.yml file had become corrupted. Logstash now starts successfully, but the data in EMS does not have the field customerId that the second XML filter should produce. Can you please help with this?


{
	"_index": "inderforlogstash",
	"_type": "_doc",
	"_id": "hdYk7XcBIQw2IfVYeYW1",
	"_version": 1,
	"_seq_no": 3,
	"_primary_term": 1,
	"found": true,
	"_source": {
		"JMSXDeliveryCount": 1,
		"jms_timestamp": 1614591324822,
		"@version": "1",
		"serviceName": [
			"SubmitPayment"
		],
		"jms_redelivered": false,
		"payload": [
			"&lt;?xml version=\"1.0\" encoding=\"UTF-8\"?&gt;&lt;CustomerPayment xmlns=\"http://xmlns.ericsson.com/CDM/Payment\" xmlns:ns1=\"http://xmlns.ericsson.com/CDM/Base\"&gt;&lt;ns1:customerId&gt;CUST0000000023&lt;/ns1:customerId&gt;&lt;ns1:billingAccountId&gt;BA0000000044&lt;/ns1:billingAccountId&gt;&lt;transactionType&gt;DEPOSIT12&lt;/transactionType&gt;&lt;transactionRefNum&gt;228526&lt;/transactionRefNum&gt;&lt;Amount&gt;&lt;ns1:amount&gt;1.0E16&lt;/ns1:amount&gt;&lt;ns1:units&gt;&lt;ns1:currencyCode&gt;IDR&lt;/ns1:currencyCode&gt;&lt;/ns1:units&gt;&lt;/Amount&gt;&lt;referenceDate&gt;2020-12-21&lt;/referenceDate&gt;&lt;entryDate&gt;2021-02-11&lt;/entryDate&gt;&lt;/CustomerPayment&gt;"
		],
		"jms_reply_to": null,
		"jms_type": "",
		"systemDestination": [
			"EB"
		],
		"message": "<tns:ActivityInput xmlns:tns=\"http://www.tibco.com/namespaces/tnt/plugins/jms+32553768-9d2e-4ed0-90ae-0a1e20803547+input\" xmlns:tns2=\"http://www.ericsson.com/tibco/schema/Logger\">\n  <Body>\n    <tns2:Logger_Request>\n      <tns2:conversationId>TIB-22b0be25-9e87-49ba-a3c0-1b617c82212a</tns2:conversationId>\n      <tns2:correlationId>4618bdb4-36fb-4a37-b161-7fb07aeaedd6</tns2:correlationId>\n      <tns2:eventId>CUST0000000023</tns2:eventId>\n      <tns2:logTimestamp>2021-02-17T17:49:39.828+05:30</tns2:logTimestamp>\n      <tns2:type>START</tns2:type>\n      <tns2:businessReferenceId>228526</tns2:businessReferenceId>\n      <tns2:systemConsumer>DFE</tns2:systemConsumer>\n      <tns2:systemDestination>EB</tns2:systemDestination>\n      <tns2:serviceName>SubmitPayment</tns2:serviceName>\n      <tns2:operationName>POST</tns2:operationName>\n      <tns2:payload>&lt;?xml version=\"1.0\" encoding=\"UTF-8\"?&gt;&lt;CustomerPayment xmlns=\"http://xmlns.ericsson.com/CDM/Payment\" xmlns:ns1=\"http://xmlns.ericsson.com/CDM/Base\"&gt;&lt;ns1:customerId&gt;CUST0000000023&lt;/ns1:customerId&gt;&lt;ns1:billingAccountId&gt;BA0000000044&lt;/ns1:billingAccountId&gt;&lt;transactionType&gt;DEPOSIT12&lt;/transactionType&gt;&lt;transactionRefNum&gt;228526&lt;/transactionRefNum&gt;&lt;Amount&gt;&lt;ns1:amount&gt;1.0E16&lt;/ns1:amount&gt;&lt;ns1:units&gt;&lt;ns1:currencyCode&gt;IDR&lt;/ns1:currencyCode&gt;&lt;/ns1:units&gt;&lt;/Amount&gt;&lt;referenceDate&gt;2020-12-21&lt;/referenceDate&gt;&lt;entryDate&gt;2021-02-11&lt;/entryDate&gt;&lt;/CustomerPayment&gt;</tns2:payload>\n      <tns2:Log-Level>INFO</tns2:Log-Level>\n      <tns2:appSpace>BWEclipseAppSpace</tns2:appSpace>\n      <tns2:appNode>BWEclipseAppNode</tns2:appNode>\n      <tns2:engine>Main</tns2:engine>\n      <tns2:appModule>GW.SubmitPayment</tns2:appModule>\n    </tns2:Logger_Request>\n  </Body>\n</tns:ActivityInput>",
		"jms_destination": "Queue[q.logstash]",
		"jms_expiration": 0,
		"jms_priority": 4,
		"jms_correlation_id": "",
		"conversationId": [
			"TIB-22b0be25-9e87-49ba-a3c0-1b617c82212a"
		],
		"systemConsumer": [
			"DFE"
		],
		"eventId": [
			"CUST0000000023"
		],
		"jms_message_id": "ID:EMS-SERVER.18E3602E2E8670:8",
		"correlationId": [
			"4618bdb4-36fb-4a37-b161-7fb07aeaedd6"
		],
		"@timestamp": "2021-03-01T09:35:24.822Z",
		"jms_delivery_mode_sym": "non_persistent"
	}
}

I am a bit confused: according to your Logstash config, the fields of the first xml filter should be stored in an element named doc:

target => "doc"

Are you renaming the fields, or have you changed the first xml filter?

Anyway, the solution is simple: as the payload field is in the root of the document, simply change the second xml filter to:

xml{
        remove_namespaces => "true"
        source => "payload"
        store_xml => "false"
        target => "doc2"
        xpath =>  [
                   "//customerId/text()","customerId"
        ]
}

Hi,

I tried this, but it is still not working. I am sharing the complete Logstash config file. To answer your question: I am not updating the data anywhere in between. Is it possible that the XML is stored as a string and Logstash is not able to interpret it? Can we search for a string within it and extract the value as a substring?

input {
   jms {
      include_header => true
      include_properties => true
      include_body => true
      use_jms_timestamp => true
      timeout => -1
      destination => "q.logstash"
      pub_sub => false
      yaml_file => "/home/elastic/softwares/logstash-7.11.1/config/ems.yml"
      yaml_section => "ems"
   }
}

filter
{
 xml {
        remove_namespaces => "true"
        source => "message"
        store_xml => "false"
        target => "doc"
        xpath =>  [
                   "//conversationId/text()","conversationId",
                   "//eventId/text()","eventId",
                   "//correlationId/text()","correlationId",
                   "//systemConsumer/text()","systemConsumer",
                   "//systemDestination/text()","systemDestination",
                   "//serviceName/text()","serviceName",
                   "//payload/text()","payload"
        ]

    }
  xml {
        remove_namespaces => "true"
        source => "payload"
        store_xml => "false"
        target => "doc2"
        xpath =>  [
                   "//customerId/text()","customerId"
        ]
      }
}

output{
        elasticsearch {
        hosts => ["localhost:9200"]
        index => "inderforlogstash"
        }
}

Hello Rabin,

I misunderstood the store_xml setting:
If store_xml is set to true, the parsed XML is stored in the target field. If store_xml is set to false, only the xpath entries are stored, and their field names are not affected by the target setting.

Is there a reason why you are using the xpath setting instead of store_xml? Otherwise you could try it this way:

input {
   jms {
      include_header => true
      include_properties => true
      include_body => true
      use_jms_timestamp => true
      timeout => -1
      destination => "q.logstash"
      pub_sub => false
      yaml_file => "/home/elastic/softwares/logstash-7.11.1/config/ems.yml"
      yaml_section => "ems"
   }
}

filter
{
 xml {
        remove_namespaces => "true"
        source => "message"
        store_xml => "true"
        target => "doc"
    }
  xml {
        remove_namespaces => "true"
        source => "[doc][payload]"
        store_xml => "true"
        target => "doc2"
      }
}

output{
        elasticsearch {
        hosts => ["localhost:9200"]
        index => "inderforlogstash"
        }
}

If this still does not work, it might be worth enabling debug logging and checking whether the XML filter logs anything that explains why the fields are not parsed.

Hi,

When I use the above filter, I am able to get doc but not doc2. I am very new to Logstash and do not understand what to do next. I changed the source of the second filter as follows, but it still did not help :frowning:

source => "[doc][Body][Logger_Request][payload]"

	"doc": {
			"Body": [
				{
					"Logger_Request": [
						{
							"businessReferenceId": [
								"228526"
							],
							"Log-Level": [
								"INFO"
							],
							"serviceName": [
								"SubmitPayment"
							],
							"appModule": [
								"GW.SubmitPayment"
							],
							"engine": [
								"Main"
							],
							"conversationId": [
								"TIB-22b0be25-9e87-49ba-a3c0-1b617c82212a"
							],
							"correlationId": [
								"4618bdb4-36fb-4a37-b161-7fb07aeaedd6"
							],
							"systemConsumer": [
								"DFE"
							],
							"eventId": [
								"CUST0000000023"
							],
							"logTimestamp": [
								"2021-02-17T17:49:39.828+05:30"
							],
							"payload": [
								"<?xml version=\"1.0\" encoding=\"UTF-8\"?><CustomerPayment xmlns=\"http://xmlns.ericsson.com/CDM/Payment\" xmlns:ns1=\"http://xmlns.ericsson.com/CDM/Base\"><ns1:customerId>CUST0000000023</ns1:customerId><ns1:billingAccountId>BA0000000044</ns1:billingAccountId><transactionType>DEPOSIT12</transactionType><transactionRefNum>228526</transactionRefNum><Amount><ns1:amount>1.0E16</ns1:amount><ns1:units><ns1:currencyCode>IDR</ns1:currencyCode></ns1:units></Amount><referenceDate>2020-12-21</referenceDate><entryDate>2021-02-11</entryDate></CustomerPayment>"
							],
							"type": [
								"START"
							],
							"appNode": [
								"BWEclipseAppNode"
							],
							"systemDestination": [
								"EB"
							],
							"operationName": [
								"POST"
							],
							"appSpace": [
								"BWEclipseAppSpace"
							]
						}
					]
				}
			],
			"xmlns:tns2": "http://www.ericsson.com/tibco/schema/Logger",
			"xmlns:tns": "http://www.tibco.com/namespaces/tnt/plugins/jms+32553768-9d2e-4ed0-90ae-0a1e20803547+input"
		}

Hi,

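The issue is resolved with the following filter. Because the parsed XML in doc stores every element as an array (see the output above), the source of the second filter has to select the first entry at each level with [0].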

filter
{
 xml {
        remove_namespaces => "true"
        source => "message"
        store_xml => "true"
        target => "doc"
        xpath =>  [
                   "//conversationId/text()","conversationId",
                   "//eventId/text()","eventId",
                   "//correlationId/text()","correlationId",
                   "//systemConsumer/text()","systemConsumer",
                   "//systemDestination/text()","systemDestination",
                   "//serviceName/text()","serviceName",
                   "//payload/text()","payload"
        ]



    }
  xml {
        remove_namespaces => "true"
        source => "[doc][Body][0][Logger_Request][0][payload][0]"
        store_xml => "true"
        target => "doc2"
        xpath =>  [
                   "//customerId/text()","customerId",
                   "//billingAccountId/text()","billingAccountId"
        ]
      }
}

Thanks,
Rabin

This topic was automatically closed 28 days after the last reply. New replies are no longer allowed.