Logstash-extract multiple subfield values in multiple events

Hi,

I have below type of events(fields value can be dynamic). I'm trying to split field's key, value as new event.
I'm able to do it for two fields (TOTAl_VOLUME, SUCCESS_VOLUME), but when I try a 3rd field, Logstash stops responding.

{
           "agentId" => "Log_Agent",
         "@metadata" => {
                  "txnId1" => "GET#/txn/branchserver17.aspx|srv=73689505-0ca6-48fe-a4da-4cf7ed4acd82|acc=12",
        "A1EvtFingerprint" => "AGENTID=Log_Agent&TIME=1657708200000&RESPTYPE=DC",
               "indexname" => "heal_collated_agent_txn",
                  "txnId2" => "GET#/txn/branchserver17.aspx|srv=73689505-0ca6-48fe-a4da-4cf7ed4acd82|acc=12",
               "tablename" => "agent_transactions_data",
               "accountid" => "mle_account",
        "enable_rubydebug" => "true"
    },
      "max_response" => {
        "GET#/txn/branchserver50.aspx|srv=73689505-0ca6-48fe-a4da-4cf7ed4acd82|acc=12" => 5.011000156402588,
        "GET#/txn/branchserver51.aspx|srv=73689505-0ca6-48fe-a4da-4cf7ed4acd82|acc=12" => 5.011000156402588,
        "GET#/txn/branchserver23.aspx|srv=73689505-0ca6-48fe-a4da-4cf7ed4acd82|acc=12" => 5.011000156402588
    },
     "response_type" => "DC",
    "aggLevelInMins" => 15,
         "timeInGMT" => 1657708200000,
      "avg_response" => {
        "GET#/txn/branchserver50.aspx|srv=73689505-0ca6-48fe-a4da-4cf7ed4acd82|acc=12" => 4.5954742431640625,
        "GET#/txn/branchserver51.aspx|srv=73689505-0ca6-48fe-a4da-4cf7ed4acd82|acc=12" => 4.6110687255859375,
        "GET#/txn/branchserver23.aspx|srv=73689505-0ca6-48fe-a4da-4cf7ed4acd82|acc=12" => 4.580192565917969
    },
           "timeout" => {
        "GET#/txn/branchserver50.aspx|srv=73689505-0ca6-48fe-a4da-4cf7ed4acd82|acc=12" => 777,
        "GET#/txn/branchserver51.aspx|srv=73689505-0ca6-48fe-a4da-4cf7ed4acd82|acc=12" => 839,
        "GET#/txn/branchserver23.aspx|srv=73689505-0ca6-48fe-a4da-4cf7ed4acd82|acc=12" => 781
    },
           "unknown" => {
        "GET#/txn/branchserver50.aspx|srv=73689505-0ca6-48fe-a4da-4cf7ed4acd82|acc=12" => 773,
        "GET#/txn/branchserver51.aspx|srv=73689505-0ca6-48fe-a4da-4cf7ed4acd82|acc=12" => 794,
        "GET#/txn/branchserver23.aspx|srv=73689505-0ca6-48fe-a4da-4cf7ed4acd82|acc=12" => 746
    },
              "fail" => {
        "GET#/txn/branchserver50.aspx|srv=73689505-0ca6-48fe-a4da-4cf7ed4acd82|acc=12" => 770,
        "GET#/txn/branchserver51.aspx|srv=73689505-0ca6-48fe-a4da-4cf7ed4acd82|acc=12" => 737,
        "GET#/txn/branchserver23.aspx|srv=73689505-0ca6-48fe-a4da-4cf7ed4acd82|acc=12" => 800
    },
        "@timestamp" => 2022-07-13T10:30:00.000Z,
              "slow" => {
        "GET#/txn/branchserver50.aspx|srv=73689505-0ca6-48fe-a4da-4cf7ed4acd82|acc=12" => 782,
        "GET#/txn/branchserver51.aspx|srv=73689505-0ca6-48fe-a4da-4cf7ed4acd82|acc=12" => 788,
        "GET#/txn/branchserver23.aspx|srv=73689505-0ca6-48fe-a4da-4cf7ed4acd82|acc=12" => 744
    },
      "min_response" => {
        "GET#/txn/branchserver50.aspx|srv=73689505-0ca6-48fe-a4da-4cf7ed4acd82|acc=12" => 5.0,
        "GET#/txn/branchserver51.aspx|srv=73689505-0ca6-48fe-a4da-4cf7ed4acd82|acc=12" => 5.0,
        "GET#/txn/branchserver23.aspx|srv=73689505-0ca6-48fe-a4da-4cf7ed4acd82|acc=12" => 5.0
    },
            "dcKpis" => {
          "TOTAl_VOLUME" => 10957,
        "SUCCESS_VOLUME" => 7776
    },
          "@version" => "1"
}

Desired output should be:

          "txnId" : "GET#/txn/branchserver50.aspx|srv=73689505-0ca6-48fe-a4da-4cf7ed4acd82|acc=12",
          "timeInGMT" : 1657048320000,
          "dcKpis" : {
            "UNKNOWN_VOLUME" : 59.0,
            "TIMEOUT_VOLUME" : 59.0,
            "FAIL_VOLUME" : 59.0,
            "MIN_RESPONSE_TIME" : 1000000.0,
            "TOTAL_VOLUME" : 236.0,
            "RESPONSE_TIME" : 1000000.0,
            "SUCCESS_VOLUME" : 59.0,
            "MAX_RESPONSE_TIME" : 1000000.0,
            "SLOW_VOLUME" : 0.0
          },

Following is my pipeline:

      ruby {
        # Turn the {txn_key => value} pairs of the [total] hash into an array
        # of {"txnId1" => key, "total" => value} hashes so a later split
        # filter can emit one event per transaction.
        # [total] is removed from the event in either case.
        code => '
          totals = event.get("total")
          if totals.is_a?(Hash)
            pairs = totals.map { |txn_key, txn_value|
              { "txnId1" => txn_key, "total" => txn_value }
            }
            event.set("someField1", pairs)
          end
          event.remove("total")
         '
      }


      ruby {
        # Same reshaping as for [total]: convert the [success] hash into an
        # array of {"txnId2" => key, "success" => value} entries for a later
        # split filter. [success] is removed from the event in either case.
        code => '
          successes = event.get("success")
          if successes.is_a?(Hash)
            pairs = successes.map { |txn_key, txn_value|
              { "txnId2" => txn_key, "success" => txn_value }
            }
            event.set("someField2", pairs)
          end
          event.remove("success")
         '
      }


      # NOTE(review): each split filter clones the event once per element of
      # its array field, so chaining one split per source hash multiplies the
      # event count (N keys x N keys x ...). This combinatorial blow-up is the
      # likely reason the pipeline appears to hang once a 3rd field is added.
      # Building ONE combined array (one entry per txnId carrying all KPIs)
      # and splitting a single time avoids the problem.
      split {
        field => 'someField1'
      }
      split {
        field => 'someField2'
      }



      mutate {
        # Move the transaction ids to @metadata and the metric values under
        # [dcKpis] with their KPI names.
        rename => {
          "[someField1][txnId1]" => "[@metadata][txnId1]"
          "[someField1][total]" => "[dcKpis][TOTAl_VOLUME]"
          "[someField2][txnId2]" => "[@metadata][txnId2]"
          "[someField2][success]" => "[dcKpis][SUCCESS_VOLUME]"
        }
        # someField3..someField9 do not exist in this pipeline; listing them
        # is harmless, since remove_field ignores fields that are absent.
        remove_field => ["someField1","someField2","someField3","someField4","someField5","someField6","someField7","someField8","someField9"]
      }


Please suggest if anybody aware about this.

Regards,
Akshay Kulkarnni

Your required output appears to be unrelated to the input object, and your ruby code references fields that do not exist, so it is hard to say what you need to change.

1 Like

Hi @Badger, Thanks for the reply,

With the above pipeline i have already taken "total" and "success" fields under "dcKpis" field.

With the same logic, when I tried adding ruby code chunks for max_response, avg_response and so forth to the pipeline, Logstash gets stuck.

For your information, I'm pulling data from a Cassandra DB. The logs attached above are Logstash output logs (stdout), where I succeeded with 2 fields (total & success) but got stuck with the other fields (split doesn't work for more than 2 fields).

You are fetching a field called [total] but that does not appear to exist in the rubydebug output that you show. You say you want "UNKNOWN_VOLUME" : 59.0, but the value 59 does not occur in the output that you show, so where is it coming from?

That was only a sample. Please have a look at the event below; I have rearranged the data.

Note: txn_id (GET#/txn/branchserver50.aspx|srv=73689505-0ca6-48fe-a4da-4cf7ed4acd82|acc=12) are dynamic & more than three.

Input Event:

{
           "agentId" => "Log_Agent",
         "@metadata" => {
        "A1EvtFingerprint" => "AGENTID=Log_Agent&TIME=1657708200000&RESPTYPE=DC",
               "indexname" => "heal_collated_agent_txn",
               "tablename" => "agent_transactions_data",
               "accountid" => "mle_account",
        "enable_rubydebug" => "true"
    },
      "total" => {
        "GET#/txn/branchserver50.aspx|srv=73689505-0ca6-48fe-a4da-4cf7ed4acd82|acc=12" => 5.011000156402588,
        "GET#/txn/branchserver51.aspx|srv=73689505-0ca6-48fe-a4da-4cf7ed4acd82|acc=12" => 5.011000156402588,
        "GET#/txn/branchserver23.aspx|srv=73689505-0ca6-48fe-a4da-4cf7ed4acd82|acc=12" => 5.011000156402588
    },
      "success" => {
        "GET#/txn/branchserver50.aspx|srv=73689505-0ca6-48fe-a4da-4cf7ed4acd82|acc=12" => 5.011000156402588,
        "GET#/txn/branchserver51.aspx|srv=73689505-0ca6-48fe-a4da-4cf7ed4acd82|acc=12" => 5.011000156402588,
        "GET#/txn/branchserver23.aspx|srv=73689505-0ca6-48fe-a4da-4cf7ed4acd82|acc=12" => 5.011000156402588
    },	
      "max_response" => {
        "GET#/txn/branchserver50.aspx|srv=73689505-0ca6-48fe-a4da-4cf7ed4acd82|acc=12" => 5.011000156402588,
        "GET#/txn/branchserver51.aspx|srv=73689505-0ca6-48fe-a4da-4cf7ed4acd82|acc=12" => 5.011000156402588,
        "GET#/txn/branchserver23.aspx|srv=73689505-0ca6-48fe-a4da-4cf7ed4acd82|acc=12" => 5.011000156402588
    },
     "response_type" => "DC",
    "aggLevelInMins" => 15,
         "timeInGMT" => 1657708200000,
      "avg_response" => {
        "GET#/txn/branchserver50.aspx|srv=73689505-0ca6-48fe-a4da-4cf7ed4acd82|acc=12" => 4.5954742431640625,
        "GET#/txn/branchserver51.aspx|srv=73689505-0ca6-48fe-a4da-4cf7ed4acd82|acc=12" => 4.6110687255859375,
        "GET#/txn/branchserver23.aspx|srv=73689505-0ca6-48fe-a4da-4cf7ed4acd82|acc=12" => 4.580192565917969
    },
           "timeout" => {
        "GET#/txn/branchserver50.aspx|srv=73689505-0ca6-48fe-a4da-4cf7ed4acd82|acc=12" => 777,
        "GET#/txn/branchserver51.aspx|srv=73689505-0ca6-48fe-a4da-4cf7ed4acd82|acc=12" => 839,
        "GET#/txn/branchserver23.aspx|srv=73689505-0ca6-48fe-a4da-4cf7ed4acd82|acc=12" => 781
    },
           "unknown" => {
        "GET#/txn/branchserver50.aspx|srv=73689505-0ca6-48fe-a4da-4cf7ed4acd82|acc=12" => 773,
        "GET#/txn/branchserver51.aspx|srv=73689505-0ca6-48fe-a4da-4cf7ed4acd82|acc=12" => 794,
        "GET#/txn/branchserver23.aspx|srv=73689505-0ca6-48fe-a4da-4cf7ed4acd82|acc=12" => 746
    },
              "fail" => {
        "GET#/txn/branchserver50.aspx|srv=73689505-0ca6-48fe-a4da-4cf7ed4acd82|acc=12" => 770,
        "GET#/txn/branchserver51.aspx|srv=73689505-0ca6-48fe-a4da-4cf7ed4acd82|acc=12" => 737,
        "GET#/txn/branchserver23.aspx|srv=73689505-0ca6-48fe-a4da-4cf7ed4acd82|acc=12" => 800
    },
        "@timestamp" => 2022-07-13T10:30:00.000Z,
              "slow" => {
        "GET#/txn/branchserver50.aspx|srv=73689505-0ca6-48fe-a4da-4cf7ed4acd82|acc=12" => 782,
        "GET#/txn/branchserver51.aspx|srv=73689505-0ca6-48fe-a4da-4cf7ed4acd82|acc=12" => 788,
        "GET#/txn/branchserver23.aspx|srv=73689505-0ca6-48fe-a4da-4cf7ed4acd82|acc=12" => 744
    },
      "min_response" => {
        "GET#/txn/branchserver50.aspx|srv=73689505-0ca6-48fe-a4da-4cf7ed4acd82|acc=12" => 5.0,
        "GET#/txn/branchserver51.aspx|srv=73689505-0ca6-48fe-a4da-4cf7ed4acd82|acc=12" => 5.0,
        "GET#/txn/branchserver23.aspx|srv=73689505-0ca6-48fe-a4da-4cf7ed4acd82|acc=12" => 5.0
    },
          "@version" => "1"
}

Desired output should be:

{
          "txnId" : "GET#/txn/branchserver50.aspx|srv=73689505-0ca6-48fe-a4da-4cf7ed4acd82|acc=12",
          "timeInGMT" : 1657048320000,
          "dcKpis" : {
            "UNKNOWN_VOLUME" : 773,
            "TIMEOUT_VOLUME" : 777,
            "FAIL_VOLUME" : 770,
            "MIN_RESPONSE_TIME" : 5.0,
            "TOTAL_VOLUME" : 5.011000156402588,
            "AVG_RESPONSE_TIME" : 4.5954742431640625,
			"MAX_RESPONSE_TIME" : 5.011000156402588,
            "SUCCESS_VOLUME" : 5.011000156402588,
            "SLOW_VOLUME" : 782
		}	
},


{
          "txnId" : "GET#/txn/branchserver51.aspx|srv=73689505-0ca6-48fe-a4da-4cf7ed4acd82|acc=12",
          "timeInGMT" : 1657048320000,
          "dcKpis" : {
            "UNKNOWN_VOLUME" : 794,
            "TIMEOUT_VOLUME" : 839,
            "FAIL_VOLUME" : 737,
            "MIN_RESPONSE_TIME" : 5.0,
            "TOTAL_VOLUME" : 5.011000156402588,
            "AVG_RESPONSE_TIME" : 4.6110687255859375,
			"MAX_RESPONSE_TIME" : 5.011000156402588,
            "SUCCESS_VOLUME" : 5.011000156402588,
            "SLOW_VOLUME" : 788
		}	
}

Note: With the above ruby code I'm able to get the output I want, but only for 2 fields.
split doesn't work for more than 2 fields and Logstash gets stuck.

You could try something like this.

    ruby {
        code => '
            #avg         = event.remove("avg_response")
            fail        = event.remove("fail")
            max         = event.remove("max_response")
            min         = event.remove("min_response")
            slow        = event.remove("slow")
            success     = event.remove("success")
            timeout     = event.remove("timeout")
            total       = event.remove("total")
            unknown     = event.remove("unknown")

            if total
                a = []
                total.each { |k, v|
                    h = {}
                    h["txnId"] = k

                    if fail;    h["FAIL_VOLUME"] = fail[k]; end
                    if max;     h["MAX_RESPONSE_TIME"] = max[k]; end
                    # add the rest of the fields in the same way
                    if unknown;  h["UNKNOWN_VOLUME"] = unknown[k]; end

                    a << h
                }
            event.set("[dcKpis]", a)
            end
        '
    }
    split { field => "[dcKpis]" }
    mutate { rename => { "[dcKpis][txnId]" => "txnId" } }

Another possible approach would be to take total as the definitive set of keys and then for each key in it, iterate over every field in the event to see if it is a hash that has the key "GET#/txn/branchserver50.aspx|srv=73689505-0ca6-48fe-a4da-4cf7ed4acd82|acc=12" (etc.) and if so add that value to the dcKpis hash for that key. I wouldn't recommend that unless the set of data hashes is not known in advance, since you couldn't rename them, you would have to put up with using the hash name as the field name in dcKpis.

@Badger Thanks for the answer.

As always it worked pretty well. :+1: