Make elasticsearch aggregation response shorter

Hi,

I am dealing with an Elasticsearch aggregation response JSON like the one below.

{
  "took": 2,
  "timed_out": false,
  "_shards": {
    "total": 1,
    "successful": 1,
    "skipped": 0,
    "failed": 0
  },
  "hits": {
    "total": {
      "value": 236,
      "relation": "eq"
    },
    "max_score": null,
    "hits": []
  },
  "aggregations": {
    "MinuteWiseAggs": {
      "buckets": [
        {
          "key_as_string": "2022-06-29 03:55:00",
          "key": 1656474900000,
          "doc_count": 236,
          "TxnIdAggs": {
            "doc_count_error_upper_bound": 0,
            "sum_other_doc_count": 0,
            "buckets": [
              {
                "key": "POST#http:/|acc=heal_health",
                "doc_count": 236,
                "AgentAggs": {
                  "doc_count_error_upper_bound": 0,
                  "sum_other_doc_count": 0,
                  "buckets": [
                    {
                      "key": "d418a24d-d6f6-4e86-8d73-5935645e2798",
                      "doc_count": 236,
                      "ServiceIdAggs": {
                        "doc_count_error_upper_bound": 0,
                        "sum_other_doc_count": 0,
                        "buckets": [
                          {
                            "key": "d4ef5861-49fa-4c04-9858-caba584ce442",
                            "doc_count": 236,
                            "RespTimeTypeAggs": {
                              "doc_count_error_upper_bound": 0,
                              "sum_other_doc_count": 0,
                              "buckets": [
                                {
                                  "key": "DC",
                                  "doc_count": 236
                                }
                              ]
                            },
                            "RespStatusFilter": {
                              "doc_count": 59,
                              "RespTimeAggs": {
                                "value": 1000000
                              },
                              "MaxRespTimeAggs": {
                                "value": 1000000
                              },
                              "MinRespTimeAggs": {
                                "value": 1000000
                              },
                              "RespTimePercentileAggs": {
                                "values": {
                                  "50.0": 1000000,
                                  "75.0": 1000000,
                                  "90.0": 1000000,
                                  "95.0": 1000000,
                                  "99.0": 1000000
                                }
                              }
                            },
                            "RespStatusAggs": {
                              "doc_count_error_upper_bound": 0,
                              "sum_other_doc_count": 0,
                              "buckets": [
                                {
                                  "key": "FAIL",
                                  "doc_count": 59
                                },
                                {
                                  "key": "GOOD",
                                  "doc_count": 59
                                },
                                {
                                  "key": "TIMEOUT",
                                  "doc_count": 59
                                },
                                {
                                  "key": "UNKNOWN",
                                  "doc_count": 59
                                }
                              ]
                            }
                          }
                        ]
                      }
                    }
                  ]
                },
                "ServiceIdAggs": {
                  "doc_count_error_upper_bound": 0,
                  "sum_other_doc_count": 0,
                  "buckets": [
                    {
                      "key": "d4ef5861-49fa-4c04-9858-caba584ce442",
                      "doc_count": 236,
                      "RespTimeTypeAggs": {
                        "doc_count_error_upper_bound": 0,
                        "sum_other_doc_count": 0,
                        "buckets": [
                          {
                            "key": "DC",
                            "doc_count": 236
                          }
                        ]
                      },
                      "RespStatusFilter": {
                        "doc_count": 59,
                        "RespTimeAggs": {
                          "value": 1000000
                        },
                        "MaxRespTimeAggs": {
                          "value": 1000000
                        },
                        "MinRespTimeAggs": {
                          "value": 1000000
                        },
                        "RespTimePercentileAggs": {
                          "values": {
                            "50.0": 1000000,
                            "75.0": 1000000,
                            "90.0": 1000000,
                            "95.0": 1000000,
                            "99.0": 1000000
                          }
                        }
                      },
                      "RespStatusAggs": {
                        "doc_count_error_upper_bound": 0,
                        "sum_other_doc_count": 0,
                        "buckets": [
                          {
                            "key": "FAIL",
                            "doc_count": 59
                          },
                          {
                            "key": "GOOD",
                            "doc_count": 59
                          },
                          {
                            "key": "TIMEOUT",
                            "doc_count": 59
                          },
                          {
                            "key": "UNKNOWN",
                            "doc_count": 59
                          }
                        ]
                      }
                    }
                  ]
                }
              }
            ]
          }
        }
      ]
    }
  }
}

I'm trying to rearrange it as shown below, but with no luck. It would be a great help if somebody could offer a few pointers.


"TxnIdAggs": "POST#http:/|acc=heal_health",
"AgentAggs":"d418a24d-d6f6-4e86-8d73-5935645e2798",
"ServiceIdAggs": "d4ef5861-49fa-4c04-9858-caba584ce442",
"RespTimeTypeAggs": "DC",
"RespStatusFilter": {
   "RespTimeAggs": 1000000,
   "MaxRespTimeAggs": 1000000,
   "MinRespTimeAggs": 1000000
},
"RespTimePercentileAggs": {
        "50.0": 1000000,
        "75.0": 1000000,
        "90.0": 1000000,
        "95.0": 1000000,
        "99.0": 1000000
},
"RespStatusAggs": {
    "FAIL": 0,
    "GOOD": 0,
    "TIMEOUT": 0,
    "UNKNOWN": 0
},
"ServiceIdAggs": "d4ef5861-49fa-4c04-9858-caba584ce442",
"RespTimeTypeAggs": "DC",
"RespStatusFilter": {
    "RespTimeAggs": 1000000,
    "MaxRespTimeAggs": 1000000,
    "MinRespTimeAggs":1000000
},
"RespTimePercentileAggs": {
        "50.0": 1000000,
        "75.0": 1000000,
        "90.0": 1000000,
        "95.0": 1000000,
        "99.0": 1000000
},
"RespStatusAggs": {
    "FAIL": 0,
    "GOOD":0,
    "TIMEOUT":0,
    "UNKNOWN":0
}

The following is the Logstash pipeline, which parses the JSON data and is meant to remove the particular fields mentioned in the ruby code. Please suggest what I'm missing.

    # Logstash pipeline: read a saved aggregation response from a file, parse
    # it as JSON, attempt to strip bucket bookkeeping fields, then send the
    # event to Elasticsearch and print it to stdout.
    input {
        # Read the file at the given path, starting from its beginning.
        file {
           path => "/home/logstash/logstash-6.8.22/bin/rollup-query-response-curl.json"
           start_position => "beginning"
        }
     }

     filter {
        # Parse the JSON text held in the [message] field into event fields.
        json {
            source => "message"
        }
        # Drop the raw text once it has been parsed.
        mutate {
          remove_field => [ "message" ]
        }



      # Intended to remove sum_other_doc_count fields.
      # NOTE(review): event.to_hash.keys yields only the top-level field names
      # of the event, and each name is compared against a single-quoted Ruby
      # string, so #{k} is not interpolated. The prefix being tested is the
      # literal text %{[sum_other_doc_count][#{k}]}, which no field name appears
      # to start with, so nothing is removed and nested fields are never visited.
      ruby {
        code => "
        event.to_hash.keys.each { |k|
        if k.start_with?('%{[sum_other_doc_count][#{k}]}')
          event.remove(k)
        end
        }
        "
        }

      # Intended to remove doc_count_error_upper_bound fields.
      # NOTE(review): same pattern as the filter above; the literal prefix is
      # not expected to match any top-level field name.
      ruby {
        code => "
        event.to_hash.keys.each { |k|
        if k.start_with?('%{[doc_count_error_upper_bound][#{k}]}')
          event.remove(k)
        end
        }
       "
       }

      # Intended to remove doc_count fields.
      # NOTE(review): same pattern as the two filters above; the literal prefix
      # is not expected to match any top-level field name.
      ruby {
        code => "
        event.to_hash.keys.each { |k|
        if k.start_with?('%{[doc_count][#{k}]}')
          event.remove(k)
        end
        }
       "
       }
     }

     output
     {
        # Index each event into Elasticsearch.
        # XXXX looks like a redacted placeholder for the real credentials.
        # The index name carries a date pattern, presumably giving one index
        # per day -- confirm against the Logstash sprintf documentation.
        elasticsearch
        {
          hosts => ["http://192.168.13.107:9201"]
          user => XXXX
          password => XXXX
          index => "bucket-%{+YYYY.MM.dd}"
        }


      # Also print every event to the console for debugging.
      stdout { codec => rubydebug }
    }

Thanks in advance.

Regards,
Akshay.

That iterates over the top-level keys of the event, which in your case appear to be [_shards], [hits], [aggregations], etc. Your desired data format has several duplicate entries (RespStatusFilter, RespStatusAggs, etc.) and it is unclear where the values for RespStatusAggs come from.

Thanks @Badger for the reply.

I would like to know whether it is possible to at least extract the key and value from each bucket (from the field named "key"), assuming there are multiple buckets and dynamic field names.

"TxnIdAggs" : "POST#http:/|acc=heal_health"

I'm sorry if I'm saying something wrong, but if you want to put the aggregated result back into Elasticsearch, have you considered using a transform?

Hi @Tomo_M

Could you please explain how to use it with a Logstash pipeline?

It's an Elasticsearch feature. You don't need Logstash.

Hi @Tomo_M thanks for the quick response.

My question is still the same: is it possible within the Logstash pipeline itself?

It's possible, but it will involve writing a lot of ruby code. This is an example that solves a small part of the problem...

    # Replace every nested hash of the form { "value" => x } with x itself,
    # wherever it occurs in the event.
    ruby {
        init => '
            # Recursively walk +object+. +name+ is the Logstash field reference
            # that addresses +object+ inside +event+. A hash whose only key is
            # "value" is overwritten in +event+ by that value; any other
            # non-empty hash or array is descended into; scalars and empty
            # containers are left alone. +keys+ is only handed on to the
            # recursive calls.
            def doSomething(object, name, keys, event)
                return unless object

                if object.kind_of?(Hash)
                    return if object == {}

                    if object.keys == [ "value" ]
                        event.set(name, object["value"])
                        return
                    end

                    object.each do |child_key, child|
                        doSomething(child, "#{name}[#{child_key}]", keys, event)
                    end
                elsif object.kind_of?(Array)
                    object.each_with_index do |child, idx|
                        doSomething(child, "#{name}[#{idx}]", keys, event)
                    end
                end
            end
        '
        code => '
            # Start one walk per top-level field of the event.
            top_level = event.to_hash
            top_level.each do |field, contents|
                doSomething(contents, "[#{field}]", @field, event)
            end
        '
    }

which will convert

                                                  "MaxRespTimeAggs" => {
                                                "value" => 1000000
                                            },
                                                         "doc_count" => 59,
                                                      "RespTimeAggs" => {
                                                "value" => 1000000
                                            },
                                                   "MinRespTimeAggs" => {
                                                "value" => 1000000
                                            }

into

                                            },
                                                   "MaxRespTimeAggs" => 1000000,
                                                         "doc_count" => 59,
                                                      "RespTimeAggs" => 1000000,
                                                   "MinRespTimeAggs" => 1000000

I'm new to Ruby, but I will try to figure out the rest.

Thanks @Badger.

Hi,

I'm still stuck. Does anybody have an idea how to set the key and value below?

"TxnIdAggs": "POST#http:/|acc=heal_health",

This topic was automatically closed 28 days after the last reply. New replies are no longer allowed.