Creating Custom Scripted Aggregations across all the buckets in a pipeline

Here is some example data pertaining to my domain:

PUT test_index 
{
  "mappings": {
    "test_type": { 
      "_all":       { "enabled": false  }, 
      "properties": { 
        "dummy":    { "type": "keyword"  }, 
        "timestamp":     { "type": "date"  }, 
        "state":      { "type": "keyword" } ,
        "duration":  {"type": "integer"}
      }
    }
  }
}


POST test_index/test_type
{
  "dummy": "dummy",
  "timestamp": "2015-03-25T17:49:00.000Z",
  "state": "State_1",
  "duration": 50
}

POST test_index/test_type
{
  "dummy": "dummy",
  "timestamp": "2015-03-25T17:49:00.050Z",
  "state": "State_2",
  "duration": 100
}

POST test_index/test_type
{
  "dummy": "dummy",
  "timestamp": "2015-03-25T17:49:00.150Z",
  "state": "State_2",
  "duration": 1000
}

POST test_index/test_type
{
  "dummy": "dummy",
  "timestamp": "2015-03-25T17:49:01.150Z",
  "state": "State_1",
  "duration": 2000
}

I want to know the percentage of the total duration spent in each state. For a single state, I can get it with the following query, which uses a bucket_script:

GET test_index/_search?size=0
{
  "aggs": {
    "dummy": {
      "terms": {
        "field": "dummy",
        "size": 10
      },
      "aggs": {
        "state": {
          "filter": {
            "term": {
              "state": "State_1"
            }
          },
          "aggs": {
            "state_duration": {
              "sum": {
                "field": "duration"
              }
            }
          }
        },
        "total_duration_1": {
          "sum": {
            "field": "duration"
          }
        },
        "percentage_duration": {
          "bucket_script": {
            "buckets_path": {
              "state_duration": "state>state_duration",
              "total_duration": "total_duration_1"
            },
            "script": "Math.round(params.state_duration / params.total_duration * 100)/100.0"
          }
        }
      }
}

}
}

However, what I want is to get this for all the states automatically, using a terms aggregation:

GET test_index/_search?size=0
{
  "aggs": {
    "dummy": {
      "terms": {
        "field": "dummy",
        "size": 10
      },
      "aggs": {
        "state": {
          "terms": {
            "field": "state",
            "size": 10
          },
          "aggs": {
            "state_duration": {
              "sum": {
                "field": "duration"
              }
            }
          }
        },
        "total_duration_1": {
          "sum": {
            "field": "duration"
          }
        },
        "total_duration_2":{
          "sum_bucket": {
            "buckets_path": "state>state_duration"
          }
        },
        "percentage_duration": {
          "bucket_script": {
            "buckets_path": {
              "state_duration": "state>state_duration",
              "total_duration": "total_duration_1"
            },
            "script": "Math.round(params.state_duration / params.total_duration * 100)/100.0"
          }
        }
      }
    }
  }
}

However, this fails with the following error: "buckets_path must reference either a number value or a single value numeric metric aggregation, got: java.lang.Object[]", because the bucket script cannot handle the array that comes out of the terms aggregation.

Is there any way to run a script on all the terms in a bucket, rather than choosing individual ones manually or doing the math on the client side?

This topic was automatically closed 28 days after the last reply. New replies are no longer allowed.