Vega and aggregations

vega

#1

Hi,
I'm using the following search in Elasticsearch and am having trouble graphing its results in Vega:

GET myindex-*/_search
{
  "size": 0,
  "aggs": {
    "2": {
      "terms": {
        "field": "flow_id",
        "size": 150,
        "order": {
          "_key": "asc"
        }
      },
      "aggs": {
        "1": {
          "top_hits": {
            "docvalue_fields": [
              "flow_id", "src_port", "src_ip.keyword", "dest_ip.keyword",  "dest_port" 
            ],
            "_source": "error",
            "size": 1,
            "sort": [
              {
                "timestamp": {
                  "order": "asc"
                }
              }
            ]
          }
        }
      }
    }
  }
}

Elasticsearch returns the following response without any errors:

{
 (...)
},
  "hits" : {
    "total" : 1236558,
    "max_score" : 0.0,
    "hits" : [ ]
  },
  "aggregations" : {
    "2" : {
      "buckets" : [
        {
          "key" : 370738138,
          "doc_count" : 2,
          "1" : {
            "hits" : {
              "total" : 2,
              "max_score" : null,
              "hits" : [
                {
                  "_index" : "myindex-2018-09-22",
                  "_type" : "doc",
                  "_id" : "qmLGAGYBg57XfGKiVS76",
                  "_score" : null,
                  "_source" : { },
                  "fields" : {
                    "src_port" : [
                      37402
                    ],
                    "src_ip.keyword" : [
                      "192.168.1.1"
                    ],
                    "dest_ip.keyword" : [
                      "192.168.1.2"
                    ],
                    "flow_id" : [
                      370738138
                    ],
                    "dest_port" : [
                      80
                    ]
                  },
                  "sort" : [
                    1537611289850
                  ]
                }
              ]
            }
          }
        },

I'm trying to graph the data (src_port, src_ip.keyword, dest_ip.keyword, dest_port) with the following Vega parallel-coordinates graph (PCG) code:

{
  "$schema": "https://vega.github.io/schema/vega/v3.json",

  "data": [
  {
    name: data
    url: {
      %context%: true
      %timefield%: timestamp

      index: myindex-*
      body: {
        "size": 0,
        "aggs": {
          "2": {
            "terms": {
              "field": "flow_id",
              "size": 150,
              "order": {
                "_key": "asc"
              }
            },
            "aggs": {
              "1": {
                "top_hits": {
                  "docvalue_fields": [
                    "flow_id", "src_port", "src_ip.keyword", "dest_ip.keyword",  "dest_port" 
                  ],
                  "_source": "error",
                  "size": 1,
                  "sort": [
                    {
                      "timestamp": {
                        "order": "asc"
                      }
                    }
                  ]
                }
              }
            }
          }
        }
      }
    },

    format: {property: "aggregations.2.buckets.hits"},
    
    transform: [
        { type: "formula", expr: "datum._source.src_port", as: "src_port"}
        { type: "formula", expr: "datum._source.src_ip.keyword", as: "src_ip"}
        { type: "formula", expr: "datum._source.dest_ip.keyword", as: "dest_ip"}
        { type: "formula", expr: "datum._source.dest_port", as: "dest_port"}
      ] 
      },
      {
      "name": "fields",
      "values": [
        "src_port",
        "src_ip",
        "dest_ip",
        "dest_port",
      ]
    }
  ],

  scales: [
    {
      "name": "ord", "type": "point",
      "range": "width", "round": true,
      "domain": {"data": "fields", "field": "src_port"}
    },
    {
      "name": "src_port", "type": "linear",
      "range": "height", "zero": false, "nice": true,
      "domain": {"data": "data", "field": "src_port"}
    },
    {
      "name": "src_ip", "type": "band",
      "range": "height", "zero": false, "nice": true,
      "domain": {"data": "data", "field": "src_ip"}
    },
    {
      "name": "dest_ip", "type": "band",
      "range": "height", "zero": false, "nice": true,
      "domain": {"data": "data", "field": "dest_ip"}
    },
    {
      "name": "dest_port", "type": "linear",
      "range": "height", "zero": false, "nice": true,
      "domain": {"data": "data", "field": "dest_port"}
    }
  ],

  "axes": [
    {
      "orient": "left", "zindex": 1,
      "scale": "src_port", "title": "src_port",
      "offset": {"scale": "ord", "value": "src_port", "mult": -1}
    },
    {
      "orient": "left", "zindex": 1,
      "scale": "src_ip", "title": "src_ip",
      "offset": {"scale": "ord", "value": "src_ip", "mult": -1}
    },
    {
      "orient": "left", "zindex": 1,
      "scale": "dest_ip", "title": "dest_ip",
      "offset": {"scale": "ord", "value": "dest_ip", "mult": -1}
    },
    {
      "orient": "left", "zindex": 1,
      "scale": "dest_port", "title": "dest_port",
      "offset": {"scale": "ord", "value": "dest_port", "mult": -1}
    }
  ],

  "marks": [
    {
      "type": "group",
      "from": {"data": "data"},
      "marks": [
        {
          "type": "line",
          "from": {"data": "fields"},
          "encode": {
            "enter": {
              "x": {"scale": "ord", "field": "data"},
              "y": {
                "scale": {"datum": "data"},
                "field": {"parent": {"datum": "data"}}
              },
              "stroke": {"value": "steelblue"},
              "strokeWidth": {"value": 1.01},
              "strokeOpacity": {"value": 0.3}
            }
          }
        }
      ]
    }
  ]
}

Doing so returns a "data is undefined" error. Is there something wrong with my spec? I have tested the code without the aggs and it worked (the result was just not as precise as with the aggs). I'm using the latest ELK version.

Thanks,
Dany


(kulkarni) #2

cc @nyuriks