Vega heatmap chart for matrix stats (correlation)

Hi
I struggled to display a heatmap showing the correlation of fields (calling the matrix_stats agg from Elasticsearch).
Many thanks to my colleague Yuri, who helped me with this :+1:
Here is the code that works:

{
  // Vega spec in HJSON (the dialect the Kibana Vega editor accepts, so
  // comments are allowed). It draws a heatmap of the pairwise correlations
  // returned by an Elasticsearch matrix_stats aggregation: one cell per
  // (name, key) pair of fields, coloured by the correlation value.
  "$schema": "https://vega.github.io/schema/vega/v4.3.json",
  "title": "A heatmap showing the correlation of features",
  "data": [
    {
      "name": "feature_correlation",
      // Elasticsearch request issued by Kibana. "size": 0 asks for no hits,
      // only the aggregation result.
      "url": {
        // Kibana-specific flag; see the Kibana Vega reference for how it
        // applies the dashboard context (filters / time range) to the query.
        "%context%": true,
        "index": "fraud*",
        "body": {
          "size": 0,
          "aggs": {
            "statistics": {
              "matrix_stats": {
                "fields": ["amount", "oldBalanceOrig", "newBalanceOrig", "oldBalanceDest", "newBalanceDest", "errorBalanceOrig", "errorBalanceDest"]
              }
            }
          }
        }
      },
      // Use the per-field entries of the aggregation response as the rows.
      "format": {"property": "aggregations.statistics.fields"},
      "transform": [
        // Copy each nested correlation value (datum.correlation.<field>) up to
        // a top-level column so that the fold below can pivot on it.
        {"type": "formula", "expr": "datum.correlation.amount", "as": "amount"},
        {"type": "formula", "expr": "datum.correlation.oldBalanceOrig", "as": "oldBalanceOrig"},
        {"type": "formula", "expr": "datum.correlation.newBalanceOrig", "as": "newBalanceOrig"},
        {"type": "formula", "expr": "datum.correlation.oldBalanceDest", "as": "oldBalanceDest"},
        {"type": "formula", "expr": "datum.correlation.newBalanceDest", "as": "newBalanceDest"},
        {"type": "formula", "expr": "datum.correlation.errorBalanceOrig", "as": "errorBalanceOrig"},
        {"type": "formula", "expr": "datum.correlation.errorBalanceDest", "as": "errorBalanceDest"},
        // Pivot wide -> long: one output row per (row field, folded column),
        // exposed as "key" (column name) and "value" (correlation).
        {"type": "fold", "fields": ["amount", "oldBalanceOrig", "newBalanceOrig", "oldBalanceDest", "newBalanceDest", "errorBalanceOrig", "errorBalanceDest"]},
        // Keep only the three columns the scales and marks read.
        {"type": "project", "fields": ["name", "key", "value"]}
      ]
    }
  ],

  "scales": [
    {
      "name": "x",
      "type": "band",
      "domain": {"data": "feature_correlation", "field": "name"},
      "range": "width"
    },
    {
      "name": "y",
      "type": "band",
      "domain": {"data": "feature_correlation", "field": "key"},
      "range": "height"
    },
    {
      "name": "color",
      "type": "linear",
      "range": {"scheme": "yelloworangebrown"},
      "domain": {"data": "feature_correlation", "field": "value"},
      "zero": false,
      "nice": true
    }
  ],

  "axes": [
    {"orient": "bottom", "scale": "x", "domain": false},
    {"orient": "left", "scale": "y", "domain": false}
  ],

  "legends": [
    {
      "fill": "color",
      "type": "gradient",
      "title": "Correlation",
      "titleFontSize": 12,
      "titlePadding": 4,
      "gradientLength": {"signal": "height - 16"}
    }
  ],

  "marks": [
    {
      "type": "rect",
      "from": {"data": "feature_correlation"},
      "encode": {
        "enter": {
          // The tooltip depends only on the datum, so computing it once is enough.
          "tooltip": {"signal": "datum.name + ' - ' + datum.key + ': ' + format(datum.value, '.3f')"}
        },
        "update": {
          // Position and size are driven by the x/y band scales, whose ranges
          // follow the width/height signals. They live in "update" (not
          // "enter") so the cells are re-laid out together with the axes when
          // the visualization is resized.
          "x": {"scale": "x", "field": "name"},
          "y": {"scale": "y", "field": "key"},
          "width": {"scale": "x", "band": 1},
          "height": {"scale": "y", "band": 1},
          "fill": {"scale": "color", "field": "value"}
        }
      }
    }
  ]
}

and the result:

Happy Vega!

5 Likes

This is great. Thanks for sharing!

And enhanced by a split!

1 Like

This topic was automatically closed 28 days after the last reply. New replies are no longer allowed.