Vega chart for matrix stats (correlation) split!

Hey

Following up on the Vega heatmap chart for matrix stats (correlation):
Here is a Vega spec that displays a heatmap of the correlation matrix (computed by the Elasticsearch matrix_stats aggregation), split by a key (isFraud in this example).
A bit long, but it's worth it!

{
  "$schema": "https://vega.github.io/schema/vega/v4.3.json",
  "description": "Faceted heatmap of the pairwise feature correlations returned by an Elasticsearch matrix_stats aggregation, with one column per isFraud bucket of a composite aggregation.",
  "title": "A heatmap showing the correlation of features",
  "background": "white",
  "padding": 0,
  "data": [
    {
      "name": "feature_correlation",
      "url": {
        "%context%": true,
        "index": "fraud*",
        "body": {
          "size": 0,
          "aggs": {
            "correlation_stats": {
              "composite": {
                "sources": [{"isFraud": {"terms": {"field": "isFraud"}}}]
              },
              "aggregations": {
                "statistics": {
                  "matrix_stats": {
                    "fields": [
                      "amount",
                      "oldBalanceOrig",
                      "newBalanceOrig",
                      "oldBalanceDest",
                      "newBalanceDest",
                      "errorBalanceOrig",
                      "errorBalanceDest"
                    ]
                  }
                }
              }
            }
          }
        }
      },
      "format": {"property": "aggregations.correlation_stats.buckets"},
      "transform": [
        {"type": "flatten", "fields": ["statistics.fields"], "as": ["f"]},
        {
          "type": "project",
          "fields": [
            "key.isFraud",
            "f.name",
            "f.correlation.amount",
            "f.correlation.newBalanceOrig",
            "f.correlation.oldBalanceOrig",
            "f.correlation.newBalanceDest",
            "f.correlation.oldBalanceDest",
            "f.correlation.errorBalanceOrig",
            "f.correlation.errorBalanceDest"
          ],
          "as": [
            "isFraud",
            "name",
            "amount",
            "newBalanceOrig",
            "oldBalanceOrig",
            "newBalanceDest",
            "oldBalanceDest",
            "errorBalanceOrig",
            "errorBalanceDest"
          ]
        },
        {
          "type": "fold",
          "fields": [
            "amount",
            "newBalanceOrig",
            "oldBalanceOrig",
            "newBalanceDest",
            "oldBalanceDest",
            "errorBalanceOrig",
            "errorBalanceDest"
          ]
        },
        {"type": "project", "fields": ["isFraud", "name", "key", "value"]}
      ]
    },
    {
      "name": "column_domain",
      "source": "feature_correlation",
      "transform": [{"type": "aggregate", "groupby": ["isFraud"]}]
    }
  ],
  "signals": [
    {"name": "child_width", "value": 500},
    {"name": "child_height", "value": 600}
  ],
  "layout": {
    "padding": 20,
    "offset": {"columnTitle": 10},
    "columns": {"signal": "length(data('column_domain'))"},
    "bounds": "full",
    "align": "all"
  },
  "marks": [
    {
      "name": "column-title",
      "type": "group",
      "role": "column-title",
      "title": {"text": "isFraud", "style": "guide-title", "offset": 10}
    },
    {
      "name": "row_header",
      "type": "group",
      "role": "row-header",
      "encode": {"update": {"height": {"signal": "child_height"}}}
    },
    {
      "name": "column_header",
      "type": "group",
      "role": "column-header",
      "from": {"data": "column_domain"},
      "sort": {"field": "datum[\"isFraud\"]", "order": "ascending"},
      "title": {
        "text": {"signal": "parent[\"isFraud\"]"},
        "style": "guide-label",
        "frame": "group",
        "offset": 10
      },
      "encode": {"update": {"width": {"signal": "child_width"}}}
    },
    {
      "name": "cell",
      "type": "group",
      "style": "cell",
      "from": {
        "facet": {
          "name": "facet",
          "data": "feature_correlation",
          "groupby": ["isFraud"]
        }
      },
      "sort": {"field": ["datum[\"isFraud\"]"], "order": ["ascending"]},
      "encode": {
        "update": {
          "width": {"signal": "child_width"},
          "height": {"signal": "child_height"}
        }
      },
      "marks": [
        {
          "type": "rect",
          "from": {"data": "facet"},
          "encode": {
            "enter": {
              "x": {"scale": "x", "field": "name"},
              "y": {"scale": "y", "field": "key"},
              "width": {"scale": "x", "band": 1},
              "height": {"scale": "y", "band": 1},
              "tooltip": {
                "signal": "datum.name + ' - ' + datum.key + ': ' + format(datum.value, '.3f')"
              }
            },
            "update": {"fill": {"scale": "color", "field": "value"}}
          }
        }
      ],
      "legends": [
        {
          "fill": "color",
          "type": "gradient",
          "title": "Correlation",
          "titleFontSize": 12,
          "titlePadding": 4,
          "gradientLength": {"signal": "child_height - 16"}
        }
      ],
      "axes": [
        {
          "orient": "bottom",
          "scale": "x",
          "labelAngle": -45,
          "tickOffset": 0,
          "labelAlign": "right",
          "domain": false
        },
        {"orient": "left", "scale": "y", "domain": false}
      ]
    }
  ],
  "scales": [
    {
      "name": "x",
      "type": "band",
      "domain": {"data": "feature_correlation", "field": "name"},
      "range": [0, {"signal": "child_width"}]
    },
    {
      "name": "y",
      "type": "band",
      "domain": {"data": "feature_correlation", "field": "key"},
      "range": [0, {"signal": "child_height"}]
    },
    {
      "name": "color",
      "type": "linear",
      "range": {"scheme": "yelloworangebrown"},
      "domain": {"data": "feature_correlation", "field": "value"},
      "zero": false,
      "nice": true
    }
  ]
}

3 Likes

That looks great, thanks, Vincent!

This topic was automatically closed 28 days after the last reply. New replies are no longer allowed.