Vega chart for matrix stats (correlation) split!


Following up on the earlier topic "Vega heatmap chart for matrix stats (correlation)":
Here is a Vega spec that displays a heatmap of the correlation matrix (computed by the Elasticsearch matrix_stats aggregation), split by a key (isFraud in this example).
A bit long, but it's worth it!

  "$schema": "",
  "title": "A heatmap showing the correlation of features",
  "background": "white",
  "padding": 0,
  "data": [
      "name": "feature_correlation",
      "url": {
        "%context%": true,
        "index": "fraud*",
        "body": {
          "size": 0,
          "aggs": {
            "correlation_stats": {
              "composite": {
                "sources": [{"isFraud": {"terms": {"field": "isFraud"}}}]
              "aggregations": {
                "statistics": {
                  "matrix_stats": {
                    "fields": [
      "format": {"property": "aggregations.correlation_stats.buckets"},
      "transform": [
        {"type": "flatten", "fields": ["statistics.fields"], "as": ["f"]},
          "type": "project",
          "fields": [
          "as": [
          "type": "fold",
          "fields": [
        {"type": "project", "fields": ["isFraud", "name", "key", "value"]}
      "name": "column_domain",
      "source": "feature_correlation",
      "transform": [{"type": "aggregate", "groupby": ["isFraud"]}]
  "signals": [
    {"name": "child_width", "value": 500},
    {"name": "child_height", "value": 600}
  "layout": {
    "padding": 20,
    "offset": {"columnTitle": 10},
    "columns": {"signal": "length(data('column_domain'))"},
    "bounds": "full",
    "align": "all"
  "marks": [
      "name": "column-title",
      "type": "group",
      "role": "column-title",
      "title": {"text": "isFraud", "style": "guide-title", "offset": 10}
      "name": "row_header",
      "type": "group",
      "role": "row-header",
      "encode": {"update": {"height": {"signal": "child_height"}}}
      "name": "column_header",
      "type": "group",
      "role": "column-header",
      "from": {"data": "column_domain"},
      "sort": {"field": "datum[\"isFraud\"]", "order": "ascending"},
      "title": {
        "text": {"signal": "parent[\"isFraud\"]"},
        "style": "guide-label",
        "frame": "group",
        "offset": 10
      "encode": {"update": {"width": {"signal": "child_width"}}}
      "name": "cell",
      "type": "group",
      "style": "cell",
      "from": {
        "facet": {
          "name": "facet",
          "data": "feature_correlation",
          "groupby": ["isFraud"]
      "sort": {"field": ["datum[\"isFraud\"]"], "order": ["ascending"]},
      "encode": {
        "update": {
          "width": {"signal": "child_width"},
          "height": {"signal": "child_height"}
      "marks": [
          "type": "rect",
          "from": {"data": "facet"},
          "encode": {
            "enter": {
              "x": {"scale": "x", "field": "name"},
              "y": {"scale": "y", "field": "key"},
              "width": {"scale": "x", "band": 1},
              "height": {"scale": "y", "band": 1},
              "tooltip": {
                "signal": " + ' - ' + datum.key + ': ' + format(datum.value, '.3f')"
            "update": {"fill": {"scale": "color", "field": "value"}}
      "legends": [
          "fill": "color",
          "type": "gradient",
          "title": "Correlation",
          "titleFontSize": 12,
          "titlePadding": 4,
          "gradientLength": {"signal": "child_height - 16"}
      "axes": [
          "orient": "bottom",
          "scale": "x",
          "labelAngle": -45,
          "tickOffset": 0,
          "labelAlign": "right",
          "domain": false
        {"orient": "left", "scale": "y", "domain": false}
  "scales": [
      "name": "x",
      "type": "band",
      "domain": {"data": "feature_correlation", "field": "name"},
      "range": [0, {"signal": "child_width"}],
      "nice": true,
      "zero": true
      "name": "y",
      "type": "band",
      "domain": {"data": "feature_correlation", "field": "key"},
      "range": [0, {"signal": "child_height"}]
      "name": "color",
      "type": "linear",
      "range": {"scheme": "yelloworangebrown"},
      "domain": {"data": "feature_correlation", "field": "value"},
      "zero": false,
      "nice": true


That looks great, thanks, Vincent!

This topic was automatically closed 28 days after the last reply. New replies are no longer allowed.