Help in visualizing custom query/aggregation

Hi there,

I am having trouble in visualizing the following elasticsearch query:

    {
      "query": {
        "exists": {
          "field": "log_transaction_status"
        }
      },
      "size": 0,
      "aggs": {
        "group_by_id": {
          "terms": {
            "field": "log_transaction_id"
          },
          "aggs": {
            "only_status": {
              "filter": {
                "term": {
                  "log_transaction_status.keyword": "active"
                }
              },
              "aggs": {
                "latest_date": {
                  "max": {
                    "field": "log_timestamp"
                  }
                },
                "top_hit": {
                  "top_hits": {
                    "size": 1,
                    "_source": {
                        "includes": "log_transaction_id"
                     },
                    "sort": [
                      {
                        "log_timestamp": {
                          "order": "desc"
                        }
                      }
                    ]
                  }
                }
              }
            },
            "max_date": {
              "max": {
                "field": "log_timestamp"
              }
            },
            "has_defined_status": {
              "bucket_selector": {
                "buckets_path": {
                  "failed_date": "only_status.latest_date",
                  "max_date": "max_date"
                },
                "script": "params.failed_date == params.max_date"
              }
            }
          }
        },
        "bucket_count":{
          "stats_bucket": {
            "buckets_path":"group_by_id._count"
          }
        }
      }
    }

Basically, with this query, I group all records by a specific id (log_transaction_id), find the latest record and check if the latest record is equal to the record which holds the status "active" (by comparing the timestamps).
The reason for this is that I want to count all groups of records where the latest entry has a specific status.

The query above gives me the correct result, at least the buckets count in the aggregation is correct. Furthermore to asking for visualization, I also want to know if I can only count the buckets, not the doc count. And I want to visualize that as a metric, meaning showing just the count.

Thanks a lot for helping!

The only way to visualize a Query DSL query like the one you have in Kibana is to use Vega-Lite or Vega. Otherwise, depending on the specific features you're using you may be able to reproduce the same query using one of the build-in tools.

You can understand how to query Elasticsearch using Vega-Lite with the tutorial.

Thanks @wylie , I will try that.

Hey @wylie. I finally get the correct results from the Vega query. I debugged it through the Vega Debug console and this is the source_0 set:

Now I want to visualize this as a count of the buckets, in this case 2, and just as a single text field, to basically say, I have 2 records.

This is my current Vega schema:

    {
      "$schema": "https://vega.github.io/schema/vega-lite/v4.json",
      "title": "Current amount of interrupted transactions",
      "data": {
        "url": {
          "index": "udboffice_logs-dev-transaction-2021-01",
          "body": {
            "query": {
              "exists": {
                "field": "log_transaction_status"
              }
            },
            "aggs": {
              "group_by_id": {
                "terms": {
                  "field":"log_transaction_id"
                },
                "aggs": {
                  "only_status": {
                    "filter": {
                      "term": {
                        "log_transaction_status.keyword": "active"
                      }
                    }
                    "aggs": {
                      "latest_date": {
                        "max": {
                          "field": "log_timestamp"
                        }
                      },
                      "top_hit": {
                        "top_hits": {
                          "size": 1,
                          "sort": [
                            {
                              "log_timestamp": {
                                "order": "desc"
                              }
                            }
                          ]
                        }
                      }
                    }
                  },
                  "max_date": {
                    "max": {
                      "field": "log_timestamp"
                    }
                  },
                  "has_defined_status": {
                    "bucket_selector": {
                      "buckets_path": {
                        "failed_date": "only_status.latest_date",
                        "max_date": "max_date"
                      },
                      "script": "params.failed_date == params.max_date"
                    }
                  }
                }
              },
            },
            "size": 0
          }
        },
        "format": {
          "property": "aggregations.group_by_id.buckets"
        },
        transform: [{
          flatten: ["aggregations.group_by_id.buckets"],
          as: ["buckets"]
         }]
      },

      "mark": "area",


      encoding: {
        x: {
          field: key
          type: nominal
          axis: { title: null }
        }
        y: {
          field: doc_count
          type: quantitative
          axis: { title: "Document count" }
        }
      }
    }

Could you help me write the transform and encoding part?
Thanks a lot.

So by adding this part, I get a horiztonal single bar with the amount showing up at the bottom:

 mark: {
    "type": "bar",
    "color": "red"
  },

  encoding: {
    "x": {
      "aggregate": "count",
      "field": "_count",
      "title": "Interrupted transactions"
    }
  }

As I asked, it is possible to just show the number "2" in a single field in the middle of the visualization?
And if not, how can I get a vertical single bar chart, with the count at the left side, and it full amount (currently, I have 0.1 steps in between).

Thanks a lot!

Here is what I would suggest:

  1. Use mark: text to get a large text
  2. Use the encoding channel y set to something like height / 2
  3. Use the encoding channel text to set the value
  4. Use the aggregate transform to get the count of rows.

@wylie That's it, thank you a lot.
I added a stats_bucket and retrieve the count from there. This is the complete schema:

{
  "$schema": "https://vega.github.io/schema/vega-lite/v4.json",
  "title": "Current amount of interrupted transactions",
  "data": {
    "url": {
      "index": "udboffice_logs-dev-transaction-2021-01",
      "body": {
        "query": {
          "exists": {
            "field": "log_transaction_status"
          }
        },
        "aggs": {
          "group_by_id": {
            "terms": {
              "field":"log_transaction_id"
            },
            "aggs": {
              "only_status": {
                "filter": {
                  "term": {
                    "log_transaction_status.keyword": "active"
                  }
                }
                "aggs": {
                  "latest_date": {
                    "max": {
                      "field": "log_timestamp"
                    }
                  },
                  "top_hit": {
                    "top_hits": {
                      "size": 1,
                      "sort": [
                        {
                          "log_timestamp": {
                            "order": "desc"
                          }
                        }
                      ]
                    }
                  }
                }
              },
              "max_date": {
                "max": {
                  "field": "log_timestamp"
                }
              },
              "has_defined_status": {
                "bucket_selector": {
                  "buckets_path": {
                    "failed_date": "only_status.latest_date",
                    "max_date": "max_date"
                  },
                  "script": "params.failed_date == params.max_date"
                }
              }
            }
          },
          "stats" : {
            "stats_bucket": {
              "buckets_path": "group_by_id._count"
            }
          }
        },
        "size": 0
      }
    }
  },
  
  "mark": {
    "type": "text",
    "fontSize": 100,
    "color": "red"
  },

  encoding: {
    "x": {
      "field": "Origin",
      "type": "nominal"
    },
    "text": {
      "field": "aggregations.stats.count",
      "type": "nominal"
    }
  },
  "config": {
    "axis": {
      "grid": false,
      "tickBand": "extent"
    }
  }
}

And this is how it looks:

Sure, that approach makes sense, but I don't typically use it because not everything can be expressed using the Elasticsearch aggregations framework. For complex logic it's easier to express the transformations in Vega, but for simple logic either one is fine.

This topic was automatically closed 28 days after the last reply. New replies are no longer allowed.