Hey @sieky welcome!
Thanks for asking this question.
I think the only part missing from your understanding is where the data comes from and how it is passed to Vega. If you are looking for a good Kibana-related Vega tutorial, here is a good resource.
So, in the simple example you posted with explicitly defined `data[0].values`, the final shape of the `table` dataset that Vega sees looks like this...
| category | amount |
| --- | --- |
| A | 28 |
| B | 55 |
| ... | ... |
Side note: you can view each dataset in the Vega debug view, available from the Inspect panel.
Now, when you pull dynamic data from Elasticsearch, simply defining `data[0].url.index` is not sufficient; you must also provide an aggregation to query the index. This link explains this in more detail.
A good way to obtain this query is to build out the basic shape of the visualization in Lens, then copy the query from the Inspect -> Requests -> Request tab, as shown below.
In our case the request would look something like this...
{
"aggs": {
"0": {
"terms": {
"field": "category.keyword",
"order": {
"1": "desc"
},
"size": 5
},
"aggs": {
"1": {
"sum": {
"field": "amount"
}
}
}
}
},
"size": 0,
"fields": [],
"script_fields": {},
"stored_fields": [
"*"
],
"runtime_mappings": {},
"_source": {
"excludes": []
},
"query": {
"bool": {
"must": [],
"filter": [],
"should": [],
"must_not": []
}
}
}
You could simplify this and change the `aggs` key names if you'd like, but I'll keep it as is for the sake of this demo.
The raw Elasticsearch response to this request would look like this...
{
"took": 0,
"timed_out": false,
"_shards": {
"total": 1,
"successful": 1,
"skipped": 0,
"failed": 0
},
"hits": {
"total": {
"value": 2,
"relation": "eq"
},
"max_score": null,
"hits": []
},
"aggregations": {
"0": {
"doc_count_error_upper_bound": 0,
"sum_other_doc_count": 0,
"buckets": [
{
"1": {
"value": 55
},
"key": "B",
"doc_count": 1
},
{
"1": {
"value": 28
},
"key": "A",
"doc_count": 1
}
]
}
}
}
At this point our vega config looks like this...
Click to see full config
{
"$schema":"https://vega.github.io/schema/vega/v5.json",
"padding":5,
"data":[
{
"name":"dt_source",
"url":{
"index":"bar_example",
"body":{
"aggs":{
"0":{
"terms":{
"field":"category.keyword",
"order":{
"1":"desc"
},
"size":5
},
"aggs":{
"1":{
"sum":{
"field":"amount"
}
}
}
}
},
"size":0,
"fields":[
],
"script_fields":{
},
"stored_fields":[
"*"
],
"runtime_mappings":{
},
"_source":{
"excludes":[
]
},
"query":{
"bool":{
"must":[
],
"filter":[
],
"should":[
],
"must_not":[
]
}
}
}
}
}
],
"signals":[
{
"name":"tooltip",
"value":{
},
"on":[
{
"events":"rect:mouseover",
"update":"datum"
},
{
"events":"rect:mouseout",
"update":"{}"
}
]
}
],
"scales":[
{
"name":"xscale",
"type":"band",
"domain":{
"data":"dt_source",
"field":"category"
},
"range":"width",
"padding":0.05,
"round":true
},
{
"name":"yscale",
"domain":{
"data":"dt_source",
"field":"amount"
},
"nice":true,
"range":"height"
}
],
"axes":[
{
"orient":"bottom",
"scale":"xscale"
},
{
"orient":"left",
"scale":"yscale"
}
],
"marks":[
{
"type":"rect",
"from":{
"data":"dt_source"
},
"encode":{
"enter":{
"x":{
"scale":"xscale",
"field":"category"
},
"width":{
"scale":"xscale",
"band":1
},
"y":{
"scale":"yscale",
"field":"amount"
},
"y2":{
"scale":"yscale",
"value":0
}
},
"update":{
"fill":{
"value":"steelblue"
}
},
"hover":{
"fill":{
"value":"red"
}
}
}
},
{
"type":"text",
"encode":{
"enter":{
"align":{
"value":"center"
},
"baseline":{
"value":"bottom"
},
"fill":{
"value":"#333"
}
},
"update":{
"x":{
"scale":"xscale",
"signal":"tooltip.category",
"band":0.5
},
"y":{
"scale":"yscale",
"signal":"tooltip.amount",
"offset":-2
},
"text":{
"signal":"tooltip.amount"
},
"fillOpacity":[
{
"test":"isNaN(tooltip.amount)",
"value":0
},
{
"value":1
}
]
}
}
}
]
}
But this still does not work: if we look again at the `dt_source` dataset in the Vega debug tab, we see that it contains only the top-level values of the response (`took`, `timed_out`, `_shards`, `hits`, `aggregations`) rather than one row per bucket.
To fix this we need to use the `data[0].format` property. This works much like `lodash.get`: it replaces the dataset with whatever is found at the defined path. In our case we want it to point to the buckets that contain our data values; based on the `aggs` we defined, that path is `aggregations.0.buckets`.
{
"format": {
"property": "aggregations.0.buckets"
}
}
With this change the `dt_source` dataset now shows the bucket values...
You could leave it here and just access the values as is (i.e. `1.value` instead of `amount` and `key` instead of `category`). But a cleaner way is to define a `transform` on the `data` object that creates a new column from the result of an expression. In our case we want to extract `key` as `category` and `[1].value` as `amount`.
{
"transform":[
{
"type":"formula",
"expr":"datum.key",
"as":"category"
},
{
"type":"formula",
"expr":"datum[1].value",
"as":"amount"
}
]
}
Note: `datum` is a reserved variable used to access the current data row. Also, since we named the aggregation `1`, it is inferred as a number/index and must be accessed via bracket notation (i.e. `[1]`) in the `expr` (expression).
With these changes the `dt_source` dataset is now in the expected form (see below) and can be used with your original Vega config.
Click to see final config
{
"$schema":"https://vega.github.io/schema/vega/v5.json",
"padding":5,
"data":[
{
"name":"dt_source",
"url":{
"index":"bar_example",
"body":{
"aggs":{
"0":{
"terms":{
"field":"category.keyword",
"order":{
"1":"desc"
},
"size":5
},
"aggs":{
"1":{
"sum":{
"field":"amount"
}
}
}
}
},
"size":0,
"fields":[
],
"script_fields":{
},
"stored_fields":[
"*"
],
"runtime_mappings":{
},
"_source":{
"excludes":[
]
},
"query":{
"bool":{
"must":[
],
"filter":[
],
"should":[
],
"must_not":[
]
}
}
}
},
"format":{
"property":"aggregations.0.buckets"
},
"transform":[
{
"type":"formula",
"expr":"datum.key",
"as":"category"
},
{
"type":"formula",
"expr":"datum[1].value",
"as":"amount"
}
]
}
],
"signals":[
{
"name":"tooltip",
"value":{
},
"on":[
{
"events":"rect:mouseover",
"update":"datum"
},
{
"events":"rect:mouseout",
"update":"{}"
}
]
}
],
"scales":[
{
"name":"xscale",
"type":"band",
"domain":{
"data":"dt_source",
"field":"category"
},
"range":"width",
"padding":0.05,
"round":true
},
{
"name":"yscale",
"domain":{
"data":"dt_source",
"field":"amount"
},
"nice":true,
"range":"height"
}
],
"axes":[
{
"orient":"bottom",
"scale":"xscale"
},
{
"orient":"left",
"scale":"yscale"
}
],
"marks":[
{
"type":"rect",
"from":{
"data":"dt_source"
},
"encode":{
"enter":{
"x":{
"scale":"xscale",
"field":"category"
},
"width":{
"scale":"xscale",
"band":1
},
"y":{
"scale":"yscale",
"field":"amount"
},
"y2":{
"scale":"yscale",
"value":0
}
},
"update":{
"fill":{
"value":"steelblue"
}
},
"hover":{
"fill":{
"value":"red"
}
}
}
},
{
"type":"text",
"encode":{
"enter":{
"align":{
"value":"center"
},
"baseline":{
"value":"bottom"
},
"fill":{
"value":"#333"
}
},
"update":{
"x":{
"scale":"xscale",
"signal":"tooltip.category",
"band":0.5
},
"y":{
"scale":"yscale",
"signal":"tooltip.amount",
"offset":-2
},
"text":{
"signal":"tooltip.amount"
},
"fillOpacity":[
{
"test":"isNaN(tooltip.amount)",
"value":0
},
{
"value":1
}
]
}
}
}
]
}