OpenSearch v2.11.1
Vega v5
I have already posted this question in the OpenSearch community, but I think I might get additional help in this group.
{
"$schema": "...modified/schema/vega/v5.json",
"description": "An arc diagram of traffic between source and destination IP addresses, with arc width scaled by total bytes.",
"data": [
{
  // Edge list: one row per (source IP, destination IP) bucket of the "ip_pairs" composite aggregation.
  "name": "edges",
  "url": {
    "%context%": true,
    "index": "name_index",
    "body": {
      // Only the aggregation buckets are consumed (see "format" below), so no hits are requested.
      "size": 0,
      "aggs": {
        "ip_pairs": {
          "composite": {
            "size": 50,
            "sources": [
              {"source": {"terms": {"field": "source.ip"}}},
              {"target": {"terms": {"field": "destination.ip"}}}
            ]
          },
          "aggs": {
            // Per-bucket sum of source.bytes + destination.bytes; a document contributes 0
            // unless both fields are present and greater than zero.
            "total_bytes": {
              "sum": {
                "script": {
                  "lang": "painless",
                  "source": "if (doc['source.bytes'].size() > 0 && doc['source.bytes'].value > 0 && doc['destination.bytes'].size() > 0 && doc['destination.bytes'].value > 0) {return doc['source.bytes'].value + doc['destination.bytes'].value;} else {return 0;}"
                }
              }
            }
          }
        }
      }
    }
  },
  // Read rows from the bucket array of the response.
  "format": {"property": "aggregations.ip_pairs.buckets"},
  // Flatten each bucket into top-level source / target / total_bytes fields.
  "transform": [
    {"type": "formula", "as": "source", "expr": "datum.key.source"},
    {"type": "formula", "as": "target", "expr": "datum.key.target"},
    {"type": "formula", "as": "total_bytes", "expr": "datum.total_bytes.value"}
    // Disabled: optional projection that would keep only the three flattened fields.
    // ,{
    //   "type": "project",
    //   "fields": ["source", "target", "total_bytes"],
    //   "as": ["source", "target", "total_bytes"]
    // }
  ]
},
{
  // Edges that touch the selected IP as source or target; every edge passes while selectedIP is falsy.
  "name": "filteredEdges",
  "source": "edges",
  "transform": [
    {
      "type": "filter",
      "expr": "!selectedIP || datum.source === selectedIP || datum.target === selectedIP"
    }
  ]
},
{
  // Number of edges per distinct "source" value, emitted as "degree".
  "name": "sourceDegree",
  "source": "edges",
  "transform": [
    {
      "type": "aggregate",
      "groupby": ["source"],
      "ops": ["count"],
      "as": ["degree"]
    }
  ]
},
{
  // Number of edges per distinct "target" value, emitted as "degree".
  "name": "targetDegree",
  "source": "edges",
  "transform": [
    {
      "type": "aggregate",
      "groupby": ["target"],
      "ops": ["count"],
      "as": ["degree"]
    }
  ]
},
{
  // Distinct IPs seen on either end of an edge, reduced to {name, index} rows.
  "name": "unique_ips",
  "source": "edges",
  "transform": [
    // One row per (source, target) pair.
    {"type": "aggregate", "groupby": ["source", "target"]},
    // Stack the source and target columns into a single "value" column.
    {"type": "fold", "fields": ["source", "target"]},
    // Collapse repeated values so each IP appears once.
    {"type": "aggregate", "groupby": ["value"], "as": ["unique_ips"]},
    {"type": "formula", "as": "name", "expr": "datum.value"},
    // Number the rows in their current order.
    {"type": "window", "ops": ["row_number"], "as": ["index"]},
    {"type": "project", "fields": ["name", "index"]}
  ]
},
{
  // One row per unique IP, annotated with its total degree and a display order.
  "name": "nodes",
  "source": "unique_ips",
  "transform": [
    // Attach the matching sourceDegree row (matched on name == source); {degree: 0} when there is none.
    {
      "type": "lookup",
      "from": "sourceDegree",
      "key": "source",
      "fields": ["name"],
      "as": ["sourceDegree"],
      "default": {"degree": 0}
    },
    // Same for targetDegree (matched on name == target).
    {
      "type": "lookup",
      "from": "targetDegree",
      "key": "target",
      "fields": ["name"],
      "as": ["targetDegree"],
      "default": {"degree": 0}
    },
    // Total degree = edges where the IP is the source + edges where it is the target.
    {
      "type": "formula",
      "as": "degree",
      "expr": "datum.sourceDegree.degree + datum.targetDegree.degree"
    },
    // Generate an order number for each IP.
    {"type": "window", "ops": ["row_number"], "as": ["order"]}
  ]
},
{
  // Nodes to label: with a selection, the selected IP plus every IP appearing in filteredEdges;
  // without a selection, all nodes.
  "name": "connectedNodes",
  "source": "nodes",
  "transform": [
    {
      "type": "filter",
      "expr": "selectedIP ? (indata('filteredEdges', 'source', datum.name) || indata('filteredEdges', 'target', datum.name) || datum.name === selectedIP) : true"
    }
  ]
}
],
// All signals live in ONE "signals" array. Repeating the "signals" key in the same object
// makes most parsers keep only the last occurrence, silently dropping the earlier signals.
"signals": [
  {
    // Slider-bound value (0-50, step 5); currently not consumed by any active encoding.
    "name": "arcHeight",
    "value": 0,
    "bind": {"input": "range", "min": 0, "max": 50, "step": 5}
  },
  {
    // Currently selected IP; null means no selection.
    "name": "selectedIP",
    "value": null,
    "on": [
      {
        // Clicking a mark named "ipText" toggles the selection:
        // clicking the selected IP again clears it, clicking another IP selects that one.
        "events": "@ipText:click",
        "update": "datum.name === selectedIP ? null : datum.name"
      }
    ]
  }
],
"scales": [
  // Horizontal slot for each node, keyed by its "order" number.
  {
    "name": "position",
    "type": "band",
    "domain": {"data": "nodes", "field": "order", "sort": true},
    "range": "width"
  },
  // Categorical color per node order.
  {
    "name": "color",
    "type": "ordinal",
    "domain": {"data": "nodes", "field": "order"},
    "range": "category"
  },
  // Maps an edge's total_bytes onto a 1-10 stroke width.
  {
    "name": "widthScale",
    "type": "linear",
    "domain": {"data": "edges", "field": "total_bytes"},
    "range": [1, 10]
  }
],
"marks": [
  // Invisible (opacity 0), non-interactive symbols that compute each node's x/y/size/fill.
  // The arc and visible-symbol marks below read their geometry from this mark.
  {
    "type": "symbol",
    "name": "layout",
    "interactive": false,
    "from": {"data": "nodes"},
    "encode": {
      "enter": {
        "opacity": {"value": 0}
      },
      "update": {
        "x": {"scale": "position", "field": "order"},
        "y": {"value": 0},
        "size": {"field": "degree", "mult": 19, "offset": 20},
        // Color by degree: < 2 green, 2-4 yellow, >= 5 red.
        "fill": [
          {"test": "datum.degree < 2", "value": "green"},
          {"test": "datum.degree >= 2 && datum.degree < 5", "value": "yellow"},
          {"test": "datum.degree >= 5", "value": "red"}
        ]
      }
    }
  },
  // One arc per (filtered) edge, with stroke width driven by total_bytes.
  {
    "type": "path",
    "from": {"data": "filteredEdges"},
    "encode": {
      "update": {
        "stroke": {"value": "#000"},
        "strokeOpacity": {"value": 0.2},
        "strokeWidth": {"scale": "widthScale", "field": "total_bytes"}
        //"y":{"signal":"arcHeight"}
      }
    },
    "transform": [
      // Resolve both edge endpoints to their items in the "layout" mark.
      {
        "type": "lookup",
        "from": "layout",
        "key": "datum.name",
        "fields": ["datum.source", "datum.target"],
        "as": ["sourceNode", "targetNode"]
      },
      // Draw the arc from the smaller to the larger endpoint x, along y = 0.
      {
        "type": "linkpath",
        "sourceX": {"expr": "min(datum.sourceNode.x, datum.targetNode.x)"},
        "targetX": {"expr": "max(datum.sourceNode.x, datum.targetNode.x)"},
        "sourceY": {"expr": "0"},
        "targetY": {"expr": "0"},
        "shape": "arc"
      }
    ]
  },
  // Visible node symbols, copying the geometry and color computed by the "layout" mark.
  {
    "type": "symbol",
    "from": {"data": "layout"},
    "encode": {
      "update": {
        "x": {"field": "x"},
        "y": {"field": "y"},
        "fill": {"field": "fill"},
        "size": {"field": "size"}
      }
    }
  },
  // Rotated IP labels; this is the click target for the selectedIP signal ("@ipText:click").
  {
    "type": "text",
    "name": "ipText",
    "from": {"data": "connectedNodes"},
    "encode": {
      "update": {
        "x": {"scale": "position", "field": "order"},
        "y": {"value": 7},
        "fontSize": {"value": 12},
        "align": {"value": "right"},
        "baseline": {"value": "middle"},
        "angle": {"value": -90},
        "text": {"field": "name"},
        // The selected IP is highlighted; every other label is black.
        "fill": [
          {"test": "datum.name === selectedIP", "value": "firebrick"},
          {"value": "black"}
        ]
      }
    }
  }
]
}
This script creates an arc-shaped visualization in which IPs are connected by paths.
Objective:
Whenever an IP address is clicked, a new filter should be created at the top left,
so that all connections of that particular IP are shown. The same effect can be achieved when I add a filter manually:
source.ip is ip or source.ip is one of ip
Options:
- An interactive visualization that creates or modifies a filter when an IP address is clicked. The filter is created at the top left of the visualization. (Visualization filter)
- Vega retrieves data from OpenSearch every time an IP address is clicked, i.e. it requests new data on every click.
In the first option we have an initial data set that does not change; we just select some IPs based on the filter. In the second option, the data set changes with every click.
The transform and signal features do not work for me, since they only select from the limited set of IPs in the initially retrieved data and do not update the data set.
Please let me know whether either of the options mentioned above is possible to implement.