Help Transforming Data and Combining Fields

Hi everyone,
I'm hoping someone can help me with transforming some data. Every 5 minutes I have a script capture wireless client information from my wireless controller and ship it into ElasticSearch. This information is used for troubleshooting purposes and allows us to review connection quality for Wireless APs or specific devices. I'm trying to transform the data into AP centric statistics. So far I've figured out how to use transforms to convert the data so I have a timestamp, AP name, and client count, but would like to break it down further to show how many clients were connected to the 2.4GHz and 5GHz radios. Adding the radio to the group_by properties of the pivot works, but results in 2 documents for each AP for a given time. Is there a way to combine the documents so I have a single document with the count for each radio and the overall client count? I feel that pipeline aggregations may be able to do that, but I've not been able to figure out how to craft the query. Or maybe I've overlooking a simpler solution.

Here is super simplified version of the type of data I'm working with.

POST test-wifi-sample-data/_bulk

{"index":{}}
{"date":"2020-10-09T11:00:00-04:00","apMac": "AA:AA:AA:AA:AA:AA","apName": "AP1","radioType":"2.4G","clientMac":"BB:BB:BB:BB:BB:BB"}
{"index":{}}
{"date":"2020-10-09T11:00:00-04:00","apMac": "AA:AA:AA:AA:AA:AA","apName": "AP1","radioType":"2.4G","clientMac":"CB:BB:BB:BB:BB:BB"}
{"index":{}}
{"date":"2020-10-09T11:00:00-04:00","apMac": "AA:AA:AA:AA:AA:AA","apName": "AP1","radioType":"2.4G","clientMac":"DB:BB:BB:BB:BB:BB"}
{"index":{}}
{"date":"2020-10-09T11:00:00-04:00","apMac": "AA:AA:AA:AA:AA:AA","apName": "AP1","radioType":"5G","clientMac":"EB:BB:BB:BB:BB:BB"}
{"index":{}}
{"date":"2020-10-09T11:00:00-04:00","apMac": "AA:AA:AA:AA:AA:AA","apName": "AP1","radioType":"5G","clientMac":"FB:BB:BB:BB:BB:BB"}
{"index":{}}
{"date":"2020-10-09T11:00:00-04:00","apMac": "AA:AA:AA:AA:AA:AA","apName": "AP1","radioType":"5G","clientMac":"BC:BB:BB:BB:BB:BB"}
{"index":{}}
{"date":"2020-10-09T11:00:00-04:00","apMac": "BA:AA:AA:AA:AA:AA","apName": "AP2","radioType":"2.4G","clientMac":"BB:BC:BB:BB:BB:BB"}
{"index":{}}
{"date":"2020-10-09T11:00:00-04:00","apMac": "BA:AA:AA:AA:AA:AA","apName": "AP2","radioType":"2.4G","clientMac":"B:BD:BB:BB:BB:BB"}
{"index":{}}
{"date":"2020-10-09T11:00:00-04:00","apMac": "BA:AA:AA:AA:AA:AA","apName": "AP2","radioType":"2.4G","clientMac":"BB:BE:BB:BB:BB:BB"}
{"index":{}}
{"date":"2020-10-09T11:00:00-04:00","apMac": "BA:AA:AA:AA:AA:AA","apName": "AP2","radioType":"5G","clientMac":"BB:BF:BB:BB:BB:BB"}
{"index":{}}
{"date":"2020-10-09T11:00:00-04:00","apMac": "BA:AA:AA:AA:AA:AA","apName": "AP2","radioType":"5G","clientMac":"BB:CA:BB:BB:BB:BB"}

Here is the first transform I created that aggregates the number of clients connected to each AP based on the radio.

{
  "id": "transform1-test-wifi-sample-data",
  "source": {
	"index": [
	  "test-wifi-sample-data*"
	],
	"query": {
	  "match_all": {}
	}
  },
  "dest": {
	"index": "transform1-test-wifi-sample-data"
  },
  "pivot": {
	"group_by": {
	  "date": {
		"date_histogram": {
		  "field": "date",
		  "calendar_interval": "1m"
		}
	  },
	  "apName": {
		"terms": {
		  "field": "apName.keyword"
		}
	  },
	  "radioType": {
		"terms": {
		  "field": "radioType.keyword"
		}
	  }
	},
	"aggregations": {
	  "clientcount": {
		"value_count": {
		  "field": "apMac.keyword"
		}
	  }
	}
  },
  "version": "7.5.1",
  "create_time": 1602265867048
}

Which gives me a results like this:

{
  "took" : 1,
  "timed_out" : false,
  "_shards" : {
	"total" : 1,
	"successful" : 1,
	"skipped" : 0,
	"failed" : 0
  },
  "hits" : {
	"total" : {
	  "value" : 4,
	  "relation" : "eq"
	},
	"max_score" : 1.0,
	"hits" : [
	  {
		"_index" : "transform1-test-wifi-sample-data",
		"_type" : "_doc",
		"_id" : "QQAyJ6ndXEHixiYxuP4lkNhTAQAAAAAA",
		"_score" : 1.0,
		"_source" : {
		  "date" : 1602255600000,
		  "clientcount" : 3.0,
		  "radioType" : "2.4G",
		  "apName" : "AP1"
		}
	  },
	  {
		"_index" : "transform1-test-wifi-sample-data",
		"_type" : "_doc",
		"_id" : "QQA1jEeGztDt9oN9hipPdSiG4wAAAAAA",
		"_score" : 1.0,
		"_source" : {
		  "date" : 1602255600000,
		  "clientcount" : 3.0,
		  "radioType" : "5G",
		  "apName" : "AP1"
		}
	  },
	  {
		"_index" : "transform1-test-wifi-sample-data",
		"_type" : "_doc",
		"_id" : "QQAyxe_XTLGOncI8OCH2RIzJewAAAAAA",
		"_score" : 1.0,
		"_source" : {
		  "date" : 1602255600000,
		  "clientcount" : 3.0,
		  "radioType" : "2.4G",
		  "apName" : "AP2"
		}
	  },
	  {
		"_index" : "transform1-test-wifi-sample-data",
		"_type" : "_doc",
		"_id" : "QQA1EPcey94zCW9-sO2oMDMhRQAAAAAA",
		"_score" : 1.0,
		"_source" : {
		  "date" : 1602255600000,
		  "clientcount" : 2.0,
		  "radioType" : "5G",
		  "apName" : "AP2"
		}
	  }
	]
  }
}

I would like a results to look something like this

{
  "_index" : "transform1-test-wifi-sample-data",
  "_type" : "_doc",
  "_id" : "QQAyxe_XTLGOncI8OCH2RIzJewAAAAAA",
  "_score" : 1.0,
  "_source" : {
	"date" : 1602255600000,
	"clientcount" : 6.0,
	"2.4radioclientcount" : "3.0",
	"5radioclientcount": "3.0",
	"apName" : "AP1"
  }
},
{
  "_index" : "transform1-test-wifi-sample-data",
  "_type" : "_doc",
  "_id" : "QQAyxe_XTLGOncI8OCH2RIzJewAAAAAA",
  "_score" : 1.0,
  "_source" : {
	"date" : 1602255600000,
	"clientcount" : 5.0,
	"2.4radioclientcount" : "3.0",
	"5radioclientcount": "2.0",
	"apName" : "AP2"
  }
}

Thank you for all the help you can provide.

You can use 2 filter aggregations like in this example for http response code:

"2_4radioclientcount" : { 
  "filter": {
    "term": { "radioType" : "2.4G"}
  }
}

(Avoid . in the names, because it will create nested objects or you use it intentionally like count.total, count.5G, count.2_4G which will result in

"count": {
  "total": 6.0,
  "2_4G": 3.0,
  "5G": 3.0
}

)

FWIW: A terms aggregation instead of filter is another option, but I think filter is the better one for you.

Hi Hendrik,

Thank you for the suggestion. I had read that example at one point several days ago when I was getting started on this project and forgot about it. I tried your suggestion but kept running into the following error message.

{
  "statusCode": 400,
  "error": "Bad Request",
  "message": "[status_exception] Unsupported aggregation type [filter]",
  "cause": [
    "Unsupported aggregation type [filter]"
  ]
}

Turns out the version of Elasticsearch I'm running (7.5.1) doesn't support aggregation filters yet. The documentation is very different in 7.5 compared to 7.6 and newer (https://www.elastic.co/guide/en/elasticsearch/reference/7.5/transform-examples.html#example-clientips). It's time for me to update my stack anyway and start preparing for 8.x. That will better in the long term then writing the aggregation for 7.5 and then re-writing it for 7.9.

I'll try to get the updates done and report back if there are still issues.

Yes, we added filter in 7.7, sorry I did not see you are on 7.5. The only way for 7.5 would be a scripted_metric, but upgrade is the better option.

However 7.9 won't be the last minor in the 7.x series, there will be more minors before 8.x. That's all I can say. For transform the upgrade will bring some useful additions, e.g. if you plan to use continuous mode.

Thanks Hendrik,

I jumped over to my test server, updated it, and tried your suggestion. It worked like a charm. I really appreciate your help with this. Here is my final transform:

POST _transform/_preview
{
  "source": {
    "index": "test-wifi-sample-data"
  }, 
  "pivot": {
    "group_by": {
      "date": {
        "date_histogram": {
          "field": "date",
          "calendar_interval": "1m"
        }
      },
      "apName": {
        "terms": {
          "field": "apName.keyword"
        }
      }
    },
    "aggregations": {
      "2_4G_clientcount": {
        "filter": {
          "term": {
            "radioType.keyword": "2.4G"
          }
        }
      },
      "5G_clientcount": {
        "filter": {
          "term": {
            "radioType.keyword": "5G"
          }
        }
      },
      "total_clientcount": {
        "value_count": {
          "field": "apMac.keyword"
        }
      }
    }
  }
}

And here are the preview results.

{
  "preview" : [
    {
      "date" : 1602255600000,
      "2_4G_clientcount" : 3,
      "total_clientcount" : 6.0,
      "5G_clientcount" : 3,
      "apName" : "AP1"
    },
    {
      "date" : 1602255600000,
      "2_4G_clientcount" : 3,
      "total_clientcount" : 5.0,
      "5G_clientcount" : 2,
      "apName" : "AP2"
    }
  ],
  "generated_dest_index" : {
    "mappings" : {
      "_meta" : {
        "_transform" : {
          "transform" : "transform-preview",
          "version" : {
            "created" : "7.9.2"
          },
          "creation_date_in_millis" : 1602517126215
        },
        "created_by" : "transform"
      },
      "properties" : {
        "2_4G_clientcount" : {
          "type" : "long"
        },
        "date" : {
          "type" : "date"
        },
        "apName" : {
          "type" : "keyword"
        },
        "total_clientcount" : {
          "type" : "long"
        },
        "5G_clientcount" : {
          "type" : "long"
        },
        "apName" : {
          "type" : "object"
        }
      }
    },
    "settings" : {
      "index" : {
        "number_of_shards" : "1",
        "auto_expand_replicas" : "0-1"
      }
    },
    "aliases" : { }
  }
}

This topic was automatically closed 28 days after the last reply. New replies are no longer allowed.