In such case, I recommend the option 2 because it may significantly exceed the default max_buckets limitation. You can increase the settings but I'm not sure it works with millions of buckets. And also you have to increase the size parameter of terms aggregation to cover all users.
To visualize the historical change of the compliance user ratio, you need 2 stage aggregation (use average aggregation inspite of bucket_selector aggregation), which is not supported in most part of Kibana as you said.
The following is my example how to calculate the average/count of users with over 75% compliance ratio (just for experiment).
PUT test_aggregation
POST test_aggregation/_doc
{
"day": "2022-05-01",
"user": "Jane",
"skill role": "SE",
"skill": "Java",
"assessment_date": "2021-04-31",
"current_rating": 3,
"required_rating": 3,
"is_meeting_expectation": true
}
POST test_aggregation/_doc
{
"day": "2022-05-01",
"user": "Jane",
"skill role": "SE",
"skill": "python",
"assessment_date": "2021-04-31",
"current_rating": 3,
"required_rating": 3,
"is_meeting_expectation": true
}
POST test_aggregation/_doc
{
"day": "2022-05-01",
"user": "Jane",
"skill role": "SE",
"skill": "sql",
"assessment_date": "2021-04-31",
"current_rating": 3,
"required_rating": 4,
"is_meeting_expectation": false
}
POST test_aggregation/_doc
{
"day": "2022-05-02",
"user": "Jane",
"skill role": "SE",
"skill": "Java",
"assessment_date": "2021-04-31",
"current_rating": 3,
"required_rating": 3,
"is_meeting_expectation": true
}
POST test_aggregation/_doc
{
"day": "2022-05-02",
"user": "Jane",
"skill role": "SE",
"skill": "python",
"assessment_date": "2021-04-31",
"current_rating": 3,
"required_rating": 3,
"is_meeting_expectation": true
}
POST test_aggregation/_doc
{
"day": "2022-05-02",
"user": "Jane",
"skill role": "SE",
"skill": "sql",
"assessment_date": "2022-05-02",
"current_rating": 4,
"required_rating": 4,
"is_meeting_expectation": true
}
POST test_aggregation/_doc
{
"day": "2022-05-01",
"user": "John",
"skill role": "SE",
"skill": "Java",
"assessment_date": "2021-04-31",
"current_rating": 3,
"required_rating": 3,
"is_meeting_expectation": true
}
POST test_aggregation/_doc
{
"day": "2022-05-01",
"user": "John",
"skill role": "SE",
"skill": "python",
"assessment_date": "2021-04-31",
"current_rating": 2,
"required_rating": 3,
"is_meeting_expectation": false
}
POST test_aggregation/_doc
{
"day": "2022-05-01",
"user": "John",
"skill role": "SE",
"skill": "sql",
"assessment_date": "2021-04-31",
"current_rating": 3,
"required_rating": 4,
"is_meeting_expectation": false
}
POST test_aggregation/_doc
{
"day": "2022-05-02",
"user": "John",
"skill role": "SE",
"skill": "Java",
"assessment_date": "2021-04-31",
"current_rating": 3,
"required_rating": 3,
"is_meeting_expectation": true
}
POST test_aggregation/_doc
{
"day": "2022-05-02",
"user": "John",
"skill role": "SE",
"skill": "python",
"assessment_date": "2021-04-31",
"current_rating": 2,
"required_rating": 3,
"is_meeting_expectation": false
}
POST test_aggregation/_doc
{
"day": "2022-05-02",
"user": "John",
"skill role": "SE",
"skill": "sql",
"assessment_date": "2022-05-02",
"current_rating": 2,
"required_rating": 4,
"is_meeting_expectation": false
}
GET test_aggregation/_search
{
"size":0,
"aggs":{
"date": {
"date_histogram": {
"field": "day",
"interval": "day"
},
"aggs": {
"user":{
"terms":{
"field": "user.keyword",
"size": 10
},
"aggs": {
"compliance_ratio": {
"avg": {
"field": "is_meeting_expectation"
}
},
"75%":{
"bucket_script": {
"buckets_path": {"var": "compliance_ratio"},
"script": "params.var > 0.75?1:0;"
}
}
}
},
"average_compiance_ratio":{
"avg_bucket": {
"buckets_path": "user>compliance_ratio"
}
},
"75%_ratio":{
"avg_bucket": {
"buckets_path": "user>75%"
}
},
"75%_count":{
"sum_bucket": {
"buckets_path": "user>75%"
}
}
}
}
}
}
# what whill happen if it exceeds the "size" limit of terms agg.
GET test_aggregation/_search
{
"size":0,
"aggs":{
"date": {
"date_histogram": {
"field": "day",
"interval": "day"
},
"aggs": {
"user":{
"terms":{
"field": "user.keyword",
"size": 1
},
"aggs": {
"compliance_ratio": {
"avg": {
"field": "is_meeting_expectation"
}
},
"75%":{
"bucket_script": {
"buckets_path": {"var": "compliance_ratio"},
"script": "params.var > 0.75?1:0;"
}
}
}
},
"average_compiance_ratio":{
"avg_bucket": {
"buckets_path": "user>compliance_ratio"
}
},
"75%_ratio":{
"avg_bucket": {
"buckets_path": "user>75%"
}
},
"75%_count":{
"sum_bucket": {
"buckets_path": "user>75%"
}
}
}
}
}
}
With transform (option 2),
PUT test_aggregation_transform
{
"mappings": {
"properties": {
"user":{"type":"keyword"},
"date":{"type":"date"},
"compliance_ratio":{"type":"float"}
}
}
}
PUT _transform/test_aggregation_transform
{
"source": {
"index": "test_aggregation"
},
"dest":{
"index": "test_aggregation_transform"
},
"pivot": {
"aggregations": {
"compliance_ratio": {
"avg": {
"field": "is_meeting_expectation"
}
}
},
"group_by": {
"date": {
"date_histogram": {
"field": "day",
"calendar_interval": "day"
}
},
"user":{
"terms": {
"field": "user.keyword"
}
}
}
}
}
POST _transform/test_aggregation_transform/_start
# check if the first transform completed
GET _transform/test_aggregation_transform/_stats
GET /test_aggregation_transform/_search
You can visualize the historical change of compliance ratio over users, for example using formula in Lens on the destination index.
Keep in mind using customize time interval on horizontal axis.
Or you can create 75% compliance flag within transform.
POST _transform/_preview
{
"source": {
"index": "test_aggregation"
},
"dest":{
"index": "test_aggregation_transform"
},
"pivot": {
"aggregations": {
"compliance_ratio": {
"avg": {
"field": "is_meeting_expectation"
}
},
"75%":{
"bucket_script": {
"buckets_path": {"var": "compliance_ratio"},
"script": "params.var > 0.75?1:0;"
}
}
},
"group_by": {
"date": {
"date_histogram": {
"field": "day",
"calendar_interval": "day"
}
},
"user":{
"terms": {
"field": "user.keyword"
}
}
}
}
}