Hi, I have a data stream that receives thousands of requests per minute. I am testing with a script that generates logs at a rate of 1,000 requests per minute. I am very new to Elasticsearch. The data stream works fine. I set up a transform (1_minute) that groups and aggregates these logs into 1-minute buckets, along with other groupings, and this works perfectly. I then set up a second transform whose source points to the 1_minute destination index. It groups the data into 1-hour buckets instead of 1-minute buckets, with otherwise the same mappings. For some reason, this second transform falls behind very easily, with operations_behind ranging from 2 to 10 operations. Any clues? Is there any more information I should provide?
My transform JSON configurations:
{
  "id": "1_min_access_logs",
  "authorization": {
    "roles": [
      "superuser"
    ]
  },
  "version": "10.0.0",
  "create_time": 1721693863674,
  "source": {
    "index": [
      "logs-raw_access"
    ],
    "query": {
      "match_all": {}
    }
  },
  "dest": {
    "index": "1_min_access_logs"
  },
  "sync": {
    "time": {
      "field": "@timestamp",
      "delay": "60s"
    }
  },
  "pivot": {
    "group_by": {
      "@timestamp": {
        "date_histogram": {
          "field": "@timestamp",
          "calendar_interval": "1m"
        }
      },
      "user_account": {
        "terms": {
          "field": "user_account"
        }
      },
      "request_method": {
        "terms": {
          "field": "request_method"
        }
      },
      "request_path": {
        "terms": {
          "field": "request_path"
        }
      },
      "response_code": {
        "terms": {
          "field": "response_code"
        }
      }
    },
    "aggregations": {
      "response_time": {
        "avg": {
          "field": "response_time"
        }
      },
      "num_docs": {
        "value_count": {
          "field": "request_id"
        }
      }
    }
  },
  "description": "1_min_access_logs",
  "settings": {}
}
{
  "id": "1_hour_access_logs",
  "authorization": {
    "roles": [
      "superuser"
    ]
  },
  "version": "10.0.0",
  "create_time": 1721694111690,
  "source": {
    "index": [
      "1_min_access_logs"
    ],
    "query": {
      "match_all": {}
    }
  },
  "dest": {
    "index": "1_hour_access_logs"
  },
  "sync": {
    "time": {
      "field": "@timestamp",
      "delay": "120s"
    }
  },
  "pivot": {
    "group_by": {
      "@timestamp": {
        "date_histogram": {
          "field": "@timestamp",
          "calendar_interval": "1h"
        }
      },
      "user_account": {
        "terms": {
          "field": "user_account"
        }
      },
      "request_method": {
        "terms": {
          "field": "request_method"
        }
      },
      "request_path": {
        "terms": {
          "field": "request_path"
        }
      },
      "response_code": {
        "terms": {
          "field": "response_code"
        }
      }
    },
    "aggregations": {
      "num_docs": {
        "sum": {
          "field": "num_docs"
        }
      },
      "response_time_average": {
        "avg": {
          "field": "response_time"
        }
      }
    }
  },
  "description": "1_hour_access_logs",
  "settings": {}
}