Hi, I am new to Elasticsearch. I have a data stream that uses a custom timestamp field generated by Envoy and fed through Filebeat to Logstash (where it is parsed with grok), then on to Elasticsearch. The use case is that there are thousands of requests coming into a log, and many of these documents/requests share the same timestamp down to the millisecond.
I am trying to make a 1-minute continuous aggregation/transform that groups documents into 1-minute buckets and counts the total records in each 1-minute time period. I believe the transform is skipping/not recognizing documents that share the same timestamp, because the aggregate is always slightly off — anywhere from a dozen to hundreds of records are not counted. I store this count as 'num_docs' in my transform index.
Here is my 1 minute transform json:
{
"id": "1_minute_access_logs",
"authorization": {
"roles": [
"superuser"
]
},
"version": "10.0.0",
"create_time": 1717458721029,
"source": {
"index": [
"filebeat*"
],
"query": {
"match_all": {}
}
},
"dest": {
"index": "1_minute_access_logs"
},
"frequency": "30s",
"sync": {
"time": {
"field": "timestamp",
"delay": "5s"
}
},
"pivot": {
"group_by": {
"user_account": {
"terms": {
"field": "user_account"
}
},
"timestamp": {
"date_histogram": {
"field": "timestamp",
"calendar_interval": "1m"
}
},
"request_method": {
"terms": {
"field": "request_method"
}
},
"response_code": {
"terms": {
"field": "response_code"
}
}
},
"aggregations": {
"num_docs": {
"value_count": {
"field": "@timestamp"
}
},
"timestamp_max": {
"max": {
"field": "timestamp"
}
},
"timestamp_min": {
"min": {
"field": "timestamp"
}
},
"response_time_avg": {
"avg": {
"field": "response_time"
}
}
}
},
"description": "1_minute_access_logs",
"settings": {},
"retention_policy": {
"time": {
"field": "timestamp",
"max_age": "70m"
}
}
}
This is the result of querying the 1-minute transform index with Postman:
"aggregations": {
"total_num_docs": {
"value": 96581.0
}
}
I expected 96,658.
Please let me know if you need any more information - thank you!