Hi,
I'm running this query:
GET _search
{
"size": 0,
"query": {
"range": {
"@timestamp": {
"gte": "now-365d/d",
"to": "now-10m/m"
}
}
},
"aggs": {
"timestamp": {
"terms": {
"field": "@timestamp",
"size": 10,
"order": {
"_term": "asc"
}
},
"aggs": {
"k": {
"terms": {
"field": "k",
"size": 10000
},
"aggs": {
"v": {
"terms": {
"field": "v",
"size": 10000,
"min_doc_count": 2
}
}
}
}
}
}
}
}
And I get this response:
{
"took": 17,
"timed_out": false,
"_shards": {
"total": 3,
"successful": 3,
"failed": 0
},
"hits": {
"total": 83047,
"max_score": 0,
"hits": []
},
"aggregations": {
"timestamp": {
"doc_count_error_upper_bound": 0,
"sum_other_doc_count": 82939,
"buckets": [
{
"key": 1488163800000,
"key_as_string": "1488163800000",
"doc_count": 4,
"k": {
"doc_count_error_upper_bound": 0,
"sum_other_doc_count": 0,
"buckets": [
{
"key": "request_type",
"doc_count": 3,
"v": {
"doc_count_error_upper_bound": 0,
"sum_other_doc_count": 0,
"buckets": []
}
},
{
"key": "stream_protocol",
"doc_count": 1,
"v": {
"doc_count_error_upper_bound": 0,
"sum_other_doc_count": 0,
"buckets": []
}
}
]
}
},
{
"key": 1488163860000,
"key_as_string": "1488163860000",
"doc_count": 8,
"k": {
"doc_count_error_upper_bound": 0,
"sum_other_doc_count": 0,
"buckets": [
{
"key": "request_type",
"doc_count": 5,
"v": {
"doc_count_error_upper_bound": 0,
"sum_other_doc_count": 0,
"buckets": [
{
"key": "vod",
"doc_count": 3
}
]
}
},
...
What I would like is for the bucket with timestamp = 1488163800000 not to be returned, because all of its sub-sub-aggregations (the "v" buckets) are empty.
I'm trying to find duplicates without using scripts. Would it be possible to filter out a timestamp bucket if there are no duplicates in timestamp > k > v?
Otherwise, what I'm ultimately looking for is the 10 oldest {timestamp, k, v} combinations that are duplicates.
Thanks
Etienne