How to return the most recent document for each distinct jobName in a query


(Tiannan Wang) #1

I have a list of build info records under jenkins/build. The same jobName can appear in many records, one per build, each with a different build time. For each distinct jobName, I need the query to return only the most recent document.

Here is my current query:
GET jenkins/build/_search
{
"query": {
"bool": {
"should": [
{ "match": { "pluginName": "cobertura" }},
{ "match": {"pluginName": "clover" }},
{ "match": { "pluginName": "jenkins-elasticsearch-zaphyr" }}
]
}
},
"sort":{ "BUILD_ID": "desc" },

"size": 1,
"aggs" : {
"distinct_rec" : {
"terms" : { "field" : "jobName" }
}
}
}

It returns:
{
"took": 592,
"timed_out": false,
"_shards": {
"total": 5,
"successful": 5,
"failed": 0
},
"hits": {
"total": 643,
"max_score": null,
"hits": [
{
"_index": "jenkins",
"_type": "build",
"_id": "AU-Fk10Pt-Hfa4CI2JWt",
"_score": null,
"source": {
"number": 2527,
"jobName": "FLDR_DM/TRUNK/DM/DM_CI",
"result": "SUCCESS",
"startTime": 1441053492048,
"duration": 1572975,
"environment": {
"
": "21693/apps/jdk/jdk1.7.0/bin/java",
"_AST_FEATURES": "UNIVERSE - ucb",
"A__z": ""*SHLVL",
"APP_UTIL_DATADIR": "/apps/SFCMData",
"APP_UTIL_DIR": "/apps/util",
"applicationName": "DATAMGT",
"AWS_ACCESS_KEY_ID": "AKIAJDLTPWWVTBCPFILQ",
"AWS_SECRET_KEY": "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",
"BUILD_CAUSE": "SCMTRIGGER",
"BUILD_CAUSE_SCMTRIGGER": "true",
"BUILD_DISPLAY_NAME": "#2527",
"BUILD_ID": "20150831163812",
.....
"sort": [
null
]
}
]
},
"aggregations": {
"distinct_rec": {
"doc_count_error_upper_bound": 0,
"sum_other_doc_count": 1,
"buckets": [
{
"key": "DIVER/DIVER_Services/DIVER_Services-ci",
"doc_count": 360
},
{
"key": "FLDR_DM/TRUNK/DM/DM_CI",
"doc_count": 180
},
{
"key": "FLDR_OLA/FOLA_APP/ola_app_ci",
"doc_count": 54
},
{
"key": "SQD_1",
"doc_count": 22
},
{
"key": "SQD_2",
"doc_count": 7
},
{
"key": "FLDR_DM/TRUNK/DM/Herd_CI",
"doc_count": 6
},
{
"key": "Ver1",
"doc_count": 6
},
{
"key": "SQD_1/default",
"doc_count": 3
},
{
"key": "DIVER/DIVER_Services/DIVER_Services-daily",
"doc_count": 2
},
{
"key": "FLDR_DM/test_dm_carbom",
"doc_count": 2
}
]
}
}
}

The aggregation groups the documents by jobName, but I also need the properties of the most recent build for each job. Ideally, the hits section would contain only the most recent record for each distinct jobName. Can you help me with this? Thanks.


(system) #2