For this question I want you to focus on "entities" field
I have lots of this kind of data in ES
{
"sighting_of_ref": "indicator--6fcfde95-97b8-49b4-a9c2-0be60d7de3bf",
"observed_data_refs": [
"observed-data--6e56cd33-6255-4ec0-9a2b-56d2d0d1775e"
],
"x_nova_confidence": "0.0",
"created": "2017-11-28T09:01:03.000156Z",
"modified": "2017-11-28T09:01:03.000156Z",
"created_by_ref": "identity--dd33d8b8-6eb8-44d0-a86a-f524968510d2",
"id": "sighting--e93898cf-ff00-4991-a63a-a11e6a198e1e",
"x_nova_extensions": {
"ids_cumulative_result_per_user": {
"total_duration": "8.3479386156E10",
"total_count": "8.3479386156E10"
},
"entities": [
{
"text": "49",
"type": "sourceCountryCode"
},
{
"text": "02122219455@53.205.223.157",
"type": "caller"
}
]
},
"type": "sighting",
"labels": [
"rule-hit",
"rule-hit-id-rule_174",
"alert"
]
}
In some situation, even if x_nova_extension.entities.type value may be different like "caller,callee,CallFrom, CallTo", those represent the same thing for my case
What I need to aggregate is count of different x_nova_extension.entities.type for only x_nova_extension.entities.type = "caller","calle","Callfrom","CallTo"
Here is my query
{
"size": 0,
"query": {
"bool": {
"filter": [
{
"nested": {
"path": "x_nova_extensions.entities",
"query": {
"bool": {
"filter": [
{
"match": {
"x_nova_extensions.entities.text": "49"
}
},
{
"terms": {
"x_nova_extensions.entities.type": [
"sourceCountryCode",
"CallerIPCountryCode",
"CallerIPCountryName",
"CallerIPCountryCode",
"CallerPhoneCountryName"
]
}
}
]
}
}
}
}
]
}
},
"aggs": {
"HIGH_RISK_USERS": {
"nested": {
"path": "x_nova_extensions.entities"
},
"aggs": {
"aggs1": {
"terms": {
"field": "x_nova_extensions.entities.type",
"include": [
"caller",
"callee",
"CallFrom",
"CallTo"
]
},
"aggs": {
"USERS_COUNT": {
"terms": {
"field": "x_nova_extensions.entities.text",
"size": 10,
"order": {
"_count": "desc"
}
}
}
}
}
}
}
}
}
And the output of this query like
{
"took": 4,
"timed_out": false,
"_shards": {
"total": 5,
"successful": 5,
"failed": 0
},
"hits": {
"total": 502,
"max_score": 0,
"hits": []
},
"aggregations": {
"HIGH_RISK_USERS": {
"doc_count": 1004,
"aggs1": {
"doc_count_error_upper_bound": 0,
"sum_other_doc_count": 0,
"buckets": [
{
"key": "**caller**",
"doc_count": 500,
"USERS_COUNT": {
"doc_count_error_upper_bound": 5,
"sum_other_doc_count": 435,
"buckets": [
{
"key": "02122219455@53.205.223.157",
"doc_count": 42
},
{
"key": "+02123601826@53.205.223.157",
"doc_count": 3
},
{
"key": "02120088524@53.205.223.157",
"doc_count": 1
}
]
}
},
{
"key": "**CallFrom**",
"doc_count": 2,
"USERS_COUNT": {
"doc_count_error_upper_bound": 0,
"sum_other_doc_count": 0,
"buckets": [
{
"key": "02122219455@53.205.223.157",
"doc_count": 25
}
]
}
}
]
}
}
}
}
But I want the result above like this format
{
"took": 4,
"timed_out": false,
"_shards": {
"total": 5,
"successful": 5,
"failed": 0
},
"hits": {
"total": 502,
"max_score": 0,
"hits": [
]
},
"aggregations": {
"HIGH_RISK_USERS": {
"doc_count": 1004,
"aggs1": {
"doc_count_error_upper_bound": 0,
"sum_other_doc_count": 0,
"buckets": [
{
"key": "**key_name_is_not_important_at_first**",
"doc_count": 500,
"USERS_COUNT": {
"doc_count_error_upper_bound": 5,
"sum_other_doc_count": 435,
"buckets": [
{
"key": "02122219455@53.205.223.157",
"doc_count": 67
},
{
"key": "+02123601826@53.205.223.157",
"doc_count": 3
},
{
"key": "02120088524@53.205.223.157",
"doc_count": 1
}
]
}
}
]
}
}
}
}
I mean I want to merge result for bucket "caller" and bucket "CallFrom" and sum "doc_count" for every unique key
What my query is supposed to be?