Merge doc_count aggregation and sum


(Zekeriya Kaplan) #1

For this question I want you to focus on "entities" field

I have lots of this kind of data in ES

{
  "sighting_of_ref": "indicator--6fcfde95-97b8-49b4-a9c2-0be60d7de3bf",
  "observed_data_refs": [
    "observed-data--6e56cd33-6255-4ec0-9a2b-56d2d0d1775e"
  ],
  "x_nova_confidence": "0.0",
  "created": "2017-11-28T09:01:03.000156Z",
  "modified": "2017-11-28T09:01:03.000156Z",
  "created_by_ref": "identity--dd33d8b8-6eb8-44d0-a86a-f524968510d2",
  "id": "sighting--e93898cf-ff00-4991-a63a-a11e6a198e1e",
  "x_nova_extensions": {
    "ids_cumulative_result_per_user": {
      "total_duration": "8.3479386156E10",
      "total_count": "8.3479386156E10"
    },
    "entities": [
      {
        "text": "49",
        "type": "sourceCountryCode"
      },
      {
        "text": "02122219455@53.205.223.157",
        "type": "caller"
      }
    ]
  },
  "type": "sighting",
  "labels": [
    "rule-hit",
    "rule-hit-id-rule_174",
    "alert"
  ]
}

In some situations, even though the x_nova_extensions.entities.type value may differ — e.g. "caller", "callee", "CallFrom", "CallTo" — those types all represent the same thing for my case

What I need to aggregate is the count of different x_nova_extensions.entities.type values, for only x_nova_extensions.entities.type = "caller", "callee", "CallFrom", "CallTo"

Here is my query

{
  "size": 0,
  "query": {
    "bool": {
      "filter": [
        {
          "nested": {
            "path": "x_nova_extensions.entities",
            "query": {
              "bool": {
                "filter": [
                  {
                    "match": {
                      "x_nova_extensions.entities.text": "49"
                    }
                  },
                  {
                    "terms": {
                      "x_nova_extensions.entities.type": [
                        "sourceCountryCode",
                        "CallerIPCountryCode",
                        "CallerIPCountryName",
                        "CallerIPCountryCode",
                        "CallerPhoneCountryName"
                      ]
                    }
                  }
                ]
              }
            }
          }
        }
      ]
    }
  },
  "aggs": {
    "HIGH_RISK_USERS": {
      "nested": {
        "path": "x_nova_extensions.entities"
      },
      "aggs": {
        "aggs1": {
          "terms": {
            "field": "x_nova_extensions.entities.type",
            "include": [
              "caller",
              "callee",
              "CallFrom",
              "CallTo"
            ]
          },
          "aggs": {
            "USERS_COUNT": {
              "terms": {
                "field": "x_nova_extensions.entities.text",
                "size": 10,
                "order": {
                  "_count": "desc"
                }
              }
            }
          }
        }
      }
    }
  }
}

And the output of this query like

        {
          "took": 4,
          "timed_out": false,
          "_shards": {
            "total": 5,
            "successful": 5,
            "failed": 0
          },
          "hits": {
            "total": 502,
            "max_score": 0,
            "hits": []
          },
          "aggregations": {
            "HIGH_RISK_USERS": {
              "doc_count": 1004,
              "aggs1": {
                "doc_count_error_upper_bound": 0,
                "sum_other_doc_count": 0,
                "buckets": [
                  {
                    "key": "caller",
                    "doc_count": 500,
                    "USERS_COUNT": {
                      "doc_count_error_upper_bound": 5,
                      "sum_other_doc_count": 435,
                      "buckets": [
                        {
                          "key": "02122219455@53.205.223.157",
                          "doc_count": 42
                        },
                        {
                          "key": "+02123601826@53.205.223.157",
                          "doc_count": 3
                        },
                        {
                          "key": "02120088524@53.205.223.157",
                          "doc_count": 1
                        }
                      ]
                    }
                  },
                  {
                    "key": "CallFrom",
                    "doc_count": 2,
                    "USERS_COUNT": {
                      "doc_count_error_upper_bound": 0,
                      "sum_other_doc_count": 0,
                      "buckets": [
                        {
                          "key": "02122219455@53.205.223.157",
                          "doc_count": 25
                        }
                      ]
                    }
                  }
                ]
              }
            }
          }
        }

But I want the result above like this format

{
  "took": 4,
  "timed_out": false,
  "_shards": {
    "total": 5,
    "successful": 5,
    "failed": 0
  },
  "hits": {
    "total": 502,
    "max_score": 0,
    "hits": [
      
    ]
  },
  "aggregations": {
    "HIGH_RISK_USERS": {
      "doc_count": 1004,
      "aggs1": {
        "doc_count_error_upper_bound": 0,
        "sum_other_doc_count": 0,
        "buckets": [
          {
            "key": "key_name_is_not_important_at_first",
            "doc_count": 500,
            "USERS_COUNT": {
              "doc_count_error_upper_bound": 5,
              "sum_other_doc_count": 435,
              "buckets": [
                {
                  "key": "02122219455@53.205.223.157",
                  "doc_count": 67
                },
                {
                  "key": "+02123601826@53.205.223.157",
                  "doc_count": 3
                },
                {
                  "key": "02120088524@53.205.223.157",
                  "doc_count": 1
                }
              ]
            }
          }
        ]
      }
    }
  }
}

I mean I want to merge result for bucket "caller" and bucket "CallFrom" and sum "doc_count" for every unique key

What my query is supposed to be?


(system) #2

This topic was automatically closed 28 days after the last reply. New replies are no longer allowed.