Cannot get aggregation result without filter

I want filtered documents (hits) and an unfiltered aggregation result in the same query.

I have been using Elasticsearch 1.75 and was able to get this result there, but I can't do the same in Elasticsearch 7.
If I am missing something, please let me know.

es 1.75

es 1.75 create index and add docs

These steps are almost the same as for es7. I wanted to include the original curl sample, but I removed it because of the post length limit.
If anyone wants it, I can add it in a comment.

es 1.75 query

curl -X GET "localhost:9200/some_index/_search" -H 'Content-Type: application/json' -d'
{
  "query": {
    "function_score": {
      "query": {
        "bool": {
          "must":{
            "match_all": {}
          }
        }
      },
      "functions": []
    }
  },
  "filter": {
     "and": [
      { "terms":{ "active": [ true ] } },
      { "terms":{ "gender": [ "m" ] } }
      
    ]
  },
  "from": 0,
  "size": 10,
  "aggs": {
    "in_gen": {
      "filter": {
        "match_all": {}
      },
      "aggs": {
        "gen": {
          "terms": {
            "field": "gender",
            "size": 30,
            "shard_size": 30
          }
        }
      }
    },
    "in_age": {
      "filter": {
        "match_all": {}
      },
      "aggs": {
        "age": {
          "terms": {
            "field": "age",
            "size": 30,
            "shard_size": 30
          }
        }
      }
    }
  }
}
'

es 1.75 result

  • hit docs are filtered result (count: 3) with active: true and gender: 'M'
  • aggregation result is for all (count: 5)
{
  "took": 3,
  "timed_out": false,
  "_shards": {
    "total": 5,
    "successful": 5,
    "failed": 0
  },
  "hits": {
    "total": 3,
    "max_score": 1,
    "hits": [
      {
        ...
        "_source": {
          "user_id": 1,
          "active": true,
          "gender": "M",
          "age": 30
        }
      },
      {
        ...
        "_source": {
          "user_id": 2,
          "active": true,
          "gender": "M",
          "age": 37
        }
      },
      {
        ...
        "_source": {
          "user_id": 3,
          "active": true,
          "gender": "M",
          "age": 23
        }
      }
    ]
  },
  "aggregations": {
    "in_gen": {
      "doc_count": 5,
      "gen": {
        "doc_count_error_upper_bound": 0,
        "sum_other_doc_count": 0,
        "buckets": [
          {
            "key": "m",
            "doc_count": 3
          },
          {
            "key": "f",
            "doc_count": 2
          }
        ]
      }
    },
    "in_age": {
      "doc_count": 5,
      "age": {
        "doc_count_error_upper_bound": 0,
        "sum_other_doc_count": 0,
        "buckets": [
          {
            "key": 23,
            "doc_count": 1
          },
          {
            "key": 27,
            "doc_count": 1
          },
          {
            "key": 30,
            "doc_count": 1
          },
          {
            "key": 31,
            "doc_count": 1
          },
          {
            "key": 37,
            "doc_count": 1
          }
        ]
      }
    }
  }
}

es 7 (7.1.0)

Version: 7.1.0, Build: default/tar/606a173/2019-05-16T00:43:15.323135Z, JVM: 1.8.0_112

es7 - create index

curl -X DELETE "localhost:9700/some_index"

curl -X PUT "localhost:9700/some_index" -H 'Content-Type: application/json' -d'
{
  "settings": {
    "number_of_shards": 5,
    "number_of_replicas": 1,
    "analysis": {}
  },
  "mappings": {
    "dynamic": false,
    "properties": {
      "user_id": {
        "type": "integer"
      },
      "active": {
        "type": "boolean"
      },
      "gender": {
        "type": "keyword"
      },
      "age": {
        "type": "integer"
      }
    }
  }
}
'

es7 - add docs


curl -X PUT "localhost:9700/some_index/_doc/1" -H "Content-Type: application/json" -d'
{
  "user_id": 1,
  "active": true,
  "gender": "M",
  "age": 30
}
'
... 4 more docs here (omitted because of the post length limit).

es7 - query

curl -X GET "localhost:9700/some_index/_search" -H 'Content-Type: application/json' -d'
{
  "query": {
    "function_score": {
      "query": {
        "bool": {
          "filter": [
            { "terms":{ "active": [ true ] } },
            { "terms":{ "gender": [ "M" ] } }
          ]
        }
      },
      "functions": []
    }
  },
  "from": 0,
  "size": 10,
  "aggs": {
    "in_gen": {
      "filter": {
        "match_all": {}
      },
      "aggs": {
        "gen": {
          "terms": {
            "field": "gender",
            "size": 30,
            "shard_size": 30
          }
        }
      }
    },
    "in_age": {
      "filter": {
        "match_all": {}
      },
      "aggs": {
        "age": {
          "terms": {
            "field": "age",
            "size": 30,
            "shard_size": 30
          }
        }
      }
    }
  }
}
'

es7 result

  • hit docs are filtered result (count: 3) with active: true and gender: 'M'
  • aggregation result is filtered (count: 3) with active: true and gender: 'M'

How can I get the aggregation result for all documents (count: 5)?

{
  "took": 2,
  "timed_out": false,
  "_shards": {
    "total": 5,
    "successful": 5,
    "failed": 0,
    "skipped": 0
  },
  "hits": {
    "total": {
      "value": 3,
      "relation": "eq"
    },
    "max_score": 0,
    "hits": [
      {
        "_index": "some_index",
        "_type": "_doc",
        "_id": "3",
        "_score": 0,
        "_source": {
          "user_id": 3,
          "active": true,
          "gender": "M",
          "age": 23
        }
      },
      {
        "_index": "some_index",
        "_type": "_doc",
        "_id": "2",
        "_score": 0,
        "_source": {
          "user_id": 2,
          "active": true,
          "gender": "M",
          "age": 37
        }
      },
      {
        "_index": "some_index",
        "_type": "_doc",
        "_id": "1",
        "_score": 0,
        "_source": {
          "user_id": 1,
          "active": true,
          "gender": "M",
          "age": 30
        }
      }
    ]
  },
  "aggregations": {
    "in_gen": {
      "doc_count": 3,
      "gen": {
        "doc_count_error_upper_bound": 0,
        "sum_other_doc_count": 0,
        "buckets": [
          {
            "key": "M",
            "doc_count": 3
          }
        ]
      }
    },
    "in_age": {
      "doc_count": 3,
      "age": {
        "doc_count_error_upper_bound": 0,
        "sum_other_doc_count": 0,
        "buckets": [
          {
            "key": 23,
            "doc_count": 1
          },
          {
            "key": 30,
            "doc_count": 1
          },
          {
            "key": 37,
            "doc_count": 1
          }
        ]
      }
    }
  }
}

Hi @hangu_choi ,

It looks like the structure is not correct according to the documentation:
https://www.elastic.co/guide/en/elasticsearch/reference/7.2/query-dsl-function-score-query.html
You need to put the filter inside functions.

"query": { 
    "function_score": { 
        "query": { "match_all": {} },
        "functions": [ 
            { "filter": {"terms": {"active": [true]}}},
            {"filter": {"terms": {"gender": ["M"]}}}
....

There's an alternative, lighter syntax if you don't use function score, since there is no boost or score change here. But maybe that's only for the example, and in your real request you have boosts, etc. :zipper_mouth_face:
https://www.elastic.co/guide/en/elasticsearch/reference/7.2/query-filter-context.html

@gabriel_tessier

Thank you for the reply.
I do use a function_score query.
I just omitted the functions because the original query structure is too long.

As I understand it, a filter inside functions affects the final score,
but I want to filter the results without affecting the score.

So I used a filter in the bool query.

I think you should first use a global aggregation, which will use all the data regardless of the query. See https://www.elastic.co/guide/en/elasticsearch/reference/current/search-aggregations-bucket-global-aggregation.html

1 Like

Thank you @dadoonet,
It seems to be what I am looking for. I'll take a look at it.

Thank you @dadoonet, the global aggregation solved the first issue I encountered.

But I have one more problem.
I want an aggregation bucket that is filtered by multiple conditions, like below,
but it returns an "Expected [START_OBJECT]" error:

{"error":{"root_cause":[{"type":"parsing_exception","reason":"Expected [START_OBJECT] under [filter], but got a [START_ARRAY] in [in_global_gen_filtered]","line":31,"col":21}],"type":"parsing_exception","reason":"Expected [START_OBJECT] under [filter], but got a [START_ARRAY] in [in_global_gen_filtered]","line":31,"col":21},"status":400}

How do I filter with multiple conditions in an aggregation bucket?

curl -X GET "localhost:9700/some_index/_search" -H 'Content-Type: application/json' -d'

{
  "query": {
    "function_score": {
      "query": {
        "bool": {
          "filter": [
            { "terms":{ "active": [ true ] } },
            { "terms":{ "gender": [ "M" ] } }
          ]
        }
      },
      "functions": []
    }
  },
  "from": 0,
  "size": 10,
  "aggs": {
    "in_global": {
      "global": {},
      "aggs": {
        "global_gen": {
          "terms": {
            "field": "gender",
            "size": 30,
            "shard_size": 30
          }
        },
        "in_global_gen_filtered": {
          "filter": [
            { "terms":{ "active": [ true ] } },
            { "terms":{ "age": [ 30 ] } }
          ],
          "aggs": {
            "global_gen_filtered": {
              "terms": {
                "field": "gender",
                "size": 30,
                "shard_size": 30
              }
            }
          }
        }
      }
    },
    "in_gen": {
      "filter": {
        "match_all": {}
      },
      "aggs": {
        "gen": {
          "terms": {
            "field": "gender",
            "size": 30,
            "shard_size": 30
          }
        }
      }
    },
    "in_age": {
      "filter": {
        "match_all": {}
      },
      "aggs": {
        "age": {
          "terms": {
            "field": "age",
            "size": 30,
            "shard_size": 30
          }
        }
      }
    }
  }
}

'

I answered in the new question you opened.

This topic was automatically closed 28 days after the last reply. New replies are no longer allowed.