Buckets.doc_count > hits.total

Hi,

I am searching by identifier and running a terms aggregation, and the search returns a hits.total of 1.
However, the aggregation's agg.buckets[0].doc_count is 2 (more than the number of hits).
Is there a way to fix this so that the sum of doc_count across agg.buckets matches hits.total?

Request:
{
"size" : 10,
"query" : {
"function_score" : {
"query" : {
"multi_match" : {
"query" : "10000",
"fields" : [
"_all^1.0",
"description^2.5",
"identifier^3.0"
],
"type" : "phrase_prefix",
"operator" : "AND",
"slop" : 0,
"prefix_length" : 1,
"max_expansions" : 20,
"lenient" : false,
"zero_terms_query" : "NONE",
"boost" : 1.0
}
},
"functions" : [
{
"filter" : {
"match_all" : {
"boost" : 1.0
}
}
}
],
"score_mode" : "multiply",
"max_boost" : 3.4028235E38,
"boost" : 1.0
}
},
"post_filter" : {
"query_string" : {
"query" : "entityType:payment AND identifier:("xxx" AND "yyy")",
"default_field" : "identifier",
"fields" : [ ],
"use_dis_max" : false,
"tie_breaker" : 0.0,
"default_operator" : "and",
"auto_generate_phrase_queries" : false,
"max_determinized_states" : 100,
"enable_position_increments" : true,
"fuzziness" : "0",
"fuzzy_prefix_length" : 0,
"fuzzy_max_expansions" : 0,
"phrase_slop" : 0,
"escape" : false,
"split_on_whitespace" : true,
"all_fields" : false,
"boost" : 1.0
}
},
"min_score" : 1.0E-5,
"_source" : {
"includes" : [ ],
"excludes" : [ ]
},
"sort" : [
{
"_score" : {
"order" : "desc"
}
},
{
"_uid" : {
"order" : "asc"
}
}
],
"aggregations" : {
"agg" : {
"terms" : {
"field" : "entityType",
"value_type" : "string",
"size" : 10,
"min_doc_count" : 1,
"shard_min_doc_count" : 0,
"show_term_doc_count_error" : true,
"order" : [
{
"_count" : "desc"
},
{
"_term" : "asc"
}
]
}
}
},
"highlight" : {
"require_field_match" : false,
"fields" : {
"payload.*" : { },
"identifier" : { },
"description" : { }
}
}
}

Response:
{
"took": 1700,
"timed_out": false,
"_shards": {
"total": 3,
"successful": 3,
"failed": 0
},
"hits": {
"total": 1,
"max_score": null,
"hits": [
{
"_index": "xxx-payment_1520842027004",
"_type": "payment",
"_id": "...",
"_score": 2.4415238,

			....
        }
		...
      }
    ]
  },
  "aggregations": {
    "agg": {
      "doc_count_error_upper_bound": 0,
      "sum_other_doc_count": 0,
      "buckets": [
        {
          "key": "payment",
          "doc_count": 2,
          "doc_count_error_upper_bound": 0
        }
      ]
    }
  }
}

That is quite the request you have there. :slight_smile:

What seems to be going on here is that you have a post_filter in this request. Post filters are applied after the aggregation has been executed. As a result, post filters only influence the hits and not the aggregation scope. So, the aggregation is run on a larger set of documents than just the hits. This would explain why you are seeing a higher doc_count than the number of hits.

If this is not what you want, you could move the query_string query out of the post_filter and into the top-level query. For example, you could wrap the function_score query in a bool query and add the query_string query to that bool query as a filter clause, so that it restricts both the hits and the documents the aggregation runs on.

This topic was automatically closed 28 days after the last reply. New replies are no longer allowed.