Nested aggregation with min_doc_count=0

Hi all,

I'm trying to use a nested aggregation with min_doc_count=0.

My schema:

PUT /example
{
  "mappings": {
    "doc": {
      "properties": {
        "id": {
          "type": "text"
        },
        "properties": {
          "type": "nested",
          "properties": {
            "key": {
              "type": "keyword"
            },
            "value": {
              "type": "keyword"
            }
          }
        }
      }
    }
  }
}

My documents are like this:

{ "id": "1", "properties": [ { "key": "color", "value": [ "red" ] }, { "key": "size", "value": [ "big" ] }, { "key": "shape", "value": [ "circle" ] } ] }
and these are my values:

id=1 color=red size=big shape=circle
id=2 color=blue size=big shape=square
id=3 color=red size=small shape=circle
id=4 color=yellow size=medium shape=rectangle

This is an example of my search request. I want only the documents with the color red, and with the aggregations I want to know the properties of the results and their associated counts:

{
  "_source": [
    "id"
  ],
  "query": {
    "bool": {
      "must": [
        {
          "nested": {
            "path": "properties",
            "query": {
              "bool": {
                "must": [
                  {
                    "match": {
                      "properties.key": "color"
                    }
                  },
                  {
                    "match": {
                      "properties.value": "red"
                    }
                  }
                ]
              }
            }
          }
        }
      ]
    }
  },
  "aggregations": {
    "filters": {
      "nested": {
        "path": "properties"
      },
      "aggregations": {
        "key": {
          "terms": {
            "field": "properties.key"
          },
          "aggregations": {
            "value": {
              "terms": {
                "field": "properties.value"
              }
            }
          }
        }
      }
    }
  }
}

The result is OK: I retrieve the documents with id=1 and id=3, and from the aggregations I know that both documents are circles, and that one is small and the other is big:

 "aggregations": {
    "filters": {
      "doc_count": 6,
      "key": {
        "doc_count_error_upper_bound": 0,
        "sum_other_doc_count": 0,
        "buckets": [
          {
            "key": "color",
            "doc_count": 2,
            "value": {
              "doc_count_error_upper_bound": 0,
              "sum_other_doc_count": 0,
              "buckets": [
                {
                  "key": "red",
                  "doc_count": 2
                }
              ]
            }
          },
          {
            "key": "shape",
            "doc_count": 2,
            "value": {
              "doc_count_error_upper_bound": 0,
              "sum_other_doc_count": 0,
              "buckets": [
                {
                  "key": "circle",
                  "doc_count": 2
                }
              ]
            }
          },
          {
            "key": "size",
            "doc_count": 2,
            "value": {
              "doc_count_error_upper_bound": 0,
              "sum_other_doc_count": 0,
              "buckets": [
                {
                  "key": "big",
                  "doc_count": 1
                },
                {
                  "key": "small",
                  "doc_count": 1
                }
              ]
            }
          }
        ]
      }
    }
  }

Now the problem is that I also want to know the other possible property values, i.e. to display the property values that have no associated document.

I changed my search request by adding "min_doc_count": 0 to the terms aggregation on "field": "properties.value".

As a result, all the property values are mixed together and are no longer grouped by key:

"aggregations": {
    "filters": {
      "doc_count": 4,
      "filters": {
        "doc_count": 2,
        "properties": {
          "doc_count": 6,
          "key": {
            "doc_count_error_upper_bound": 0,
            "sum_other_doc_count": 0,
            "buckets": [
              {
                "key": "color",
                "doc_count": 2,
                "value": {
                  "doc_count_error_upper_bound": 0,
                  "sum_other_doc_count": 0,
                  "buckets": [
                    {
                      "key": "red",
                      "doc_count": 2
                    },
                    {
                      "key": "big",
                      "doc_count": 0
                    },
                    {
                      "key": "circle",
                      "doc_count": 0
                    },
                    {
                      "key": "small",
                      "doc_count": 0
                    }
                  ]
                }
              },
              {
                "key": "shape",
                "doc_count": 2,
                "value": {
                  "doc_count_error_upper_bound": 0,
                  "sum_other_doc_count": 0,
                  "buckets": [
                    {
                      "key": "circle",
                      "doc_count": 2
                    },
                    {
                      "key": "big",
                      "doc_count": 0
                    },
                    {
                      "key": "red",
                      "doc_count": 0
                    },
                    {
                      "key": "small",
                      "doc_count": 0
                    }
                  ]
                }
              },
              {
...

Is it possible to obtain, for each key, only the values that belong to that key in the aggregation result?

Thanks for your help.

Julien

I'm not 100% sure I understand correctly what it is that you're trying to do. Do I understand it correctly as: you're looking for the key-value pairs that occur in all the documents that do not match your query?

If so, take a look at the global aggregation. This aggregation resets the aggregation scope to all documents, rather than just the documents that match your query. If you nest your existing aggregations inside a global aggregation, you get all existing key-value pairs, regardless of whether they are part of a document that matches your query:

GET example/_search
{
  "_source": [
    "id"
  ],
  "query": {
    "bool": {
      "must": [
        {
          "nested": {
            "path": "properties",
            "query": {
              "bool": {
                "must": [
                  {
                    "match": {
                      "properties.key": "color"
                    }
                  },
                  {
                    "match": {
                      "properties.value": "red"
                    }
                  }
                ]
              }
            }
          }
        }
      ]
    }
  },
  "aggs": {
    "get_all": {
      "global": {},
      "aggregations": {
        "filters": {
          "nested": {
            "path": "properties"
          },
          "aggregations": {
            "key": {
              "terms": {
                "field": "properties.key"
              },
              "aggregations": {
                "value": {
                  "terms": {
                    "field": "properties.value"
                  }
                }
              }
            }
          }
        }
      }
    }
  }
}

Now, if you want to exclude the key-value pairs that do match your query, you can nest a filter aggregation inside the global aggregation that is the opposite of your existing query. The request would look like this (note that inside the filter aggregation I use must_not instead of must, so that it aggregates on the documents that do not match the original query):

GET example/_search
{
  "_source": [
    "id"
  ],
  "query": {
    "bool": {
      "must": [
        {
          "nested": {
            "path": "properties",
            "query": {
              "bool": {
                "must": [
                  {
                    "match": {
                      "properties.key": "color"
                    }
                  },
                  {
                    "match": {
                      "properties.value": "red"
                    }
                  }
                ]
              }
            }
          }
        }
      ]
    }
  },
  "aggs": {
    "get_all": {
      "global": {},
      "aggs": {
        "remove_matching_doc": {
          "filter": {
            "bool": {
              "must_not": [
                {
                  "nested": {
                    "path": "properties",
                    "query": {
                      "bool": {
                        "must": [
                          {
                            "match": {
                              "properties.key": "color"
                            }
                          },
                          {
                            "match": {
                              "properties.value": "red"
                            }
                          }
                        ]
                      }
                    }
                  }
                }
              ]
            }
          },
          "aggregations": {
            "filters": {
              "nested": {
                "path": "properties"
              },
              "aggregations": {
                "key": {
                  "terms": {
                    "field": "properties.key"
                  },
                  "aggregations": {
                    "value": {
                      "terms": {
                        "field": "properties.value"
                      }
                    }
                  }
                }
              }
            }
          }
        }
      }
    }
  }
}

Hope this helps!

This topic was automatically closed 28 days after the last reply. New replies are no longer allowed.