[QUERY] Top hits of nested fields sum aggregated by parent document

Hi,

I am a begginner in Elastic Search and I don't know how to write my aggregation correctly and even if what I want is possible.
But between nested, inner_hits, top_hits, reverse_nested, bucket_sort, composite, etc... there is probably a solution somewhere :).

What I want and what I can achieve:

  • I have 1 to 1.5M documents containing 1 to 4000 nested documents (but total nested doc < 2M).
    1. I need to filter by several fields on NESTED DOCUMENTS (not on main) for all the following aggregations
    1. I need the total number of MAIN DOCUMENTS once filtered by NESTED DOCUMENTS field(s)
    1. I need global stats (sum, avg) on some NESTED DOCUMENT FIELDS
    1. I need global median on some NESTED DOCUMENT FIELDS
    1. I need to return top_hits of SUM (not count) from NESTED DOCUMENT FIELDS aggregated by a NESTED DOCUMENT FIELD

This is where I'm having trouble:

    1. I need to return top_hits of SUM (not count) from NESTED DOCUMENT FIELDS aggregated by parent document (don't know how)

I have created an example with the same type of mapping of my documents (the mapping is not fixed and can be changed if necessary for a solution) :

Some query for testing:

PUT /category
{
  "settings" : {
      "number_of_shards" : 2,
      "number_of_replicas" : 1
  },
  "mappings" : {
    "properties" : {
      "id" : { "type" : "keyword" },
      "name" : { "type" : "text" },
      "available" : { "type" : "boolean" },
      "products" : { 
        "type": "nested",
        "properties": {
          "id" : { "type" : "keyword" },
          "name" : { "type" : "text" },
          "price" : { "type" : "double" },
          "quantity" : { "type" : "integer" },
          "weight" : { "type" : "double" },
          "color" : { "type" : "keyword" },
          "brand" : {
            "type": "object",
            "properties": {
              "id" : { "type" : "keyword" },
              "name" : { "type" : "text" }
            }
          },
          "madeIn" : {
            "type": "object",
            "properties": {
              "id" : { "type" : "keyword" },
              "name" : { "type" : "text" }
            }
          }
        }
      }
    }
  }
}
PUT /category/_doc/1
{
  "id":"1",
  "name":"car",
  "available": true,
  "products": [
    {
      "id":"1",
      "name":"ferrari",
      "price": 10000,
      "quantity":2,
      "weight": 1255000,
      "color": "red",
      "brand": {
        "id": 1,
        "name": "ferrari"
      },
      "madeIn": {
        "id": 1,
        "name":"Italy"
      }
    },
    {
      "id":"2",
      "name":"clio V",
      "price": 2000,
      "quantity":20,
      "weight": 1250000,
      "color": "blue",
      "brand": {
        "id": 2,
        "name": "renault"
      },
      "madeIn": {
        "id": 2,
        "name":"France"
      }
    },
    {
      "id":"3",
      "name":"alpine a110",
      "price": 8000,
      "quantity":5,
      "weight": 1102000,
      "color": "blue",
      "brand": {
        "id": 3,
        "name": "alpine"
      },
      "madeIn": {
        "id": 2,
        "name":"France"
      }
    },
    {
      "id":"4",
      "name":"megane 4",
      "price": 6000,
      "quantity":35,
      "weight": 1318000,
      "color": "red",
      "brand": {
        "id": 2,
        "name": "renault"
      },
      "madeIn": {
        "id": 2,
        "name":"France"
      }
    },
    {
      "id":"5",
      "name":"opel corsa",
      "price": 4000,
      "quantity":50,
      "weight": 1165000,
      "color": "red",
      "brand": {
        "id": 4,
        "name": "opel"
      },
      "madeIn": {
        "id": 3,
        "name":"Germany"
      }
    },
    {
      "id":"6",
      "name":"zoe",
      "price": 5000,
      "quantity":100,
      "weight": 1502000,
      "color": "red",
      "brand": {
        "id": 2,
        "name": "renault"
      },
      "madeIn": {
        "id": 2,
        "name":"France"
      }
    }
  ]
}

PUT /category/_doc/2
{
  "id":"2",
  "name":"lego",
  "available": true,
  "products": [
    {
      "id":"7",
      "name":"plate 2x4 B",
      "price": 10,
      "quantity":20000,
      "weight": 5,
      "color": "blue",
      "brand": {
        "id": 5,
        "name": "lego"
      },
      "madeIn": {
        "id": 4,
        "name":"Denmark"
      }
    },
    {
      "id":"8",
      "name":"plate 12x4 R",
      "price": 100,
      "quantity":2000,
      "weight": 30,
      "color": "red",
      "brand": {
        "id": 5,
        "name": "lego"
      },
      "madeIn": {
        "id": 4,
        "name":"Denmark"
      }
    },
    {
      "id":"9",
      "name":"seat 2x2x2 R",
      "price": 4,
      "quantity":10000,
      "weight": 7,
      "color": "red",
      "brand": {
        "id": 5,
        "name": "lego"
      },
      "madeIn": {
        "id": 4,
        "name":"Denmark"
      }
    },
    {
      "id":"10",
      "name":"flat tile 1x2 Bk",
      "price": 2,
      "quantity":5000,
      "weight": 2,
      "color": "black",
      "brand": {
        "id": 5,
        "name": "lego"
      },
      "madeIn": {
        "id": 4,
        "name":"Denmark"
      }
    },
    {
      "id":"11",
      "name":"minifig yoda",
      "price": 1000,
      "quantity":2,
      "weight": 20,
      "color": "green",
      "brand": {
        "id": 5,
        "name": "lego"
      },
      "madeIn": {
        "id": 4,
        "name":"Denmark"
      }
    }
  ]
}

PUT /category/_doc/3
{
  "id":"3",
  "name":"food",
  "available": true,
  "products": [
    {
      "id":"12",
      "name":"tomato",
      "price": 1,
      "quantity":200000,
      "weight": 200,
      "color": "red",
      "brand": {
        "id": 5,
        "name": "barilla"
      },
      "madeIn": {
        "id": 1,
        "name":"Italy"
      }
    },
    {
      "id":"13",
      "name":"very rare spicy",
      "price": 12000,
      "quantity":20,
      "weight": 100,
      "color": "red",
      "brand": {
        "id": 6,
        "name": "unknown"
      },
      "madeIn": {
        "id": 5,
        "name":"Chili"
      }
    },
    {
      "id":"14",
      "name":"spinach",
      "price": 10,
      "quantity":555555,
      "weight": 50,
      "color": "green",
      "brand": {
        "id": 6,
        "name": "unknown"
      },
      "madeIn": {
        "id": 2,
        "name":"France"
      }
    }
  ]
}
POST category/_search
{
  "size":0,
  "query": {
    "match_all": {}
  },
  "aggs": {
    "productsAggs": {
      "nested": {
        "path": "products"
      },
      "aggs": {
        "productsFiltered": {
          //1. First apply the filter(s) if needed
          "filter": {
            "term": {
              "products.color": "red"
            }
          },
          "aggs": {
            //2. total number of found categories after filtering by color (or anything else)
            "totalFoundCategories" : {
              "reverse_nested": {}
            },
            //3. sum and avg price
            "priceStats": {
              "stats": {
                "field": "products.price"
              }
            },
            //3. sum and avg quantity
            "quantityStats": {
              "stats": {
                "field": "products.quantity"
              }
            },
            //3. sum and avg weight
            "weightStats": {
              "stats": {
                "field": "products.weight"
              }
            },
            //4. median price
            "priceMedian": {
              "percentiles": {
                "field": "products.price",
                "percents": [
                  50
                ]
              }
            },
            //4. median quantity
            "quantityMedian": {
              "percentiles": {
                "field": "products.quantity",
                "percents": [
                  50
                ]
              }
            },
            //4. median weight
            "weightMedian": {
              "percentiles": {
                "field": "products.weight",
                "percents": [
                  50
                ]
              }
            },
            //5. TOP XX prices aggregated by brand
            "byBrand": {
              "terms": {
                "field": "products.brand.id",
                "size": 100
              },
              "aggs": {
                "sumByBrand": {
                  "sum": {
                    "field": "products.price"
                  }
                },
                "price_brand_bucket_sort": {
                  "bucket_sort": {
                    "sort": [
                      {
                        "sumByBrand": {"order": "desc"}
                      }
                    ],
                    "size":100
                  }
                },
                "name" : {
                  "top_hits": {
                    "size": 1, 
                    "_source": ["products.brand.name"]
                  }
                }
              }
            },
            //5. TOP XX price aggregated by madeIn
            "byMadeIn": {
              "terms": {
                "field": "products.madeIn.id"
              },
              "aggs": {
                "sumByMadeIn": {
                  "sum": {
                    "field": "products.price"
                  }
                },
                "price_madeIn_bucket_sort": {
                  "bucket_sort": {
                    "sort": [
                      {
                        "sumByMadeIn": {"order": "desc"}
                      }
                    ],
                    "size":100
                  }
                },
                "name" : {
                  "top_hits": {
                    "size": 1, 
                    "_source": ["products.madeIn.name"]
                  }
                }
              }
            }
            //6. The agg missing : top category hits sorted by sum(price) of their red products
            //We can't use terms to group by category because we can have 1-2M category items.
          }
        }
      }
    }
  }
}

NOTE :

  • In my example there are only few categories and products, in my environment, the main total number of documents is around 1.5M and sub documents around 2M.
  • I am testing this query on Elastic 8.15.2

From Elasticsearch to Elastic Search

From Elastic Search to Elasticsearch