[QUERY] Top hits of nested fields sum aggregated by parent document

Hi,

I am a begginner in Elastic Search and I don't know how to write my aggregation correctly and even if what I want is possible.
But between nested, inner_hits, top_hits, reverse_nested, bucket_sort, composite, etc... there is probably a solution somewhere :).

What I want and what I can achieve:

  • I have 1 to 1.5M documents containing 1 to 4000 nested documents (but total nested doc < 2M).
    1. I need to filter by several fields on NESTED DOCUMENTS (not on main) for all the following aggregations
    1. I need the total number of MAIN DOCUMENTS once filtered by NESTED DOCUMENTS field(s)
    1. I need global stats (sum, avg) on some NESTED DOCUMENT FIELDS
    1. I need global median on some NESTED DOCUMENT FIELDS
    1. I need to return top_hits of SUM (not count) from NESTED DOCUMENT FIELDS aggregated by a NESTED DOCUMENT FIELD

This is where I'm having trouble:

    1. I need to return top_hits of SUM (not count) from NESTED DOCUMENT FIELDS aggregated by parent document (don't know how)

I have created an example with the same type of mapping of my documents (the mapping is not fixed and can be changed if necessary for a solution) :

Some query for testing:

PUT /category
{
  "settings" : {
      "number_of_shards" : 2,
      "number_of_replicas" : 1
  },
  "mappings" : {
    "properties" : {
      "id" : { "type" : "keyword" },
      "name" : { "type" : "text" },
      "available" : { "type" : "boolean" },
      "products" : { 
        "type": "nested",
        "properties": {
          "id" : { "type" : "keyword" },
          "name" : { "type" : "text" },
          "price" : { "type" : "double" },
          "quantity" : { "type" : "integer" },
          "weight" : { "type" : "double" },
          "color" : { "type" : "keyword" },
          "brand" : {
            "type": "object",
            "properties": {
              "id" : { "type" : "keyword" },
              "name" : { "type" : "text" }
            }
          },
          "madeIn" : {
            "type": "object",
            "properties": {
              "id" : { "type" : "keyword" },
              "name" : { "type" : "text" }
            }
          }
        }
      }
    }
  }
}
PUT /category/_doc/1
{
  "id":"1",
  "name":"car",
  "available": true,
  "products": [
    {
      "id":"1",
      "name":"ferrari",
      "price": 10000,
      "quantity":2,
      "weight": 1255000,
      "color": "red",
      "brand": {
        "id": 1,
        "name": "ferrari"
      },
      "madeIn": {
        "id": 1,
        "name":"Italy"
      }
    },
    {
      "id":"2",
      "name":"clio V",
      "price": 2000,
      "quantity":20,
      "weight": 1250000,
      "color": "blue",
      "brand": {
        "id": 2,
        "name": "renault"
      },
      "madeIn": {
        "id": 2,
        "name":"France"
      }
    },
    {
      "id":"3",
      "name":"alpine a110",
      "price": 8000,
      "quantity":5,
      "weight": 1102000,
      "color": "blue",
      "brand": {
        "id": 3,
        "name": "alpine"
      },
      "madeIn": {
        "id": 2,
        "name":"France"
      }
    },
    {
      "id":"4",
      "name":"megane 4",
      "price": 6000,
      "quantity":35,
      "weight": 1318000,
      "color": "red",
      "brand": {
        "id": 2,
        "name": "renault"
      },
      "madeIn": {
        "id": 2,
        "name":"France"
      }
    },
    {
      "id":"5",
      "name":"opel corsa",
      "price": 4000,
      "quantity":50,
      "weight": 1165000,
      "color": "red",
      "brand": {
        "id": 4,
        "name": "opel"
      },
      "madeIn": {
        "id": 3,
        "name":"Germany"
      }
    },
    {
      "id":"6",
      "name":"zoe",
      "price": 5000,
      "quantity":100,
      "weight": 1502000,
      "color": "red",
      "brand": {
        "id": 2,
        "name": "renault"
      },
      "madeIn": {
        "id": 2,
        "name":"France"
      }
    }
  ]
}

PUT /category/_doc/2
{
  "id":"2",
  "name":"lego",
  "available": true,
  "products": [
    {
      "id":"7",
      "name":"plate 2x4 B",
      "price": 10,
      "quantity":20000,
      "weight": 5,
      "color": "blue",
      "brand": {
        "id": 5,
        "name": "lego"
      },
      "madeIn": {
        "id": 4,
        "name":"Denmark"
      }
    },
    {
      "id":"8",
      "name":"plate 12x4 R",
      "price": 100,
      "quantity":2000,
      "weight": 30,
      "color": "red",
      "brand": {
        "id": 5,
        "name": "lego"
      },
      "madeIn": {
        "id": 4,
        "name":"Denmark"
      }
    },
    {
      "id":"9",
      "name":"seat 2x2x2 R",
      "price": 4,
      "quantity":10000,
      "weight": 7,
      "color": "red",
      "brand": {
        "id": 5,
        "name": "lego"
      },
      "madeIn": {
        "id": 4,
        "name":"Denmark"
      }
    },
    {
      "id":"10",
      "name":"flat tile 1x2 Bk",
      "price": 2,
      "quantity":5000,
      "weight": 2,
      "color": "black",
      "brand": {
        "id": 5,
        "name": "lego"
      },
      "madeIn": {
        "id": 4,
        "name":"Denmark"
      }
    },
    {
      "id":"11",
      "name":"minifig yoda",
      "price": 1000,
      "quantity":2,
      "weight": 20,
      "color": "green",
      "brand": {
        "id": 5,
        "name": "lego"
      },
      "madeIn": {
        "id": 4,
        "name":"Denmark"
      }
    }
  ]
}

PUT /category/_doc/3
{
  "id":"3",
  "name":"food",
  "available": true,
  "products": [
    {
      "id":"12",
      "name":"tomato",
      "price": 1,
      "quantity":200000,
      "weight": 200,
      "color": "red",
      "brand": {
        "id": 5,
        "name": "barilla"
      },
      "madeIn": {
        "id": 1,
        "name":"Italy"
      }
    },
    {
      "id":"13",
      "name":"very rare spicy",
      "price": 12000,
      "quantity":20,
      "weight": 100,
      "color": "red",
      "brand": {
        "id": 6,
        "name": "unknown"
      },
      "madeIn": {
        "id": 5,
        "name":"Chili"
      }
    },
    {
      "id":"14",
      "name":"spinach",
      "price": 10,
      "quantity":555555,
      "weight": 50,
      "color": "green",
      "brand": {
        "id": 6,
        "name": "unknown"
      },
      "madeIn": {
        "id": 2,
        "name":"France"
      }
    }
  ]
}
POST category/_search
{
  "size":0,
  "query": {
    "match_all": {}
  },
  "aggs": {
    "productsAggs": {
      "nested": {
        "path": "products"
      },
      "aggs": {
        "productsFiltered": {
          //1. First apply the filter(s) if needed
          "filter": {
            "term": {
              "products.color": "red"
            }
          },
          "aggs": {
            //2. total number of found categories after filtering by color (or anything else)
            "totalFoundCategories" : {
              "reverse_nested": {}
            },
            //3. sum and avg price
            "priceStats": {
              "stats": {
                "field": "products.price"
              }
            },
            //3. sum and avg quantity
            "quantityStats": {
              "stats": {
                "field": "products.quantity"
              }
            },
            //3. sum and avg weight
            "weightStats": {
              "stats": {
                "field": "products.weight"
              }
            },
            //4. median price
            "priceMedian": {
              "percentiles": {
                "field": "products.price",
                "percents": [
                  50
                ]
              }
            },
            //4. median quantity
            "quantityMedian": {
              "percentiles": {
                "field": "products.quantity",
                "percents": [
                  50
                ]
              }
            },
            //4. median weight
            "weightMedian": {
              "percentiles": {
                "field": "products.weight",
                "percents": [
                  50
                ]
              }
            },
            //5. TOP XX prices aggregated by brand
            "byBrand": {
              "terms": {
                "field": "products.brand.id",
                "size": 100
              },
              "aggs": {
                "sumByBrand": {
                  "sum": {
                    "field": "products.price"
                  }
                },
                "price_brand_bucket_sort": {
                  "bucket_sort": {
                    "sort": [
                      {
                        "sumByBrand": {"order": "desc"}
                      }
                    ],
                    "size":100
                  }
                },
                "name" : {
                  "top_hits": {
                    "size": 1, 
                    "_source": ["products.brand.name"]
                  }
                }
              }
            },
            //5. TOP XX price aggregated by madeIn
            "byMadeIn": {
              "terms": {
                "field": "products.madeIn.id"
              },
              "aggs": {
                "sumByMadeIn": {
                  "sum": {
                    "field": "products.price"
                  }
                },
                "price_madeIn_bucket_sort": {
                  "bucket_sort": {
                    "sort": [
                      {
                        "sumByMadeIn": {"order": "desc"}
                      }
                    ],
                    "size":100
                  }
                },
                "name" : {
                  "top_hits": {
                    "size": 1, 
                    "_source": ["products.madeIn.name"]
                  }
                }
              }
            }
            //6. The agg missing : top category hits sorted by sum(price) of their red products
            //We can't use terms to group by category because we can have 1-2M category items.
          }
        }
      }
    }
  }
}

NOTE :

  • In my example there are only few categories and products, in my environment, the main total number of documents is around 1.5M and sub documents around 2M.
  • I am testing this query on Elastic 8.15.2

From Elasticsearch to Elastic Search

From Elastic Search to Elasticsearch

This topic was automatically closed 28 days after the last reply. New replies are no longer allowed.