Filter search hits of parent documents based on value of child aggregation

Building a hotel search engine with parent/child documents indexed together - units and availability. How do I filter hits based on a max dollar amount for a given stay.


Build mapping:

PUT /units/

{

	"mappings": {
		"_doc": {
			"properties": {
				"join": {
					"type": "join",
					"relations": {
						"unit": "availability"
					}
				},
				"id": {
					"type": "integer"
				},
				"date": {
					"type": "date",
					"format": "yyyy-MM-dd"
				},
				"available": {
					"type": "boolean"
				},
				"rate": {
					"type": "integer"
				}
			}
		}
	}
}

Add unit:

PUT /units/_doc/1

{
  "join": {
    "name": "unit"
  },
  "unit_id": 1
}

Add some availability:

PUT /units/_doc/2?routing=1

{
  "join": {
    "name": "availability",
    "parent": 1
  },
  "date": "2018-11-01",
  "available": true,
  "rate": 150
}
PUT /units/_doc/3?routing=1

{
  "join": {
    "name": "availability",
    "parent": 1
  },
  "date": "2018-11-02",
  "available": true,
  "rate": 125
}
PUT /units/_doc/4?routing=1

{
  "join": {
    "name": "availability",
    "parent": 1
  },
  "date": "2018-11-03",
  "available": true,
  "rate": 210
}

Result in the following:

GET /units/_doc/_search
{
	"took": 2,
	"timed_out": false,
	"_shards": {
		"total": 5,
		"successful": 5,
		"skipped": 0,
		"failed": 0
	},
	"hits": {
		"total": 4,
		"max_score": 1.0,
		"hits": [
			{
				"_index": "units",
				"_type": "_doc",
				"_id": "1",
				"_score": 1.0,
				"_source": {
					"join": {
						"name": "unit"
					},
					"unit_id": 1
				}
			},
			{
				"_index": "units",
				"_type": "_doc",
				"_id": "2",
				"_score": 1.0,
				"_routing": "1",
				"_source": {
					"join": {
						"name": "availability",
						"parent": 1
					},
					"date": "2018-11-01",
					"available": true,
					"rate": 150
				}
			},
			{
				"_index": "units",
				"_type": "_doc",
				"_id": "3",
				"_score": 1.0,
				"_routing": "1",
				"_source": {
					"join": {
						"name": "availability",
						"parent": 1
					},
					"date": "2018-11-02",
					"available": true,
					"rate": 125
				}
			},
			{
				"_index": "units",
				"_type": "_doc",
				"_id": "4",
				"_score": 1.0,
				"_routing": "1",
				"_source": {
					"join": {
						"name": "availability",
						"parent": 1
					},
					"date": "2018-11-03",
					"available": true,
					"rate": 210
				}
			}
		]
	}
}

Here's where I'm stuck...

I am getting units which are available from 2018-11-01 to 2018-11-03. The aggregation is correctly calculating the total rate for the stay at $485.

GET /units/_doc/_search
{
	"query": {
		"bool": {
			"must": [
				{
					"has_child": {
						"type": "availability",
						"query": {
							"bool": {
								"must": [
									{"term": {"date": "2018-11-01"}},
									{"term": {"available": true}}
								]
							}
						}
					}
				},
				{
					"has_child": {
						"type": "availability",
						"query": {
							"bool": {
								"must": [
									{"term": {"date": "2018-11-02"}},
									{"term": {"available": true}}
								]
							}
						}
					}
				},
				{
					"has_child": {
						"type": "availability",
						"query": {
							"bool": {
								"must": [
									{"term": {"date": "2018-11-03"}},
									{"term": {"available": true}}
								]
							}
						}
					}
				}
			]
		}
	},
	"aggs": {
		"to_unit_ids": {
			"terms": {
				"field": "unit_id"
			},
			"aggs": {
				"to_rates": {
					"children": {
						"type" : "availability" 
					},
					"aggs": {
						"to_filtered_rates": {
							"filter": {
								"range": {
									"date": {
										"gte": "2018-11-01",
										"lte": "2018-11-03"
									}
								}
							},
							"aggs": {
								"to_total_rate": {
									"sum": {
										"field": "rate"
									}
								}
							}
						}
					}
				}
			}
		}
	}
}

{
	"took": 5,
	"timed_out": false,
	"_shards": {
		"total": 5,
		"successful": 5,
		"skipped": 0,
		"failed": 0
	},
	"hits": {
		"total": 1,
		"max_score": 3.0,
			{
				"_index": "units",
				"_type": "_doc",
				"_id": "1",
				"_score": 3.0,
				"_source": {
					"join": {
						"name": "unit"
					},
					"unit_id": 1
				}
			}
		]
	},
	"aggregations": {
		"to_unit_ids": {
			"meta": {},
			"doc_count_error_upper_bound": 0,
			"sum_other_doc_count": 0,
			"buckets": [
				{
					"key": 1,
					"doc_count": 1,
					"to_rates": {
						"meta": {},
						"doc_count": 3,
						"to_filtered_rates": {
							"meta": {},
							"doc_count": 3,
							"to_total_rate": {
								"value": 485.0
							}
						}
					}
				}
			]
		}
	}
}

Okay great... now what if I would like to filter my results for total rental costs that are below $300?

I use the same search as above, but add the extra bucket_selector to the aggs to filter the bucket results:

{
	"aggs": {
		
		...

		"to_filtered_buckets": {
			"bucket_selector": {
				"buckets_path": {
					"totalRentalRate": "to_rates>to_filtered_rates>to_total_rate"
				},
				"script": "params.totalRentalRate <= 300"
			}
		}
	}
}

{
	"took": 5,
	"timed_out": false,
	"_shards": {
		"total": 5,
		"successful": 5,
		"skipped": 0,
		"failed": 0
	},
	"hits": {
		"total": 1,
		"max_score": 3.0,
		"hits": [
			{
				"_index": "units",
				"_type": "_doc",
				"_id": "1",
				"_score": 3.0,
				"_source": {
					"join": {
						"name": "unit"
					},
					"unit_id": 1
				}
			}
		]
	},
	"aggregations": {
		"to_unit_ids": {
			"meta": {},
			"doc_count_error_upper_bound": 0,
			"sum_other_doc_count": 0,
			"buckets": []
		}
	}
}

Better but not quite...

The aggregation bucket has been filtered out but the original hits are still there.


So from here I can filter post process but this could be challenging with millions of hits.

Is there a way to filter the hits based on the results of the child aggregation?

Thanks all!

This topic was automatically closed 28 days after the last reply. New replies are no longer allowed.