Building a hotel search engine with parent/child documents indexed together - units
and availability
. How do I filter hits based on a max dollar amount for a given stay.
Build mapping:
PUT /units/
{
"mappings": {
"_doc": {
"properties": {
"join": {
"type": "join",
"relations": {
"unit": "availability"
}
},
"id": {
"type": "integer"
},
"date": {
"type": "date",
"format": "yyyy-MM-dd"
},
"available": {
"type": "boolean"
},
"rate": {
"type": "integer"
}
}
}
}
}
Add unit:
PUT /units/_doc/1
{
"join": {
"name": "unit"
},
"unit_id": 1
}
Add some availability:
PUT /units/_doc/2?routing=1
{
"join": {
"name": "availability",
"parent": 1
},
"date": "2018-11-01",
"available": true,
"rate": 150
}
PUT /units/_doc/3?routing=1
{
"join": {
"name": "availability",
"parent": 1
},
"date": "2018-11-02",
"available": true,
"rate": 125
}
PUT /units/_doc/4?routing=1
{
"join": {
"name": "availability",
"parent": 1
},
"date": "2018-11-03",
"available": true,
"rate": 210
}
Result in the following:
GET /units/_doc/_search
{
"took": 2,
"timed_out": false,
"_shards": {
"total": 5,
"successful": 5,
"skipped": 0,
"failed": 0
},
"hits": {
"total": 4,
"max_score": 1.0,
"hits": [
{
"_index": "units",
"_type": "_doc",
"_id": "1",
"_score": 1.0,
"_source": {
"join": {
"name": "unit"
},
"unit_id": 1
}
},
{
"_index": "units",
"_type": "_doc",
"_id": "2",
"_score": 1.0,
"_routing": "1",
"_source": {
"join": {
"name": "availability",
"parent": 1
},
"date": "2018-11-01",
"available": true,
"rate": 150
}
},
{
"_index": "units",
"_type": "_doc",
"_id": "3",
"_score": 1.0,
"_routing": "1",
"_source": {
"join": {
"name": "availability",
"parent": 1
},
"date": "2018-11-02",
"available": true,
"rate": 125
}
},
{
"_index": "units",
"_type": "_doc",
"_id": "4",
"_score": 1.0,
"_routing": "1",
"_source": {
"join": {
"name": "availability",
"parent": 1
},
"date": "2018-11-03",
"available": true,
"rate": 210
}
}
]
}
}
Here's where I'm stuck...
I am getting units which are available from 2018-11-01
to 2018-11-03
. The aggregation is correctly calculating the total rate for the stay at $485
.
GET /units/_doc/_search
{
"query": {
"bool": {
"must": [
{
"has_child": {
"type": "availability",
"query": {
"bool": {
"must": [
{"term": {"date": "2018-11-01"}},
{"term": {"available": true}}
]
}
}
}
},
{
"has_child": {
"type": "availability",
"query": {
"bool": {
"must": [
{"term": {"date": "2018-11-02"}},
{"term": {"available": true}}
]
}
}
}
},
{
"has_child": {
"type": "availability",
"query": {
"bool": {
"must": [
{"term": {"date": "2018-11-03"}},
{"term": {"available": true}}
]
}
}
}
}
]
}
},
"aggs": {
"to_unit_ids": {
"terms": {
"field": "unit_id"
},
"aggs": {
"to_rates": {
"children": {
"type" : "availability"
},
"aggs": {
"to_filtered_rates": {
"filter": {
"range": {
"date": {
"gte": "2018-11-01",
"lte": "2018-11-03"
}
}
},
"aggs": {
"to_total_rate": {
"sum": {
"field": "rate"
}
}
}
}
}
}
}
}
}
}
{
"took": 5,
"timed_out": false,
"_shards": {
"total": 5,
"successful": 5,
"skipped": 0,
"failed": 0
},
"hits": {
"total": 1,
"max_score": 3.0,
{
"_index": "units",
"_type": "_doc",
"_id": "1",
"_score": 3.0,
"_source": {
"join": {
"name": "unit"
},
"unit_id": 1
}
}
]
},
"aggregations": {
"to_unit_ids": {
"meta": {},
"doc_count_error_upper_bound": 0,
"sum_other_doc_count": 0,
"buckets": [
{
"key": 1,
"doc_count": 1,
"to_rates": {
"meta": {},
"doc_count": 3,
"to_filtered_rates": {
"meta": {},
"doc_count": 3,
"to_total_rate": {
"value": 485.0
}
}
}
}
]
}
}
}
Okay great... now what if I would like to filter my results for total rental costs that are below $300?
I use the same search as above, but add the extra bucket_selector
to the aggs to filter the bucket results:
{
"aggs": {
...
"to_filtered_buckets": {
"bucket_selector": {
"buckets_path": {
"totalRentalRate": "to_rates>to_filtered_rates>to_total_rate"
},
"script": "params.totalRentalRate <= 300"
}
}
}
}
{
"took": 5,
"timed_out": false,
"_shards": {
"total": 5,
"successful": 5,
"skipped": 0,
"failed": 0
},
"hits": {
"total": 1,
"max_score": 3.0,
"hits": [
{
"_index": "units",
"_type": "_doc",
"_id": "1",
"_score": 3.0,
"_source": {
"join": {
"name": "unit"
},
"unit_id": 1
}
}
]
},
"aggregations": {
"to_unit_ids": {
"meta": {},
"doc_count_error_upper_bound": 0,
"sum_other_doc_count": 0,
"buckets": []
}
}
}
Better but not quite...
The aggregation bucket has been filtered out but the original hits are still there.
So from here I can filter post process but this could be challenging with millions of hits.
Is there a way to filter the hits based on the results of the child aggregation?
Thanks all!