Hi there,
I have an aggregation query that filters documents by terms and by a regexp match on a multi-value field. The buckets returned include values that do not satisfy the regexp match criteria.
Note: Elasticsearch version 7.17
Below is sample code:
- Mapping
PUT /sales_index
{
"settings": {
"index": {
"number_of_shards": "8",
"refresh_interval": "180s"
}
},
"mappings": {
"dynamic": "false",
"properties": {
"id": {
"type": "keyword"
},
"businessName": {
"type": "keyword"
},
"category": {
"type": "keyword"
}
}
}
}
- Ingest data
POST /sales_index/_doc/_bulk
{"index":{"_id":1}}
{"businessName":["Asda","Tesco","Sainsbury"], "category": "store"}
{"index":{"_id":2}}
{"businessName":["Amazon","John Lewis","Asda"], "category": "online"}
{"index":{"_id":3}}
{"businessName":["BNQ","BNM"], "category": "store"}
- Query with composite aggregation
{
"size": 0,
"query": {
"bool": {
"must": [
{
"match_all": {
"boost": 1.0
}
},
{
"bool": {
"should": [
{
"terms": {
"category": [
"store",
"online"
],
"boost": 1.0,
"_name": "filter-match-category-"
}
}
],
"adjust_pure_negative": true,
"boost": 1.0
}
},
{
"bool": {
"should": [
{
"regexp": {
"businessName": {
"value": "As.*",
"flags_value": 255,
"case_insensitive": true,
"max_determinized_states": 10000,
"boost": 1.0
}
}
}
],
"adjust_pure_negative": true,
"boost": 1.0
}
}
],
"adjust_pure_negative": true,
"boost": 1.0
}
},
"track_total_hits": -1,
"aggregations": {
"composite_facet": {
"composite": {
"size": 100,
"sources": [
{
"businessName": {
"terms": {
"field": "businessName",
"order": "asc"
}
}
}
]
}
}
}
}
- Query results
{
"took" : 3,
"timed_out" : false,
"_shards" : {
"total" : 8,
"successful" : 8,
"skipped" : 0,
"failed" : 0
},
"hits" : {
"max_score" : null,
"hits" : [ ]
},
"aggregations" : {
"composite_facet" : {
"after_key" : {
"businessName" : "Tesco"
},
"buckets" : [
{
"key" : {
"businessName" : "Amazon"
},
"doc_count" : 1
},
{
"key" : {
"businessName" : "Asda"
},
"doc_count" : 2
},
{
"key" : {
"businessName" : "John Lewis"
},
"doc_count" : 1
},
{
"key" : {
"businessName" : "Sainsbury"
},
"doc_count" : 1
},
{
"key" : {
"businessName" : "Tesco"
},
"doc_count" : 1
}
]
}
}
}
- Expected results:
"aggregations": {
"composite_facet": {
"buckets": [
{
"key": {
"businessName": "Amazon"
},
"doc_count": 1
},
{
"key": {
"businessName": "Asda"
},
"doc_count": 2
}
]
}
}
Please suggest how to change the query so that the aggregation returns only the expected buckets, i.e. those whose values match the regexp query.
Thanks
Natraj