How execute AND query in elasticsearch and aggregate

Hello, I have product_supplier Index which contains products and its suppliers. I have given suppliers names and I want to get the products which are supplied by the given suppliers.
For eg. I have given supplier "supplier1" and "supplier2". And I want to get "chips" and "mango" product as it's been supplied by this suppliers. And after that I want to aggregate the product name. Here is the total hits I have.

"hits":[
      {
        "_index": "products_supplier",
        "_type": "_doc",
        "_id": "1",
        "_score": 1.0,
        "_source": {
          "product_name": "maggie",
          "product_supplier": "supplier1"
        }
      },
      {
        "_index": "products_supplier",
        "_type": "_doc",
        "_id": "2",
        "_score": 1.0,
        "_source": {
          "product_name": "maggie",
          "product_supplier": "supplier2"
        }
      },
      {
        "_index": "products_supplier",
        "_type": "_doc",
        "_id": "3",
        "_score": 1.0,
        "_source": {
          "product_name": "potatoes",
          "product_supplier": "supplier1"
        }
      },
      {
        "_index": "products_supplier",
        "_type": "_doc",
        "_id": "4",
        "_score": 1.0,
        "_source": {
          "product_name": "chips",
          "product_supplier": "supplier1"
        }
      },
      {
        "_index": "products_supplier",
        "_type": "_doc",
        "_id": "5",
        "_score": 1.0,
        "_source": {
          "product_name": "maggie",
          "product_supplier": "supplier3"
        }
      },
      {
        "_index": "products_supplier",
        "_type": "_doc",
        "_id": "6",
        "_score": 1.0,
        "_source": {
          "product_name": "mango",
          "product_supplier": "supplier1"
        }
      },
      {
        "_index": "products_supplier",
        "_type": "_doc",
        "_id": "7",
        "_score": 1.0,
        "_source": {
          "product_name": "chips",
          "product_supplier": "supplier2"
        }
      },
      {
        "_index": "products_supplier",
        "_type": "_doc",
        "_id": "8",
        "_score": 1.0,
        "_source": {
          "product_name": "chips",
          "product_supplier": "supplier3"
        }
      }
    ]

I have query like this

GET /products_supplier/_search
{
 "query": {
   "bool": {
     "should": [
       {"term": {
         "product_supplier": {
           "value": "supplier2"
         }
       }},
       {"term": {
         "product_supplier": {
           "value": "supplier1"
         }
       }}
     ]
   }
 }, "aggs": {
   "product_name": {
    "terms": {
      "field": "product_name.keyword"
    }
   }
 }
}

And I'm getting this

"hits" : {
    "total" : {
      "value" : 6,
      "relation" : "eq"
    },
    "max_score" : 1.3862944,
    "hits" : [
      {
        "_index" : "products_supplier",
        "_type" : "_doc",
        "_id" : "2",
        "_score" : 1.3862944,
        "_source" : {
          "product_name" : "maggie",
          "product_supplier" : "supplier2"
        }
      },
      {
        "_index" : "products_supplier",
        "_type" : "_doc",
        "_id" : "7",
        "_score" : 1.3862944,
        "_source" : {
          "product_name" : "chips",
          "product_supplier" : "supplier2"
        }
      },
      {
        "_index" : "products_supplier",
        "_type" : "_doc",
        "_id" : "1",
        "_score" : 0.7985077,
        "_source" : {
          "product_name" : "maggie",
          "product_supplier" : "supplier1"
        }
      },
      {
        "_index" : "products_supplier",
        "_type" : "_doc",
        "_id" : "3",
        "_score" : 0.7985077,
        "_source" : {
          "product_name" : "potatoes",
          "product_supplier" : "supplier1"
        }
      },
      {
        "_index" : "products_supplier",
        "_type" : "_doc",
        "_id" : "4",
        "_score" : 0.7985077,
        "_source" : {
          "product_name" : "chips",
          "product_supplier" : "supplier1"
        }
      },
      {
        "_index" : "products_supplier",
        "_type" : "_doc",
        "_id" : "6",
        "_score" : 0.7985077,
        "_source" : {
          "product_name" : "mango",
          "product_supplier" : "supplier1"
        }
      }
    ]
  },
  "aggregations" : {
    "product_name" : {
      "doc_count_error_upper_bound" : 0,
      "sum_other_doc_count" : 0,
      "buckets" : [
        {
          "key" : "chips",
          "doc_count" : 2
        },
        {
          "key" : "maggie",
          "doc_count" : 2
        },
        {
          "key" : "mango",
          "doc_count" : 1
        },
        {
          "key" : "potatoes",
          "doc_count" : 1
        }
      ]
    }
  }
}

i only like to get "mango" and "chips" product names

I can think of two approaches filtering your current query so that you only see "chips" and "mango".

The first is to update the logic in the boolean query and use a nested boolean query. Currently, your query is searching for documents with a product_supplier of supplier1 or supplier1. You can add a clause so that the query searches for documents with product_supplier of supplier1 or supplier1 AND a product_name of chips or mango. That would look like this:

GET /products_supplier/_search
{
  "query": {
    "bool": {
      "must": [
        {
          "bool": {
            "should": [
              {
                "term": {
                  "product_supplier": {
                    "value": "supplier2"
                  }
                }
              },
              {
                "term": {
                  "product_supplier": {
                    "value": "supplier1"
                  }
                }
              }
            ]
          }
        },
        {
          "bool": {
            "should": [
              {
                "term": {
                  "product_name": {
                    "value": "mango"
                  }
                }
              },
              {
                "term": {
                  "product_name": {
                    "value": "chips"
                  }
                }
              }
            ]
          }
        }
      ]
    }
  },
  "aggs": {
    "product_name": {
      "terms": {
        "field": "product_name.keyword"
      }
    }
  }
}

The query returns three documents, and the aggregation runs over those.

However, you can simplify that query by replacing the multiple term clauses within the should clauses with a single terms clause, giving you this:

GET /products_supplier/_search
{
  "query": {
    "bool": {
      "must": [
        {
          "terms": {
            "product_supplier": [
              "supplier2",
              "supplier1"
            ]
          }
        },
        {
          "terms": {
            "product_name": [
              "mango",
              "chips"
            ]
          }
        }
      ]
    }
  },
  "aggs": {
    "product_name": {
      "terms": {
        "field": "product_name.keyword"
      }
    }
  }
}

To me, this is much easier to read!

The second approach you could take is to add a filter agg to your aggregation.

GET /products_supplier/_search
{
  "query": {
    "bool": {
      "must": [
        {
          "terms": {
            "product_supplier": [
              "supplier2",
              "supplier1"
            ]
          }
        }
      ]
    }
  },
  "aggs": {
    "product_filter": {
      "filter": {
        "terms": {
          "product_name.keyword": [
            "chips",
            "mango"
          ]
        }
      },
      "aggs": {
        "product_name": {
          "terms": {
            "field": "product_name.keyword"
          }
        }
      }
    }
  }
}

Here, the "query" returns six documents, but because of the filter clause, the aggregation only includes the values for chips and mango.

I hope these suggestions help you move forward with Elasticsearch.

This topic was automatically closed 28 days after the last reply. New replies are no longer allowed.