Question about Elasticsearch 2.4 performance: matchAll vs. boolQuery

Good afternoon,

While browsing some legacy source code, I found a search against Elasticsearch 2.4 that does the following: it creates the filters and sorts, and then applies them to a matchAll query.
Would I get better performance if I used a boolQuery instead of matchAll, for example?
The search is meant to locate, for example, a certain item within a particular category.
In my tests the search with matchAll seems to be costly.
Would you have any suggestions or explanation for this behavior?

Thank you.

This is one of the executed queries. It seems to me that it returns every product in the database — is that right?

Actually, it should return only the dresses.

{
  "from" : 0,
  "size" : 52,
  "query" : {
    "filtered" : {
      "query" : {
        "match_all" : { }
      },
      "filter" : {
        "bool" : {
          "must" : {
            "or" : {
              "filters" : [ {
                "range" : {
                  "valor" : {
                    "from" : 0.0,
                    "to" : 99999.99,
                    "include_lower" : true,
                    "include_upper" : true
                  }
                }
              } ]
            }
          }
        }
      }
    }
  },
  "sort" : [ {
    "qtdComprada" : {
      "order" : "desc"
    }
  }, {
    "_score" : { }
  }, {
    "cliques" : {
      "order" : "desc"
    }
  } ],
  "aggregations" : {
    "tipoProduto" : {
      "terms" : {
        "field" : "tipoProduto",
        "size" : 100
      }
    },
    "loja" : {
      "terms" : {
        "field" : "loja.codigo",
        "size" : 100
      }
    },
    "tamanho" : {
      "terms" : {
        "field" : "tamanhos.tamanho",
        "size" : 100
      }
    },
    "corAgrupado" : {
      "terms" : {
        "field" : "corAgrupado",
        "size" : 100
      }
    },
    "grupos" : {
      "nested" : {
        "path" : "classificacoes"
      },
      "aggregations" : {
        "grupo" : {
          "terms" : {
            "field" : "classificacoes.grupo",
            "size" : 100
          },
          "aggregations" : {
            "subgrupo" : {
              "terms" : {
                "field" : "classificacoes.subgrupo",
                "size" : 100
              },
              "aggregations" : {
                "subclasse" : {
                  "terms" : {
                    "field" : "classificacoes.subclasse",
                    "size" : 100
                  }
                }
              }
            }
          }
        }
      }
    },
    "marca" : {
      "terms" : {
        "field" : "marca.codigo",
        "size" : 100
      }
    },
    "valor" : {
      "range" : {
        "field" : "valor",
        "ranges" : [ {
          "key" : "ATE_49",
          "from" : 0.01,
          "to" : 50.0
        }, {
          "key" : "DE_50_ATE_99",
          "from" : 50.0,
          "to" : 100.0
        }, {
          "key" : "DE_100_ATE_149",
          "from" : 100.0,
          "to" : 150.0
        }, {
          "key" : "DE_149_ATE_199",
          "from" : 150.0,
          "to" : 200.0
        }, {
          "key" : "MAIOR_200",
          "from" : 200.0,
          "to" : 100000.0
        } ]
      }
    },
    "faixaEtariasLoja" : {
      "terms" : {
        "field" : "faixaEtariasLoja",
        "size" : 100
      }
    }
  },
  "suggest" : {
    "desc" : {
      "text" : " DRESSES",
      "phrase" : {
        "field" : "descritivo"
      }
    }
  }
}

Is there a more performant way to write this query?

{
  "from" : 0,
  "size" : 36,
  "query" : {
    "filtered" : {
      "query" : {
        "match_all" : { }
      },
      "filter" : {
        "bool" : {
          "must" : [ {
            "terms" : {
              "loja.codigo" : [ 42 ]
            }
          }, {
            "or" : {
              "filters" : [ {
                "range" : {
                  "valor" : {
                    "from" : 0.0,
                    "to" : 99999.99,
                    "include_lower" : true,
                    "include_upper" : true
                  }
                }
              } ]
            }
          }, {
            "terms" : {
              "corAgrupado" : [ "PRETO", "VERMELHO" ]
            }
          }, {
            "terms" : {
              "tamanhos.tamanho" : [ "m" ]
            }
          }, {
            "nested" : {
              "query" : {
                "bool" : {
                  "must" : {
                    "match" : {
                      "classificacoes.grupo" : {
                        "query" : 38,
                        "type" : "boolean"
                      }
                    }
                  }
                }
              },
              "path" : "classificacoes"
            }
          } ]
        }
      }
    }
  },
  "sort" : [ {
    "qtdComprada" : {
      "order" : "desc"
    }
  }, {
    "_score" : { }
  }, {
    "cliques" : {
      "order" : "desc"
    }
  } ],
  "aggregations" : {
    "tipoProduto" : {
      "terms" : {
        "field" : "tipoProduto",
        "size" : 100
      }
    },
    "loja" : {
      "terms" : {
        "field" : "loja.codigo",
        "size" : 100
      }
    },
    "tamanho" : {
      "terms" : {
        "field" : "tamanhos.tamanho",
        "size" : 100
      }
    },
    "corAgrupado" : {
      "terms" : {
        "field" : "corAgrupado",
        "size" : 100
      }
    },
    "grupos" : {
      "nested" : {
        "path" : "classificacoes"
      },
      "aggregations" : {
        "grupo" : {
          "terms" : {
            "field" : "classificacoes.grupo",
            "size" : 100
          },
          "aggregations" : {
            "subgrupo" : {
              "terms" : {
                "field" : "classificacoes.subgrupo",
                "size" : 100
              },
              "aggregations" : {
                "subclasse" : {
                  "terms" : {
                    "field" : "classificacoes.subclasse",
                    "size" : 100
                  }
                }
              }
            }
          }
        }
      }
    },
    "marca" : {
      "terms" : {
        "field" : "marca.codigo",
        "size" : 100
      }
    },
    "valor" : {
      "range" : {
        "field" : "valor",
        "ranges" : [ {
          "key" : "ATE_49",
          "from" : 0.01,
          "to" : 50.0
        }, {
          "key" : "DE_50_ATE_99",
          "from" : 50.0,
          "to" : 100.0
        }, {
          "key" : "DE_100_ATE_149",
          "from" : 100.0,
          "to" : 150.0
        }, {
          "key" : "DE_149_ATE_199",
          "from" : 150.0,
          "to" : 200.0
        }, {
          "key" : "MAIOR_200",
          "from" : 200.0,
          "to" : 100000.0
        } ]
      }
    },
    "faixaEtariasLoja" : {
      "terms" : {
        "field" : "faixaEtariasLoja",
        "size" : 100
      }
    }
  },
  "suggest" : {
    "desc" : {
      "text" : "",
      "phrase" : {
        "field" : "descritivo"
      }
    }
  }
}

The filtered and bool queries should have the same performance; bool is just the new syntax for filtered queries (using filter clauses).

As I understand it, you want to know the performance difference between using a terms query as the query and using a matchAll query as the query with a filter applied afterwards.

I understand that, intuitively, you might think that using the terms query as the query would be better than filtering, since Elasticsearch would hit precisely the documents that match the query, whereas with filtering Elasticsearch would scan every single document to apply the filter. But that's not what really happens under the covers: Elasticsearch almost never scans the whole document dataset, regardless of whether that terms query is being used as a filter or not.

Generally, matchAll + filter performs better because no scoring calculation is involved. However, since you are using just a terms query, the scoring overhead is really low and the performance difference is marginal.

This topic was automatically closed 28 days after the last reply. New replies are no longer allowed.