ElasticSearch query to perform case-insensitive search with aggregations to get matching column counts

annanyan · March 3, 2017, 2:54pm

We are using ElasticSearch 5.0.0.

Please let us know if there is any regex or any other way to perform case insensitive search.

Please find data in movies index in ElasticSearch in attachment.

Please find aggregation query to find fields matching search string “drama” in movies index:

GET /movies/_search?pretty
{
  "size": 0,
  "_source": false,
  "query": {
    "query_string": {
      "analyze_wildcard": true,
      "query": "*drama*"
    }
  },
  "aggs": {
    "distinct_tables_1": {
      "terms": {
        "field": "_type"
      },
      "aggs": {
        "distinct_col_1": {
          "terms": {
            "field": "genres.keyword",
            "include" : ".*drama.*"
          }
        }
      }
    },
    "distinct_tables_2": {
      "terms": {
        "field": "_type"
      },
      "aggs": {
        "distinct_col_2": {
          "terms": {
            "field": "director.keyword",
            "include" : ".*drama.*"
          }
        }
      }
    },
     "distinct_tables_3": {
      "terms": {
        "field": "_type"
      },
      "aggs": {
        "distinct_col_3": {
          "terms": {
            "field": "theatre.keyword",
           "include" : ".*drama.*"
          }
        }
      }
    }
  }
}

We get the following response:

{
  "took": 10,
  "timed_out": false,
  "_shards": {
    "total": 5,
    "successful": 5,
    "failed": 0
  },
  "hits": {
    "total": 4,
    "max_score": 0,
    "hits": []
  },
  "aggregations": {
    "distinct_tables_1": {
      "doc_count_error_upper_bound": 0,
      "sum_other_doc_count": 0,
      "buckets": [
        {
          "key": "movie_intrnl",
          "doc_count": 2,
          "distinct_col_1": {
            "doc_count_error_upper_bound": 0,
            "sum_other_doc_count": 0,
            "buckets": []
          }
        },
        {
          "key": "movie_shows",
          "doc_count": 2,
          "distinct_col_1": {
            "doc_count_error_upper_bound": 0,
            "sum_other_doc_count": 0,
            "buckets": []
          }
        }
      ]
    },
    "distinct_tables_2": {
      "doc_count_error_upper_bound": 0,
      "sum_other_doc_count": 0,
      "buckets": [
        {
          "key": "movie_intrnl",
          "doc_count": 2,
          "distinct_col_2": {
            "doc_count_error_upper_bound": 0,
            "sum_other_doc_count": 0,
            "buckets": []
          }
        },
        {
          "key": "movie_shows",
          "doc_count": 2,
          "distinct_col_2": {
            "doc_count_error_upper_bound": 0,
            "sum_other_doc_count": 0,
            "buckets": []
          }
        }
      ]
    },
    "distinct_tables_3": {
      "doc_count_error_upper_bound": 0,
      "sum_other_doc_count": 0,
      "buckets": [
        {
          "key": "movie_intrnl",
          "doc_count": 2,
          "distinct_col_3": {
            "doc_count_error_upper_bound": 0,
            "sum_other_doc_count": 0,
            "buckets": []
          }
        },
        {
          "key": "movie_shows",
          "doc_count": 2,
          "distinct_col_3": {
            "doc_count_error_upper_bound": 0,
            "sum_other_doc_count": 0,
            "buckets": []
          }
        }
      ]
    }
  }
}

It can be seen from the response that there are no matching columns values in response even though there are documents matching search string “drama”. The search for regex in aggregations appears to be case sensitive and so no values are returned.

We used this alternate query to find words matching Drama to perform case-insensitive search. However this uses only part word .rama. instead of Drama and it would be better to perform case-insensitive search.

GET /movies/_search?pretty
{
  "size": 0,
  "_source": false,
  "query": {
    "query_string": {
      "analyze_wildcard": true,
      "query": "*drama*"
    }
  },
  "aggs": {
    "distinct_tables_1": {
      "terms": {
        "field": "_type"
      },
      "aggs": {
        "distinct_col_1": {
          "terms": {
            "field": "genres.keyword",
            "include" : ".*rama.*"
          }
        }
      }
    },
    "distinct_tables_2": {
      "terms": {
        "field": "_type"
      },
      "aggs": {
        "distinct_col_2": {
          "terms": {
            "field": "director.keyword",
            "include" : ".*rama.*"
          }
        }
      }
    },
     "distinct_tables_3": {
      "terms": {
        "field": "_type"
      },
      "aggs": {
        "distinct_col_3": {
          "terms": {
            "field": "theatre.keyword",
            "include" : ".*rama.*"
          }
        }
      }
    }
  }
}

annanyan · March 3, 2017, 2:56pm

Adding the final response below due to size restrictions in the body.

Response for the query given above:

{
  "took": 1,
  "timed_out": false,
  "_shards": {
    "total": 5,
    "successful": 5,
    "failed": 0
  },
  "hits": {
    "total": 4,
    "max_score": 0,
    "hits": []
  },
  "aggregations": {
    "distinct_tables_1": {
      "doc_count_error_upper_bound": 0,
      "sum_other_doc_count": 0,
      "buckets": [
        {
          "key": "movie_intrnl",
          "doc_count": 2,
          "distinct_col_1": {
            "doc_count_error_upper_bound": 0,
            "sum_other_doc_count": 0,
            "buckets": [
              {
                "key": "BiographyDrama",
                "doc_count": 1
              },
              {
                "key": "Drama",
                "doc_count": 1
              }
            ]
          }
        },
        {
          "key": "movie_shows",
          "doc_count": 2,
          "distinct_col_1": {
            "doc_count_error_upper_bound": 0,
            "sum_other_doc_count": 0,
            "buckets": [
              {
                "key": "BiographyDrama",
                "doc_count": 1
              },
              {
                "key": "Drama",
                "doc_count": 1
              }
            ]
          }
        }
      ]
    },
    "distinct_tables_2": {
      "doc_count_error_upper_bound": 0,
      "sum_other_doc_count": 0,
      "buckets": [
        {
          "key": "movie_intrnl",
          "doc_count": 2,
          "distinct_col_2": {
            "doc_count_error_upper_bound": 0,
            "sum_other_doc_count": 0,
            "buckets": [
              {
                "key": "Drama1",
                "doc_count": 1
              },
              {
                "key": "Drama4",
                "doc_count": 1
              }
            ]
          }
        },
        {
          "key": "movie_shows",
          "doc_count": 2,
          "distinct_col_2": {
            "doc_count_error_upper_bound": 0,
            "sum_other_doc_count": 0,
            "buckets": []
          }
        }
      ]
    },
    "distinct_tables_3": {
      "doc_count_error_upper_bound": 0,
      "sum_other_doc_count": 0,
      "buckets": [
        {
          "key": "movie_intrnl",
          "doc_count": 2,
          "distinct_col_3": {
            "doc_count_error_upper_bound": 0,
            "sum_other_doc_count": 0,
            "buckets": []
          }
        },
        {
          "key": "movie_shows",
          "doc_count": 2,
          "distinct_col_3": {
            "doc_count_error_upper_bound": 0,
            "sum_other_doc_count": 0,
            "buckets": [
              {
                "key": "Drama4",
                "doc_count": 1
              }
            ]
          }
        }
      ]
    }
  }
}

system · March 31, 2017, 2:56pm

This topic was automatically closed 28 days after the last reply. New replies are no longer allowed.

Topic		Replies	Views
Elastic Search Aggregation Based On Case Insensitive Elasticsearch	1	96	April 29, 2024
How to get the case insensitive results from a regex query Elasticsearch	4	6426	July 5, 2017
How to use search query in case insesitive for keyword Elasticsearch	2	381	June 1, 2018
Terms Aggregation with include filter Elasticsearch	10	12034	September 22, 2017
Case Insensitive aggregation not working Elasticsearch	5	451	April 8, 2024

ElasticSearch query to perform case-insensitive search with aggregations to get matching column counts

Related topics