ElasticSearch query to perform case-insensitive search with aggregations to get matching column counts

We are using ElasticSearch 5.0.0.

Please let us know if there is any regex or any other way to perform case insensitive search.

Please find data in movies index in ElasticSearch in attachment.

Please find aggregation query to find fields matching search string “drama” in movies index:

GET /movies/_search?pretty
{
  "size": 0,
  "_source": false,
  "query": {
    "query_string": {
      "analyze_wildcard": true,
      "query": "*drama*"
    }
  },
  "aggs": {
    "distinct_tables_1": {
      "terms": {
        "field": "_type"
      },
      "aggs": {
        "distinct_col_1": {
          "terms": {
            "field": "genres.keyword",
            "include" : ".*drama.*"
          }
        }
      }
    },
    "distinct_tables_2": {
      "terms": {
        "field": "_type"
      },
      "aggs": {
        "distinct_col_2": {
          "terms": {
            "field": "director.keyword",
            "include" : ".*drama.*"
          }
        }
      }
    },
     "distinct_tables_3": {
      "terms": {
        "field": "_type"
      },
      "aggs": {
        "distinct_col_3": {
          "terms": {
            "field": "theatre.keyword",
           "include" : ".*drama.*"
          }
        }
      }
    }
  }
}

We get the following response:

{
  "took": 10,
  "timed_out": false,
  "_shards": {
    "total": 5,
    "successful": 5,
    "failed": 0
  },
  "hits": {
    "total": 4,
    "max_score": 0,
    "hits": []
  },
  "aggregations": {
    "distinct_tables_1": {
      "doc_count_error_upper_bound": 0,
      "sum_other_doc_count": 0,
      "buckets": [
        {
          "key": "movie_intrnl",
          "doc_count": 2,
          "distinct_col_1": {
            "doc_count_error_upper_bound": 0,
            "sum_other_doc_count": 0,
            "buckets": []
          }
        },
        {
          "key": "movie_shows",
          "doc_count": 2,
          "distinct_col_1": {
            "doc_count_error_upper_bound": 0,
            "sum_other_doc_count": 0,
            "buckets": []
          }
        }
      ]
    },
    "distinct_tables_2": {
      "doc_count_error_upper_bound": 0,
      "sum_other_doc_count": 0,
      "buckets": [
        {
          "key": "movie_intrnl",
          "doc_count": 2,
          "distinct_col_2": {
            "doc_count_error_upper_bound": 0,
            "sum_other_doc_count": 0,
            "buckets": []
          }
        },
        {
          "key": "movie_shows",
          "doc_count": 2,
          "distinct_col_2": {
            "doc_count_error_upper_bound": 0,
            "sum_other_doc_count": 0,
            "buckets": []
          }
        }
      ]
    },
    "distinct_tables_3": {
      "doc_count_error_upper_bound": 0,
      "sum_other_doc_count": 0,
      "buckets": [
        {
          "key": "movie_intrnl",
          "doc_count": 2,
          "distinct_col_3": {
            "doc_count_error_upper_bound": 0,
            "sum_other_doc_count": 0,
            "buckets": []
          }
        },
        {
          "key": "movie_shows",
          "doc_count": 2,
          "distinct_col_3": {
            "doc_count_error_upper_bound": 0,
            "sum_other_doc_count": 0,
            "buckets": []
          }
        }
      ]
    }
  }
}

It can be seen from the response that there are no matching columns values in response even though there are documents matching search string “drama”. The search for regex in aggregations appears to be case sensitive and so no values are returned.

We used this alternate query to find words matching Drama to perform case-insensitive search. However this uses only part word .rama. instead of Drama and it would be better to perform case-insensitive search.

GET /movies/_search?pretty
{
  "size": 0,
  "_source": false,
  "query": {
    "query_string": {
      "analyze_wildcard": true,
      "query": "*drama*"
    }
  },
  "aggs": {
    "distinct_tables_1": {
      "terms": {
        "field": "_type"
      },
      "aggs": {
        "distinct_col_1": {
          "terms": {
            "field": "genres.keyword",
            "include" : ".*rama.*"
          }
        }
      }
    },
    "distinct_tables_2": {
      "terms": {
        "field": "_type"
      },
      "aggs": {
        "distinct_col_2": {
          "terms": {
            "field": "director.keyword",
            "include" : ".*rama.*"
          }
        }
      }
    },
     "distinct_tables_3": {
      "terms": {
        "field": "_type"
      },
      "aggs": {
        "distinct_col_3": {
          "terms": {
            "field": "theatre.keyword",
            "include" : ".*rama.*"
          }
        }
      }
    }
  }
}

Adding the final response below due to size restrictions in the body.

Response for the query given above:

{
  "took": 1,
  "timed_out": false,
  "_shards": {
    "total": 5,
    "successful": 5,
    "failed": 0
  },
  "hits": {
    "total": 4,
    "max_score": 0,
    "hits": []
  },
  "aggregations": {
    "distinct_tables_1": {
      "doc_count_error_upper_bound": 0,
      "sum_other_doc_count": 0,
      "buckets": [
        {
          "key": "movie_intrnl",
          "doc_count": 2,
          "distinct_col_1": {
            "doc_count_error_upper_bound": 0,
            "sum_other_doc_count": 0,
            "buckets": [
              {
                "key": "BiographyDrama",
                "doc_count": 1
              },
              {
                "key": "Drama",
                "doc_count": 1
              }
            ]
          }
        },
        {
          "key": "movie_shows",
          "doc_count": 2,
          "distinct_col_1": {
            "doc_count_error_upper_bound": 0,
            "sum_other_doc_count": 0,
            "buckets": [
              {
                "key": "BiographyDrama",
                "doc_count": 1
              },
              {
                "key": "Drama",
                "doc_count": 1
              }
            ]
          }
        }
      ]
    },
    "distinct_tables_2": {
      "doc_count_error_upper_bound": 0,
      "sum_other_doc_count": 0,
      "buckets": [
        {
          "key": "movie_intrnl",
          "doc_count": 2,
          "distinct_col_2": {
            "doc_count_error_upper_bound": 0,
            "sum_other_doc_count": 0,
            "buckets": [
              {
                "key": "Drama1",
                "doc_count": 1
              },
              {
                "key": "Drama4",
                "doc_count": 1
              }
            ]
          }
        },
        {
          "key": "movie_shows",
          "doc_count": 2,
          "distinct_col_2": {
            "doc_count_error_upper_bound": 0,
            "sum_other_doc_count": 0,
            "buckets": []
          }
        }
      ]
    },
    "distinct_tables_3": {
      "doc_count_error_upper_bound": 0,
      "sum_other_doc_count": 0,
      "buckets": [
        {
          "key": "movie_intrnl",
          "doc_count": 2,
          "distinct_col_3": {
            "doc_count_error_upper_bound": 0,
            "sum_other_doc_count": 0,
            "buckets": []
          }
        },
        {
          "key": "movie_shows",
          "doc_count": 2,
          "distinct_col_3": {
            "doc_count_error_upper_bound": 0,
            "sum_other_doc_count": 0,
            "buckets": [
              {
                "key": "Drama4",
                "doc_count": 1
              }
            ]
          }
        }
      ]
    }
  }
}

This topic was automatically closed 28 days after the last reply. New replies are no longer allowed.