Unexpected 0 Results for Queries on Custom Analyzed Field (title)

I am using Elasticsearch and Kibana version 8.12.0. I am encountering an issue where certain queries on the title field of my index unexpectedly return 0 results.

Ideally, these queries (query_string and regexp) should match every document whose title field contains any variant of the word 'award' (e.g. "Award", "Awards", "Awarded", "Awarding").

Steps to Reproduce:

  1. Index Creation Request:
PUT /my-index
{
  "settings": {
    "index": {
      "number_of_shards": 1,
      "number_of_replicas": 0,
      "query.default_field": "body"
    },
    "analysis": {
      "analyzer": {
        "index_text_unstemmed": {
            "type": "custom",
            "char_filter": [
              "remove_url_char_filter",
              "alias_for_question_mark_char_filter",
              "replace_startswith_number_char_filter",
              "has_non_starting_number_char_filter",
              "alias_for_colon_char_filter"
            ],
            "filter": [
              "lowercase",
              "possessive_english_stemmer"
            ],
            "tokenizer": "standard"
        },
        "query_text_unstemmed": {
            "type": "custom",
            "char_filter": [],
            "filter": [
              "lowercase",
              "possessive_english_stemmer"
            ],
            "tokenizer": "standard"
        }
      },
      "char_filter": {
        "remove_url_char_filter": {
          "type": "pattern_replace",
          "pattern": "(https?:\\/\\/(?:www.|(?!www))[^\\s.]+\\.[^\\s]{2,}|www\\.[^\\s]+\\.[^\\s]{2,})",
          "replacement": ""
        },
        "alias_for_question_mark_char_filter": {
          "type": "pattern_replace",
          "pattern": "(\\?)",
          "replacement": " hasquestionmark "
        },
        "replace_startswith_number_char_filter": {
          "type": "pattern_replace",
          "pattern": "(startswith [0-9]+)",
          "replacement": "startswithnumber startswith "
        },
        "has_non_starting_number_char_filter": {
          "type": "pattern_replace",
          "pattern": "([0-9]+)",
          "replacement": " hasnonstartingnumber "
        },
        "alias_for_colon_char_filter": {
          "type": "pattern_replace",
          "pattern": "(:)",
          "replacement": " aliasforcolon "
        }
      },
      "filter": {
        "possessive_english_stemmer": {
          "type": "stemmer",
          "name": "possessive_english"
        }
      }
    }
  },
  "mappings": {
    "dynamic": "runtime",
    "properties": {
      "id": {
        "type": "keyword",
        "index": true,
        "store": true
      },
      "title": {
        "type": "text",
        "index": true,
        "store": false,
        "analyzer": "index_text_unstemmed",
        "search_analyzer": "query_text_unstemmed"
      },
      "body": {
        "type": "text",
        "analyzer": "english",
        "index": true,
        "store": false
      }
    }
  }
}
  2. Sample Data Indexing:
POST /my-index/_bulk
{ "index": { "_id": "1" } }
{ "id": "1", "title": "Best Movie Award given", "body": "The movie received critical acclaim and won the best movie award of the year." }
{ "index": { "_id": "2" } }
{ "id": "2", "title": "Awarded for Bravery", "body": "The soldier was awarded for his outstanding bravery in battle." }
{ "index": { "_id": "3" } }
{ "id": "3", "title": "Awards Ceremony Highlights", "body": "The annual awards ceremony highlighted achievements in various fields." }
{ "index": { "_id": "4" } }
{ "id": "4", "title": "Awarding Scholarships to Students", "body": "The foundation is awarding scholarships to underprivileged students this year." }
{ "index": { "_id": "5" } }
{ "id": "5", "title": "Award-Winning Author Releases New Book", "body": "The award-winning author has captivated readers with their latest novel." }
{ "index": { "_id": "6" } }
{ "id": "6", "title": "Prestigious Lifetime Achievement Award", "body": "The recipient was honored with a prestigious lifetime achievement award." }
{ "index": { "_id": "7" } }
{ "id": "7", "title": "Nominees Announced for the Annual awards", "body": "The list of nominees for the annual awards has been made public." }
{ "index": { "_id": "8" } }
{ "id": "8", "title": "Community award winners", "body": "The award winners were celebrated for their exceptional community service efforts." }
{ "index": { "_id": "9" } }
{ "id": "9", "title": "Excellence in Research", "body": "The organization is committed to awarding excellence in scientific research." }
{ "index": { "_id": "10" } }
{ "id": "10", "title": "Award Season Gala Events", "body": "Award season is here, bringing glamorous gala events to the city." }
  3. Query 1 (query_string query):
POST my-index/_search
{
  "from": 0,
  "size": 10,
  "track_total_hits": true,
  "explain": true, 
  "profile": true,
  "_source": ["title"],
  "query": {
    "bool": {
      "must": [
        {
          "query_string": {
            "query": "title:(/Award*/)",
            "default_operator": "AND",
            "auto_generate_synonyms_phrase_query": false
          }
        }
      ]
    }
  }
}

Response:

{
  "took": 1,
  "timed_out": false,
  "_shards": {
    "total": 1,
    "successful": 1,
    "skipped": 0,
    "failed": 0
  },
  "hits": {
    "total": {
      "value": 0,
      "relation": "eq"
    },
    "max_score": null,
    "hits": []
  },
  "profile": {
    "shards": [
      {
        "id": "[LfsSGEOdSMyJ61xnTNdvHg][my-index][0]",
        "node_id": "LfsSGEOdSMyJ61xnTNdvHg",
        "shard_id": 0,
        "index": "my-index",
        "cluster": "(local)",
        "searches": [
          {
            "query": [
              {
                "type": "MultiTermQueryConstantScoreBlendedWrapper",
                "description": "title:/Award*/",
                "time_in_nanos": 54642,
                "breakdown": {
                  "set_min_competitive_score_count": 0,
                  "match_count": 0,
                  "shallow_advance_count": 0,
                  "set_min_competitive_score": 0,
                  "next_doc": 323,
                  "match": 0,
                  "score_count": 0,
                  "next_doc_count": 1,
                  "compute_max_score_count": 0,
                  "compute_max_score": 0,
                  "advance": 0,
                  "advance_count": 0,
                  "score": 0,
                  "count_weight_count": 0,
                  "build_scorer_count": 2,
                  "create_weight": 986,
                  "shallow_advance": 0,
                  "count_weight": 0,
                  "create_weight_count": 1,
                  "build_scorer": 53333
                }
              }
            ],
            "rewrite_time": 13805,
            "collector": [
              {
                "name": "QueryPhaseCollector",
                "reason": "search_query_phase",
                "time_in_nanos": 7851,
                "children": [
                  {
                    "name": "SimpleTopScoreDocCollector",
                    "reason": "search_top_hits",
                    "time_in_nanos": 2989
                  }
                ]
              }
            ]
          }
        ],
        "aggregations": []
      }
    ]
  }
}
  4. Query 2 (regexp query):
POST my-index/_search
{
  "from": 0,
  "size": 10,
  "track_total_hits": true,
  "explain": true, 
  "profile": true,
  "_source": ["title"], 
  "query": {
    "regexp": {
      "title": {
        "value": "Award*"
        
      }
    }
  }
}

Response:

{
  "took": 1,
  "timed_out": false,
  "_shards": {
    "total": 1,
    "successful": 1,
    "skipped": 0,
    "failed": 0
  },
  "hits": {
    "total": {
      "value": 0,
      "relation": "eq"
    },
    "max_score": null,
    "hits": []
  },
  "profile": {
    "shards": [
      {
        "id": "[LfsSGEOdSMyJ61xnTNdvHg][my-index][0]",
        "node_id": "LfsSGEOdSMyJ61xnTNdvHg",
        "shard_id": 0,
        "index": "my-index",
        "cluster": "(local)",
        "searches": [
          {
            "query": [
              {
                "type": "MultiTermQueryConstantScoreBlendedWrapper",
                "description": "title:/Award*/",
                "time_in_nanos": 93805,
                "breakdown": {
                  "set_min_competitive_score_count": 0,
                  "match_count": 0,
                  "shallow_advance_count": 0,
                  "set_min_competitive_score": 0,
                  "next_doc": 5190,
                  "match": 0,
                  "score_count": 0,
                  "next_doc_count": 1,
                  "compute_max_score_count": 0,
                  "compute_max_score": 0,
                  "advance": 0,
                  "advance_count": 0,
                  "score": 0,
                  "count_weight_count": 0,
                  "build_scorer_count": 2,
                  "create_weight": 609,
                  "shallow_advance": 0,
                  "count_weight": 0,
                  "create_weight_count": 1,
                  "build_scorer": 88006
                }
              }
            ],
            "rewrite_time": 8431,
            "collector": [
              {
                "name": "QueryPhaseCollector",
                "reason": "search_query_phase",
                "time_in_nanos": 8105,
                "children": [
                  {
                    "name": "SimpleTopScoreDocCollector",
                    "reason": "search_top_hits",
                    "time_in_nanos": 3462
                  }
                ]
              }
            ]
          }
        ],
        "aggregations": []
      }
    ]
  }
}

This topic was automatically closed 28 days after the last reply. New replies are no longer allowed.