I am using Elasticsearch and Kibana version 8.12.0. I am encountering an issue where certain queries on the title
field of my index unexpectedly return 0 results.
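Expected behavior: both queries below (query_string and regexp) should match every document whose title field contains any variant
of the word 'award' (e.g. Award, Awards, Awarded, Awarding), i.e. 9 of the 10 sample documents (all except id 9).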
Steps to Reproduce:
- Index Creation Request:
PUT /my-index
{
"settings": {
"index": {
"number_of_shards": 1,
"number_of_replicas": 0,
"query.default_field": "body"
},
"analysis": {
"analyzer": {
"index_text_unstemmed": {
"type": "custom",
"char_filter": [
"remove_url_char_filter",
"alias_for_question_mark_char_filter",
"replace_startswith_number_char_filter",
"has_non_starting_number_char_filter",
"alias_for_colon_char_filter"
],
"filter": [
"lowercase",
"possessive_english_stemmer"
],
"tokenizer": "standard"
},
"query_text_unstemmed": {
"type": "custom",
"char_filter": [],
"filter": [
"lowercase",
"possessive_english_stemmer"
],
"tokenizer": "standard"
}
},
"char_filter": {
"remove_url_char_filter": {
"type": "pattern_replace",
"pattern": "(https?:\\/\\/(?:www.|(?!www))[^\\s.]+\\.[^\\s]{2,}|www\\.[^\\s]+\\.[^\\s]{2,})",
"replacement": ""
},
"alias_for_question_mark_char_filter": {
"type": "pattern_replace",
"pattern": "(\\?)",
"replacement": " hasquestionmark "
},
"replace_startswith_number_char_filter": {
"type": "pattern_replace",
"pattern": "(startswith [0-9]+)",
"replacement": "startswithnumber startswith "
},
"has_non_starting_number_char_filter": {
"type": "pattern_replace",
"pattern": "([0-9]+)",
"replacement": " hasnonstartingnumber "
},
"alias_for_colon_char_filter": {
"type": "pattern_replace",
"pattern": "(:)",
"replacement": " aliasforcolon "
}
},
"filter": {
"possessive_english_stemmer": {
"type": "stemmer",
"name": "possessive_english"
}
}
}
},
"mappings": {
"dynamic": "runtime",
"properties": {
"id": {
"type": "keyword",
"index": true,
"store": true
},
"title": {
"type": "text",
"index": true,
"store": false,
"analyzer": "index_text_unstemmed",
"search_analyzer": "query_text_unstemmed"
},
"body": {
"type": "text",
"analyzer": "english",
"index": true,
"store": false
}
}
}
}
- Sample Data Indexing:
POST /my-index/_bulk
{ "index": { "_id": "1" } }
{ "id": "1", "title": "Best Movie Award given", "body": "The movie received critical acclaim and won the best movie award of the year." }
{ "index": { "_id": "2" } }
{ "id": "2", "title": "Awarded for Bravery", "body": "The soldier was awarded for his outstanding bravery in battle." }
{ "index": { "_id": "3" } }
{ "id": "3", "title": "Awards Ceremony Highlights", "body": "The annual awards ceremony highlighted achievements in various fields." }
{ "index": { "_id": "4" } }
{ "id": "4", "title": "Awarding Scholarships to Students", "body": "The foundation is awarding scholarships to underprivileged students this year." }
{ "index": { "_id": "5" } }
{ "id": "5", "title": "Award-Winning Author Releases New Book", "body": "The award-winning author has captivated readers with their latest novel." }
{ "index": { "_id": "6" } }
{ "id": "6", "title": "Prestigious Lifetime Achievement Award", "body": "The recipient was honored with a prestigious lifetime achievement award." }
{ "index": { "_id": "7" } }
{ "id": "7", "title": "Nominees Announced for the Annual awards", "body": "The list of nominees for the annual awards has been made public." }
{ "index": { "_id": "8" } }
{ "id": "8", "title": "Community award winners", "body": "The award winners were celebrated for their exceptional community service efforts." }
{ "index": { "_id": "9" } }
{ "id": "9", "title": "Excellence in Research", "body": "The organization is committed to awarding excellence in scientific research." }
{ "index": { "_id": "10" } }
{ "id": "10", "title": "Award Season Gala Events", "body": "Award season is here, bringing glamorous gala events to the city." }
- Query 1 (query_string query):
POST my-index/_search
{
"from": 0,
"size": 10,
"track_total_hits": true,
"explain": true,
"profile": true,
"_source": ["title"],
"query": {
"bool": {
"must": [
{
"query_string": {
"query": "title:(/Award*/)",
"default_operator": "AND",
"auto_generate_synonyms_phrase_query": false
}
}
]
}
}
}
Response:
{
"took": 1,
"timed_out": false,
"_shards": {
"total": 1,
"successful": 1,
"skipped": 0,
"failed": 0
},
"hits": {
"total": {
"value": 0,
"relation": "eq"
},
"max_score": null,
"hits": []
},
"profile": {
"shards": [
{
"id": "[LfsSGEOdSMyJ61xnTNdvHg][my-index][0]",
"node_id": "LfsSGEOdSMyJ61xnTNdvHg",
"shard_id": 0,
"index": "my-index",
"cluster": "(local)",
"searches": [
{
"query": [
{
"type": "MultiTermQueryConstantScoreBlendedWrapper",
"description": "title:/Award*/",
"time_in_nanos": 54642,
"breakdown": {
"set_min_competitive_score_count": 0,
"match_count": 0,
"shallow_advance_count": 0,
"set_min_competitive_score": 0,
"next_doc": 323,
"match": 0,
"score_count": 0,
"next_doc_count": 1,
"compute_max_score_count": 0,
"compute_max_score": 0,
"advance": 0,
"advance_count": 0,
"score": 0,
"count_weight_count": 0,
"build_scorer_count": 2,
"create_weight": 986,
"shallow_advance": 0,
"count_weight": 0,
"create_weight_count": 1,
"build_scorer": 53333
}
}
],
"rewrite_time": 13805,
"collector": [
{
"name": "QueryPhaseCollector",
"reason": "search_query_phase",
"time_in_nanos": 7851,
"children": [
{
"name": "SimpleTopScoreDocCollector",
"reason": "search_top_hits",
"time_in_nanos": 2989
}
]
}
]
}
],
"aggregations": []
}
]
}
}
- Query 2 (regexp query):
POST my-index/_search
{
"from": 0,
"size": 10,
"track_total_hits": true,
"explain": true,
"profile": true,
"_source": ["title"],
"query": {
"regexp": {
"title": {
"value": "Award*"
}
}
}
}
Response:
{
"took": 1,
"timed_out": false,
"_shards": {
"total": 1,
"successful": 1,
"skipped": 0,
"failed": 0
},
"hits": {
"total": {
"value": 0,
"relation": "eq"
},
"max_score": null,
"hits": []
},
"profile": {
"shards": [
{
"id": "[LfsSGEOdSMyJ61xnTNdvHg][my-index][0]",
"node_id": "LfsSGEOdSMyJ61xnTNdvHg",
"shard_id": 0,
"index": "my-index",
"cluster": "(local)",
"searches": [
{
"query": [
{
"type": "MultiTermQueryConstantScoreBlendedWrapper",
"description": "title:/Award*/",
"time_in_nanos": 93805,
"breakdown": {
"set_min_competitive_score_count": 0,
"match_count": 0,
"shallow_advance_count": 0,
"set_min_competitive_score": 0,
"next_doc": 5190,
"match": 0,
"score_count": 0,
"next_doc_count": 1,
"compute_max_score_count": 0,
"compute_max_score": 0,
"advance": 0,
"advance_count": 0,
"score": 0,
"count_weight_count": 0,
"build_scorer_count": 2,
"create_weight": 609,
"shallow_advance": 0,
"count_weight": 0,
"create_weight_count": 1,
"build_scorer": 88006
}
}
],
"rewrite_time": 8431,
"collector": [
{
"name": "QueryPhaseCollector",
"reason": "search_query_phase",
"time_in_nanos": 8105,
"children": [
{
"name": "SimpleTopScoreDocCollector",
"reason": "search_top_hits",
"time_in_nanos": 3462
}
]
}
]
}
],
"aggregations": []
}
]
}
}