Possible bug with 6.3 -- Analyzer not working correctly

I have the following mapping:

{
    "settings": {
        "index.codec": "best_compression",
        "index.shard.check_on_startup": "checksum",
        "index": {
            "number_of_shards": 10,
            "number_of_replicas": 0,
            "sort.field" : "created_at", 
            "sort.order" : "desc"
        },
        "analysis": {
            "analyzer": {
                "custom": {
                    "filter": ["lowercase"],
                    "tokenizer": "whitespace"
                }
            },
            "normalizer": {
                "my_normalizer": {
                    "type": "custom",
                    "char_filter": [],
                    "filter": ["lowercase", "asciifolding"]
                }
            }
        }
    },
    "mappings": {
        "user": {
            "_routing": {
                "required": false
            },
            "_all": {
                "enabled": false
            },
            "_source": {
                "enabled": true,
                "excludes": ["id"]
            },
            "dynamic": "false",
            "properties": {
                "description": {
                    "type": "text",
                    "analyzer": "custom"
                },
                "location": {
                    "type": "text",
                    "analyzer": "custom",
                    "fields": {
                        "keyword": {
                            "type": "keyword",
                            "ignore_above": 128
                        }
                    }
                },
                "name": {
                    "type": "text",
                    "analyzer": "custom",
                    "fields": {
                        "keyword": {
                            "type": "keyword",
                            "ignore_above": 128
                        }
                    }
                },
                "followers_count": {
                    "type": "integer"
                },
                "friends_count": {
                    "type": "integer"
                },
                "listed_count": {
                    "type": "integer"
                },
                "favourites_count": {
                    "type": "integer"
                },
                "statuses_count": {
                    "type": "integer"
                },
                "lang": {
                    "type": "keyword",
                    "normalizer": "my_normalizer"
                },
                "screen_name": {
                    "type": "keyword",
                    "normalizer": "my_normalizer"
                },
                "profile_image_url_https": {
                    "type": "keyword",
                    "normalizer": "my_normalizer"
                },
                "profile_background_image_url_https": {
                    "type": "keyword",
                    "normalizer": "my_normalizer"
                },
                "profile_use_background_image": {
                    "type": "boolean"
                },
                "verified": {
                    "type": "boolean"
                },
                "protected": {
                    "type": "boolean"
                },
                "default_profile": {
                    "type": "boolean"
                },
                "created_at": {
                    "type": "date",
                    "format": "epoch_second"
                },
                "retrieved_on": {
                    "type": "date",
                    "format": "epoch_second"
                },
                "last_updated": {
                    "type": "date",
                    "format": "epoch_second"
                },
                "id": {
                    "type": "long"
                }
            }
        }
    }
}

I tested that the analyzer works correctly to get hashtags:

curl -H "Content-Type: application/json" -XPOST localhost:9200/twitter/_analyze -d '{"text":"This is a test to see how a #hashtag is tokenized","analyzer":"custom"}'

{
"tokens" : [
{
"token" : "this",
"start_offset" : 0,
"end_offset" : 4,
"type" : "word",
"position" : 0
},
{
"token" : "is",
"start_offset" : 5,
"end_offset" : 7,
"type" : "word",
"position" : 1
},
{
"token" : "a",
"start_offset" : 8,
"end_offset" : 9,
"type" : "word",
"position" : 2
},
{
"token" : "test",
"start_offset" : 10,
"end_offset" : 14,
"type" : "word",
"position" : 3
},
{
"token" : "to",
"start_offset" : 15,
"end_offset" : 17,
"type" : "word",
"position" : 4
},
{
"token" : "see",
"start_offset" : 18,
"end_offset" : 21,
"type" : "word",
"position" : 5
},
{
"token" : "how",
"start_offset" : 22,
"end_offset" : 25,
"type" : "word",
"position" : 6
},
{
"token" : "a",
"start_offset" : 26,
"end_offset" : 27,
"type" : "word",
"position" : 7
},
{
"token" : "#hashtag",
"start_offset" : 28,
"end_offset" : 36,
"type" : "word",
"position" : 8
},
{
"token" : "is",
"start_offset" : 37,
"end_offset" : 39,
"type" : "word",
"position" : 9
},
{
"token" : "tokenized",
"start_offset" : 40,
"end_offset" : 49,
"type" : "word",
"position" : 10
}
]
}

However, when I index and try to search on hashtags, I get no results:

curl -H "Content-Type: application/json" -XGET "localhost:9200/twitter/user/_search?pretty=true" -d '{"query":{"match":{"description":{"query":"#privacy"}}}}'

{
  "took" : 1,
  "timed_out" : false,
  "_shards" : {
    "total" : 10,
    "successful" : 10,
    "skipped" : 0,
    "failed" : 0
  },
  "hits" : {
    "total" : 0,
    "max_score" : null,
    "hits" : [ ]
  }
}

Is this something that broke recently?

Bug: Elasticsearch does not appear to be applying the same analyzer at search time as the one configured for indexing.
Edit: Honestly, I don't know what's going on here. It's still not working as documented.

Can you show some of the documents that should be returned but aren't? I can try to reproduce it locally.

I've not heard of anyone else encountering something like this yet; it would be good to hunt down the problem.

This topic was automatically closed 28 days after the last reply. New replies are no longer allowed.