Match on Ngram field where the words are in order without scripting

Hello, I have a question about auto-complete using n-gram search. I'm using Elasticsearch 7.3, so the newer "wildcard" field type (added in 7.9) is not available to me.

This is my index:

PUT ngram_test
{
  "mappings": {
    "properties": {
      "name": {
        "type": "text",
        "fields": {
          "edgengram": {
            "type": "text",
            "analyzer": "edge_ngram_analyzer",
            "search_analyzer": "main_analyzer"
          },
          "keyword": {
            "type": "keyword",
            "normalizer": "lower_keyword"
          }
        },
        "analyzer": "main_analyzer"
      }
    }
  },
  "settings": {
    "index": {
      "number_of_shards": "1",
      "analysis": {
        "normalizer": {
          "lower_keyword": {
            "filter": "lowercase",
            "type": "custom"
          }
        },
        "analyzer": {
          "edge_ngram_analyzer": {
            "filter": [
              "lowercase"
            ],
            "tokenizer": "edge_ngram_tokenizer"
          },
          "main_analyzer": {
            "filter": "lowercase",
            "tokenizer": "standard"
          }
        },
        "tokenizer": {
          "edge_ngram_tokenizer": {
            "token_chars": [
              "letter",
              "digit"
            ],
            "min_gram": "1",
            "type": "edge_ngram",
            "max_gram": "15"
          }
        }
      },
      "number_of_replicas": "0"
    }
  }
}

And docs:

POST ngram_test/_doc/1
{
  "name": "Burger King"
}

POST ngram_test/_doc/2
{
  "name": "Burger King Japan"
}

POST ngram_test/_doc/3
{
  "name": "Great Burger of the Kings"
}

POST ngram_test/_doc/4
{
  "name": "King of Burger"
}

POST ngram_test/_doc/5
{
  "name": "Kind Burgers"
}

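My goal is for the user to type "Burger Ki" and get back only the first three docs, i.e. the names in which a word starting with "burger" is followed, not necessarily immediately, by a word starting with "ki".
So far I'm doing it the following way and it works, but I was wondering whether there is a way to do it without scripting, since scripting can be slow, if I'm not mistaken: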

GET ngram_test/_search
{
  "query": {
    "bool": {
      "must": [
        {
          "match": {
            "name.edgengram": {
              "query": "Burger Ki",
              "operator": "and"
            }
          }
        }
      ],
      "filter": {
        "script": {
          "script": {
            "params": {
              "input": ["burger", "ki"]
            },
            "source": """
            int pos = -1;
            String name = doc['name.keyword'].value;
            for (word in params.input) {
              int current_pos = name.indexOf(word);
              if (current_pos > pos) {
                pos = current_pos;
              }
              else {
                return false;
              }
            }
            return true;
            """
          }
        }
      }
    }
  }
}

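^^ The script also doesn't handle the case where the same ngram appears more than once, at different positions in the name (indexOf only returns the first occurrence). For example, "King Burger King" would be rejected even though "King" does follow "Burger". That's something I'll need to solve too.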

Thanks!

This topic was automatically closed 28 days after the last reply. New replies are no longer allowed.