Issue searching for first_name and last_name when there is a compound name

Hi, I am a newbie to ES and I am trying to create a search-as-you-type experience that will find clients by their first name, last name, email and phone number. I am using the code below, but it does not work as expected when I have clients with compound names like:

first_name: John Maurice
last_name: Moore

Searching for `John Maurice Moore` doesn't return anything, but searching for `John Moore` does return the record. Could someone help me with this?

Thanks in advance.

Kibana script with comments describing the desired behavior:

# Drop any previous copy of the index so the script can be re-run from scratch.
DELETE clients

# Create the "clients" index.
# - first_name, last_name and email are indexed with the custom
#   "incremental_breakdown" analyzer (edge_ngram tokenizer + lowercase filter).
# - first_name and last_name are searched with the built-in "standard"
#   analyzer; email declares no search_analyzer of its own.
# NOTE(review): the tokenizer sets no "token_chars". Per the Elasticsearch
# edge_ngram tokenizer docs this keeps whitespace inside a token, which would
# explain why the second word of a compound name never matches - confirm.
PUT /clients
{
  "settings": {
    "analysis": {
      "tokenizer": {
        "incremental_tokenizer": {
          "type": "edge_ngram",
          "min_gram": "1",
          "max_gram": "50"
        }
      },
      "analyzer": {
        "incremental_breakdown": {
          "tokenizer": "incremental_tokenizer",
          "filter": ["lowercase"]
        }
      }
    }
  },
  "mappings": {
    "properties": {
      "first_name": {
        "type": "text",
        "analyzer": "incremental_breakdown",
        "search_analyzer": "standard"
      },
      "last_name": {
        "type": "text",
        "analyzer": "incremental_breakdown",
        "search_analyzer": "standard"
      },
      "email": {
        "type": "text",
        "analyzer": "incremental_breakdown",
        "fields": {
          "raw": { "type": "keyword" }
        }
      }
    }
  }
}


# Seed the index with seven sample clients (ids 1-7).
# Each document is an action line followed by its source line; both must stay
# on a single line each, as the bulk API expects newline-delimited JSON.
PUT clients/_bulk?refresh
{ "index": { "_id": 1 } }
{ "first_name": "John", "last_name": "Travolta", "email": "jmtravolta@gmail.com" }
{ "index": { "_id": 2 } }
{ "first_name": "John", "last_name": "Travis Travolta", "email": "jmtravis@gmail.com" }
{ "index": { "_id": 3 } }
{ "first_name": "John", "last_name": "Blank", "email": "jblank@email.com" }
{ "index": { "_id": 4 } }
{ "first_name": "Mark", "last_name": "Lenon", "email": "mlenon@email.com" }
{ "index": { "_id": 5 } }
{ "first_name": "Justin", "last_name": "Travis", "email": "iamjustin@email.com" }
{ "index": { "_id": 6 } }
{ "first_name": "Travis", "last_name": "Gordon", "email": "here@travis.com" }
{ "index": { "_id": 7 } }
{ "first_name": "John Maurice", "last_name": "Moore", "email": "jmmoore@email.com" }


# -------------------------------------------------------------
# Desired behavior:
#   - search for `Justin Travis` -> finds 1 record
# Issues:
#   1 - search for `John Maurice Moore` or `John Travis Travolta` doesn't
#       return anything, but it should;
#   2 - search for `Travolta` should return records 1 and 2;
#   3 - search for `John Travolta` should return records 1 and 2.
#
# The query below requires every term of the query string to match
# (operator "and") across first_name (boosted x2), last_name and email,
# and asks for highlights on the matched fields.
# NOTE(review): "nice_name" is highlighted but is not part of the mapping or
# the documents shown in this script - confirm it is intended.

POST clients/_search
{
  "query": {
    "bool": {
      "must": [
        {
          "multi_match": {
            "query": "John Travis Travolta",
            "type": "cross_fields",
            "operator": "and",
            "fields": ["first_name^2", "last_name", "email"]
          }
        }
      ]
    }
  },
  "highlight": {
    "fields": {
      "first_name": {},
      "last_name": {},
      "email": {},
      "nice_name": {}
    }
  }
}

I think I've found a solution myself: tokenizing the field on whitespace and applying an edge n-gram token filter (`ngram_filter`) to it, as shown in the code below.

I am just wondering whether you think this is a good solution or whether there is a better way to tackle this.

Thanks again.


# Drop any previous copy of the index so the script can be re-run from scratch.
DELETE clients

# Create the "clients" index for search-as-you-type.
#
# Index time ("incremental_breakdown"): split the value on whitespace, emit
# the edge n-grams (1-50 characters) of every word, then lowercase them, so
# each word of a compound name such as "John Maurice" is matchable by prefix.
#
# Search time: first_name / last_name use the built-in "standard" analyzer.
# email uses "lowercase_whitespace", which keeps the address as one token but
# lowercases it. The built-in "whitespace" analyzer does not lowercase, so a
# query containing an uppercase letter could never match the lowercased
# n-grams stored for email.
PUT /clients
{
  "settings": {
    "analysis": {
      "filter": {
        "ngram_filter": {
          "type": "edge_ngram",
          "min_gram": 1,
          "max_gram": 50
        }
      },
      "analyzer": {
        "incremental_breakdown": {
          "tokenizer": "whitespace",
          "filter": [
            "ngram_filter", "lowercase"
          ]
        },
        "lowercase_whitespace": {
          "tokenizer": "whitespace",
          "filter": [
            "lowercase"
          ]
        }
      }
    }
  },
  "mappings": {
    "properties": {
      "first_name": {
        "type": "text",
        "analyzer": "incremental_breakdown",
        "search_analyzer": "standard"
      },
      "last_name": {
        "type": "text",
        "analyzer": "incremental_breakdown",
        "search_analyzer": "standard"
      },
      "email": {
        "type": "text",
        "fields": {
          "raw": { "type": "keyword" }
        },
        "analyzer": "incremental_breakdown",
        "search_analyzer": "lowercase_whitespace"
      }
    }
  }
}


# Seed the index with seven sample clients (ids 1-7).
# Each document is an action line followed by its source line; both must stay
# on a single line each, as the bulk API expects newline-delimited JSON.
PUT clients/_bulk?refresh
{ "index": { "_id": 1 } }
{ "first_name": "John", "last_name": "Travolta", "email": "jmtravolta@gmail.com" }
{ "index": { "_id": 2 } }
{ "first_name": "John", "last_name": "Travis Travolta", "email": "jmtravis@gmail.com" }
{ "index": { "_id": 3 } }
{ "first_name": "JohnJohnJohn", "last_name": "Blank", "email": "jblank@email.com" }
{ "index": { "_id": 4 } }
{ "first_name": "Mark", "last_name": "Lenon", "email": "mlenon@email.com" }
{ "index": { "_id": 5 } }
{ "first_name": "Justin", "last_name": "Travis", "email": "iamjustin@email.com" }
{ "index": { "_id": 6 } }
{ "first_name": "Travis", "last_name": "Gordon", "email": "here@travis.com" }
{ "index": { "_id": 7 } }
{ "first_name": "John Maurice", "last_name": "Moore", "email": "jmmoore@email.com" }


# -------------------------------------------------------------
# Search clients for the term "John".
# Every term of the query string must match (operator "and") across
# first_name (boosted x2), last_name and email; highlights are requested for
# the matched fields.
# NOTE(review): "nice_name" is highlighted but is not part of the mapping or
# the documents shown in this script - confirm it is intended.

POST clients/_search
{
  "query": {
    "bool": {
      "must": [
        {
          "multi_match": {
            "query": "John",
            "type": "cross_fields",
            "operator": "and",
            "fields": ["first_name^2", "last_name", "email"]
          }
        }
      ]
    }
  },
  "highlight": {
    "fields": {
      "first_name": {},
      "last_name": {},
      "email": {},
      "nice_name": {}
    }
  }
}
1 Like

This topic was automatically closed 28 days after the last reply. New replies are no longer allowed.