Filter on distinct nested documents

I am very new to Elasticsearch and it's data modeling techniques. My dataset is a list of users and locations the users' have been associated with. I am trying to implement a type-ahead search on locations, I need to get a distinct set of locations from my dataset and then filter on those distinct locations based on user input
(sample kibana commands are provided below)

my data:

[
      {
          "Username" : "user2",
          "FirstName" : "Jane",
          "LastName" : "Doe",
          "UserLocations" : [
            {
              "CountryDisplayText" : "USA",
              "CountryID" : 1
            }
          ]
      },
      {
          "Username" : "user1",
          "FirstName" : "John",
          "LastName" : "Doe",
          "UserLocations" : [
            {
              "CountryDisplayText" : "USA",
              "CountryID" : 1
            },
            {
              "CountryDisplayText" : "UK",
              "CountryID" : 2
            }
          ]
      },
      {
          "Username" : "user3",
          "FirstName" : "Elliot",
          "LastName" : "Richardson",
          "UserLocations" : [
            {
              "CountryDisplayText" : "USA",
              "CountryID" : 1
            },
            {
              "CountryDisplayText" : "Canada",
              "CountryID" : 3
            }
          ]
      }
    ]

For example, if the user enters U, I only want to return USA and UK in the result. But the query I wrote returns all locations for the users that contain even 1 location that starts with U.

my query:

GET /users/_search
{
  "query": {
    "nested": {
      "path": "UserLocations",
      "query": {
        "bool": {
          "must": [
            {"multi_match": {
              "query": "U",
              "type": "phrase_prefix", 
              "fields": ["UserLocations.CountryDisplayText"]
            }}
          ]
        }
      }
    }
  },
  "aggs": {
    "locations": {
      "nested": {
        "path": "UserLocations"
      },
      "aggs": {
        "distinctcountries": {
          "multi_terms": {
            "terms": [{
              "field": "UserLocations.CountryDisplayText.keyword" 
            }, {
              "field": "UserLocations.CountryID"
            }]
          }
        }
      }
    }
  },
  "_source" : false,
  "size": 0
}

Result, I get Canada in the list as well. All I would like to see is UK and USA. I basically want to do a group by and where clause on the result set.

...
"aggregations" : {
    "locations" : {
      "doc_count" : 5,
      "distinctcountries" : {
        "doc_count_error_upper_bound" : 0,
        "sum_other_doc_count" : 0,
        "buckets" : [
          {
            "key" : [
              "USA",
              1
            ],
            "key_as_string" : "USA|1",
            "doc_count" : 3
          },
          {
            "key" : [
              "Canada",
              3
            ],
            "key_as_string" : "Canada|3",
            "doc_count" : 1
          },
          {
            "key" : [
              "UK",
              2
            ],
            "key_as_string" : "UK|2",
            "doc_count" : 1
          }
        ]
      }
...

Sample data setup

PUT /users
{
  "mappings" : {
      "properties" : {  
        "Username" : {
          "type" : "text",
          "fields" : {
            "keyword" : {
              "type" : "keyword",
              "ignore_above" : 256
            }
          }
        },      
        "FirstName" : {
          "type" : "text",
          "fields" : {
            "keyword" : {
              "type" : "keyword",
              "ignore_above" : 256
            }
          }
        },
        "LastName" : {
          "type" : "text",
          "fields" : {
            "keyword" : {
              "type" : "keyword",
              "ignore_above" : 256
            }
          }
        },
        "UserLocations" : {
          "type": "nested", 
          "properties" : {
            "CountryDisplayText" : {
              "type" : "text",
              "fields" : {
                "keyword" : {
                  "type" : "keyword",
                  "ignore_above" : 256
                }
              }
            },
            "CountryID" : {
              "type" : "long"
            }            
          }
        }        
      }
    },
    "settings" : {
      "index" : {
        "routing" : {
          "allocation" : {
            "include" : {
              "_tier_preference" : "data_content"
            }
          }
        },
        "number_of_shards" : "1",
        "number_of_replicas" : "1"
      }
    }
}


PUT /users/_doc/1
{
  "Username" : "user1",
  "FirstName" : "John",
  "LastName" : "Doe",
  "UserLocations" : [
    {
      "CountryDisplayText": "USA",
      "CountryID":1
    },
    {
      "CountryDisplayText": "UK",
      "CountryID":2
    }
  ]
}

PUT /users/_doc/2
{
  "Username" : "user2",
  "FirstName" : "Jane",
  "LastName" : "Doe",
  "UserLocations" : [
    {
      "CountryDisplayText": "USA",
      "CountryID":1
    }
  ]
}

PUT /users/_doc/3
{
  "Username" : "user3",
  "FirstName" : "Elliot",
  "LastName" : "Richardson",
  "UserLocations" : [
    {
      "CountryDisplayText": "USA",
      "CountryID":1
    },
    {
      "CountryDisplayText": "Canada",
      "CountryID":3
    }
  ]
}


GET /users/_search

the following query gets me all distinct location but I want to be able to do starts with or contains search on the distinct location for typeahead.

GET /users/_search
{
  "aggs": {
    "UserLocations": {
      "nested": {
        "path": "UserLocations"
      },
      "aggs": {
        "distinctCountries": {
          "multi_terms": {
            "terms": [{
              "field": "UserLocations.CountryDisplayText.keyword" 
            }, {
              "field": "UserLocations.CountryID"
            }]
          }
        }
      }
    }
  },
  "_source" : false,
  "size": 0
}

Thank you so much for your help

tested out an option recommened on the elasticsearch slack channel. But still no luck... any help would be greatly appreciated

GET /users/_search
{
  "query": {
    "bool": {
      "must": [
        {
          "nested": {
            "path": "UserLocations",
            "query": {
              "bool": {
                "filter": [
                  {
                    "prefix": {
                      "UserLocations.CountryDisplayText.keyword": "U"
                    }
                  }
                ]
              }
            }
          }      
        }
      ]
    }
    }, 
    
  "aggs": {
   "distinctCountries":{
     "terms": {
       "field": "UserLocations.CountryDisplayText.keyword"
     }
   }
  },
  "_source" : false,
  "size": 0
}

results:

{
  "took" : 1,
  "timed_out" : false,
  "_shards" : {
    "total" : 1,
    "successful" : 1,
    "skipped" : 0,
    "failed" : 0
  },
  "hits" : {
    "total" : {
      "value" : 3,
      "relation" : "eq"
    },
    "max_score" : null,
    "hits" : [ ]
  },
  "aggregations" : {
    "distinctCountries" : {
      "doc_count_error_upper_bound" : 0,
      "sum_other_doc_count" : 0,
      "buckets" : [ ]
    }
  }
}

This topic was automatically closed 28 days after the last reply. New replies are no longer allowed.