I am very new to Elasticsearch and it's data modeling techniques. My dataset is a list of users and locations the users' have been associated with. I am trying to implement a type-ahead search on locations, I need to get a distinct set of locations from my dataset and then filter on those distinct locations based on user input
(sample kibana commands are provided below)
my data:
[
{
"Username" : "user2",
"FirstName" : "Jane",
"LastName" : "Doe",
"UserLocations" : [
{
"CountryDisplayText" : "USA",
"CountryID" : 1
}
]
},
{
"Username" : "user1",
"FirstName" : "John",
"LastName" : "Doe",
"UserLocations" : [
{
"CountryDisplayText" : "USA",
"CountryID" : 1
},
{
"CountryDisplayText" : "UK",
"CountryID" : 2
}
]
},
{
"Username" : "user3",
"FirstName" : "Elliot",
"LastName" : "Richardson",
"UserLocations" : [
{
"CountryDisplayText" : "USA",
"CountryID" : 1
},
{
"CountryDisplayText" : "Canada",
"CountryID" : 3
}
]
}
]
For example, if the user enters U, I only want to return USA and UK in the result. But the query I wrote returns all locations for the users that contain even 1 location that starts with U.
my query:
GET /users/_search
{
"query": {
"nested": {
"path": "UserLocations",
"query": {
"bool": {
"must": [
{"multi_match": {
"query": "U",
"type": "phrase_prefix",
"fields": ["UserLocations.CountryDisplayText"]
}}
]
}
}
}
},
"aggs": {
"locations": {
"nested": {
"path": "UserLocations"
},
"aggs": {
"distinctcountries": {
"multi_terms": {
"terms": [{
"field": "UserLocations.CountryDisplayText.keyword"
}, {
"field": "UserLocations.CountryID"
}]
}
}
}
}
},
"_source" : false,
"size": 0
}
Result, I get Canada in the list as well. All I would like to see is UK and USA. I basically want to do a group by and where clause on the result set.
...
"aggregations" : {
"locations" : {
"doc_count" : 5,
"distinctcountries" : {
"doc_count_error_upper_bound" : 0,
"sum_other_doc_count" : 0,
"buckets" : [
{
"key" : [
"USA",
1
],
"key_as_string" : "USA|1",
"doc_count" : 3
},
{
"key" : [
"Canada",
3
],
"key_as_string" : "Canada|3",
"doc_count" : 1
},
{
"key" : [
"UK",
2
],
"key_as_string" : "UK|2",
"doc_count" : 1
}
]
}
...
Sample data setup
PUT /users
{
"mappings" : {
"properties" : {
"Username" : {
"type" : "text",
"fields" : {
"keyword" : {
"type" : "keyword",
"ignore_above" : 256
}
}
},
"FirstName" : {
"type" : "text",
"fields" : {
"keyword" : {
"type" : "keyword",
"ignore_above" : 256
}
}
},
"LastName" : {
"type" : "text",
"fields" : {
"keyword" : {
"type" : "keyword",
"ignore_above" : 256
}
}
},
"UserLocations" : {
"type": "nested",
"properties" : {
"CountryDisplayText" : {
"type" : "text",
"fields" : {
"keyword" : {
"type" : "keyword",
"ignore_above" : 256
}
}
},
"CountryID" : {
"type" : "long"
}
}
}
}
},
"settings" : {
"index" : {
"routing" : {
"allocation" : {
"include" : {
"_tier_preference" : "data_content"
}
}
},
"number_of_shards" : "1",
"number_of_replicas" : "1"
}
}
}
PUT /users/_doc/1
{
"Username" : "user1",
"FirstName" : "John",
"LastName" : "Doe",
"UserLocations" : [
{
"CountryDisplayText": "USA",
"CountryID":1
},
{
"CountryDisplayText": "UK",
"CountryID":2
}
]
}
PUT /users/_doc/2
{
"Username" : "user2",
"FirstName" : "Jane",
"LastName" : "Doe",
"UserLocations" : [
{
"CountryDisplayText": "USA",
"CountryID":1
}
]
}
PUT /users/_doc/3
{
"Username" : "user3",
"FirstName" : "Elliot",
"LastName" : "Richardson",
"UserLocations" : [
{
"CountryDisplayText": "USA",
"CountryID":1
},
{
"CountryDisplayText": "Canada",
"CountryID":3
}
]
}
GET /users/_search
the following query gets me all distinct location but I want to be able to do starts with or contains search on the distinct location for typeahead.
GET /users/_search
{
"aggs": {
"UserLocations": {
"nested": {
"path": "UserLocations"
},
"aggs": {
"distinctCountries": {
"multi_terms": {
"terms": [{
"field": "UserLocations.CountryDisplayText.keyword"
}, {
"field": "UserLocations.CountryID"
}]
}
}
}
}
},
"_source" : false,
"size": 0
}
Thank you so much for your help