Filtering nested objects


(Oscar Romero) #1

I've read several topics about nested objects, but I cannot get this sample to work. Hopefully you can help me figure out what is wrong:

Elasticsearch version 2.2

# Create Index
PUT agency

# Mapping
PUT agency/site/_mapping
{
      "site": {
          "properties": {
            "name":{
              "type":"string"
            },
            "phones": {
               "type": "nested",
               "properties":{
                 "is_confidential":    { "type": "string"  },
                 "number":    { "type": "string"  },
                 "description": {"type" : "string"}
                 
               }
            }
         }
      }
   
}

# Indexing one document
PUT agency/site/1
{
  "site":{
    "name":"Burger Queen",
    "phones":[
      {
        "is_confidential":"true",
        "number":"10000000000",
        "description":"Manager Phone"
      },
      {
        "is_confidential":"false",
        "number":"10000000001",
        "description":"Public Line"
      },
      {
        "is_confidential":"false",
        "number":"10000000002",
        "description":"Public Line 2"
      },
      {
        "is_confidential":"false",
        "number":"10000000003",
        "description":"Complains Phone"
      }
    ]
  }
}


# Query the nested document (https://www.elastic.co/guide/en/elasticsearch/guide/current/nested-query.html)

GET /agency/site/_search
{
  "query": {
    "bool": {
      "must": [
        { "match": { "site.name": "Burger" }}, 
        {
          "nested": {
            "path": "phones", 
            "query": {
              "bool": {
                "must": [ 
                  { "match": { "phones.is_confidential": "false" }}
                ]
        }}}}
      ]
}}}


# Results
{
  "took": 4,
  "timed_out": false,
  "_shards": {
    "total": 5,
    "successful": 5,
    "failed": 0
  },
  "hits": {
    "total": 0,
    "max_score": null,
    "hits": []
  }
}

Why can't I get any results?

What I am trying to do is filter nested documents to get only those phones where is_confidential = false. Something like this:

# Results without any filter in nested documents
{
  "took": 2,
  "timed_out": false,
  "_shards": {
    "total": 5,
    "successful": 5,
    "failed": 0
  },
  "hits": {
    "total": 1,
    "max_score": 1,
    "hits": [
      {
        "_index": "agency",
        "_type": "site",
        "_id": "1",
        "_score": 1,
        "_source": {
          "site": {
            "name": "Burger Queen",
            "phones": [
              {
                "is_confidential": "true",
                "number": "10000000000",
                "description": "Manager Phone"
              },
              {
                "is_confidential": "false",
                "number": "10000000001",
                "description": "Public Line"
              },
              {
                "is_confidential": "false",
                "number": "10000000002",
                "description": "Public Line 2"
              },
              {
                "is_confidential": "false",
                "number": "10000000003",
                "description": "Complains Phone"
              }
            ]
          }
        }
      }
    ]
  }
}


# is_confidential = false results
{
  "took": 2,
  "timed_out": false,
  "_shards": {
    "total": 5,
    "successful": 5,
    "failed": 0
  },
  "hits": {
    "total": 1,
    "max_score": 1,
    "hits": [
      {
        "_index": "agency",
        "_type": "site",
        "_id": "1",
        "_score": 1,
        "_source": {
          "site": {
            "name": "Burger Queen",
            "phones": [
              {
                "is_confidential": "false",
                "number": "10000000001",
                "description": "Public Line"
              },
              {
                "is_confidential": "false",
                "number": "10000000002",
                "description": "Public Line 2"
              },
              {
                "is_confidential": "false",
                "number": "10000000003",
                "description": "Complains Phone"
              }
            ]
          }
        }
      }
    ]
  }
}



# is_confidential = true
{
  "took": 2,
  "timed_out": false,
  "_shards": {
    "total": 5,
    "successful": 5,
    "failed": 0
  },
  "hits": {
    "total": 1,
    "max_score": 1,
    "hits": [
      {
        "_index": "agency",
        "_type": "site",
        "_id": "1",
        "_score": 1,
        "_source": {
          "site": {
            "name": "Burger Queen",
            "phones": [
              {
                "is_confidential": "true",
                "number": "10000000000",
                "description": "Manager Phone"
              }
            ]
          }
        }
      }
    ]
  }
}

(Oscar Romero) #2

Rahulroc on Stack Overflow helped me identify my problem.

The problem was in how I indexed the document: the body must not be wrapped in an outer "site" object. This is the correct way to index the document:

PUT agency/site/1
{

    "name":"Burger Queen",
    "phones":[
      {
        "is_confidential":"true",
        "number":"10000000000",
        "description":"Manager Phone"
      },
      {
        "is_confidential":"false",
        "number":"10000000001",
        "description":"Public Line"
      },
      {
        "is_confidential":"false",
        "number":"10000000002",
        "description":"Public Line 2"
      },
      {
        "is_confidential":"false",
        "number":"10000000003",
        "description":"Complains Phone"
      }
    ]
  
}

I am able to get these results:

{
  "took": 7,
  "timed_out": false,
  "_shards": {
    "total": 5,
    "successful": 5,
    "failed": 0
  },
  "hits": {
    "total": 1,
    "max_score": 1.6676441,
    "hits": [
      {
        "_index": "agency",
        "_type": "site",
        "_id": "1",
        "_score": 1.6676441,
        "_source": {
          "name": "Burger Queen",
          "phones": [
            {
              "is_confidential": "true",
              "number": "10000000000",
              "description": "Manager Phone"
            },
            {
              "is_confidential": "false",
              "number": "10000000001",
              "description": "Public Line"
            },
            {
              "is_confidential": "false",
              "number": "10000000002",
              "description": "Public Line 2"
            },
            {
              "is_confidential": "false",
              "number": "10000000003",
              "description": "Complains Phone"
            }
          ]
        },
        "inner_hits": {
          "phones": {
            "hits": {
              "total": 3,
              "max_score": 1.2231436,
              "hits": [
                {
                  "_index": "agency",
                  "_type": "site",
                  "_id": "1",
                  "_nested": {
                    "field": "phones",
                    "offset": 3
                  },
                  "_score": 1.2231436,
                  "_source": {
                    "is_confidential": "false",
                    "number": "10000000003",
                    "description": "Complains Phone"
                  }
                },
                {
                  "_index": "agency",
                  "_type": "site",
                  "_id": "1",
                  "_nested": {
                    "field": "phones",
                    "offset": 2
                  },
                  "_score": 1.2231436,
                  "_source": {
                    "is_confidential": "false",
                    "number": "10000000002",
                    "description": "Public Line 2"
                  }
                },
                {
                  "_index": "agency",
                  "_type": "site",
                  "_id": "1",
                  "_nested": {
                    "field": "phones",
                    "offset": 1
                  },
                  "_score": 1.2231436,
                  "_source": {
                    "is_confidential": "false",
                    "number": "10000000001",
                    "description": "Public Line"
                  }
                }
              ]
            }
          }
        }
      }
    ]
  }
}

Is there a better way to exclude the confidential phones from the nested documents, or do I have to use the inner hits?

I would like to get a cleaner result like this:

{
  "took": 2,
  "timed_out": false,
  "_shards": {
    "total": 5,
    "successful": 5,
    "failed": 0
  },
  "hits": {
    "total": 1,
    "max_score": 1,
    "hits": [
      {
        "_index": "agency",
        "_type": "site",
        "_id": "1",
        "_score": 1,
        "_source": {
          "site": {
            "name": "Burger Queen",
            "phones": [
              {
                "is_confidential": "false",
                "number": "10000000001",
                "description": "Public Line"
              },
              {
                "is_confidential": "false",
                "number": "10000000002",
                "description": "Public Line 2"
              },
              {
                "is_confidential": "false",
                "number": "10000000003",
                "description": "Complains Phone"
              }
            ]
          }
        }
      }
    ]
  }
}

Is this possible, or should I use the inner hits results?


(system) #3