Empty inner_hits in compound filter

I'm seeing what appears to be aberrant behavior in inner_hits results when a bool query containing a nested query is itself embedded inside another bool query.

Test data (abbreviated due to character limit):

# MAPPING
PUT unit_testing
{
    "mappings": {
        "document": {
            "properties": {
                "display_name": {"type": "text"},
                "metadata": {
                    "properties": {
                        "NAME": {"type": "text"}
                    }
                }
            }
        },
        "paragraph": {
            "_parent": {"type": "document"},
            "_routing": {"required": true},
            "properties": {
                "checksum": {"type": "text"},
                "sentences": {
                    "type": "nested",
                    "properties": {
                        "text": {"type": "text"}
                    }
                }
            }
        }
    }
}

# DOCUMENT X 2 (only the first is shown here)
PUT unit_testing/document/doc_id_d0
{
    "display_name": "Test Document d0",
    "paragraphs": [
        "para_id_d0p0",
        "para_id_d0p1"
    ],
    "metadata": {"NAME": "Test Document d0 Metadata"}
}

# PARAGRAPH X 4 (only the first is shown here)
PUT unit_testing/paragraph/para_id_d0p0?parent=doc_id_d0
{
    "checksum": "para_checksum_d0p0",
    "sentences": [
        {"text": "Test sentence d0p0s0"},
        {"text": "Test sentence d0p0s1 ODD"},
        {"text": "Test sentence d0p0s2 EVEN"},
        {"text": "Test sentence d0p0s3 ODD"},
        {"text": "Test sentence d0p0s4 EVEN"}
    ]
}

This initial query behaves as I would expect:

GET unit_testing/paragraph/_search
{
    "_source": "false", 
    "query": {
        "bool": {
            "must": [
                {
                    "has_parent": {
                        "query": {
                            "match_phrase": {
                                "metadata.NAME": "Test Document d0 Metadata"
                            }
                        }, 
                        "type": "document"
                    }
                }, 
                {
                    "nested": {
                        "inner_hits": {}, 
                        "path": "sentences", 
                        "query": {
                            "match": {
                                "sentences.text": "d0p0s0"
                            }
                        }
                    }
                }
            ]
        }
    }
}

It yields an inner_hits object containing the one sentence that matched the predicate (some fields removed for clarity):

{
  "hits": {
    "hits": [
      {
        "_source": {},
        "inner_hits": {
          "sentences": {
            "hits": {
              "hits": [
                {
                  "_source": {
                    "text": "Test sentence d0p0s0"
                  }
                }
              ]
            }
          }
        }
      }
    ]
  }
}

The following query embeds the query above within a parent "should" clause, to create a logical OR relationship between the initial query and an additional query that matches a single sentence:

# MISBEHAVING QUERY
GET unit_testing/paragraph/_search
{
    "_source": "false", 
    "query": {
        "bool": {
            "should": [
                {
                    "bool": {
                        "must": [
                            {
                                "has_parent": {
                                    "query": {
                                        "match_phrase": {
                                            "metadata.NAME": "Test Document d0 Metadata"
                                        }
                                    }, 
                                    "type": "document"
                                }
                            }, 
                            {
                                "nested": {
                                    "inner_hits": {}, 
                                    "path": "sentences", 
                                    "query": {
                                        "match": {
                                            "sentences.text": "d0p0s0"
                                        }
                                    }
                                }
                            }
                        ]
                    }
                }, 
                {
                    "nested": {
                        "inner_hits": {}, 
                        "path": "sentences", 
                        "query": {
                            "match": {
                                "sentences.text": "d1p0s0"
                            }
                        }
                    }
                }
            ]
        }
    }
}

While the "d1" query outputs the result one would expect, with an inner_hits object containing the matching sentence, the original "d0" query now yields an empty inner_hits object:

{
  "hits": {
    "hits": [
      {
        "_source": {},
        "inner_hits": {
          "sentences": {
            "hits": {
              "total": 0,
              "hits": []
            }
          }
        }
      },
      {
        "_source": {},
        "inner_hits": {
          "sentences": {
            "hits": {
              "hits": [
                {
                  "_source": {
                    "text": "Test sentence d1p0s0"
                  }
                }
              ]
            }
          }
        }
      }
    ]
  }
}

Although I'm using the elasticsearch_dsl Python library to build and combine these queries, and I'm something of a novice with respect to the Query DSL, the query format looks solid to me.

What am I missing?

1 Like

This topic was automatically closed 28 days after the last reply. New replies are no longer allowed.