Problem with inner_hits in elasticsearch_dsl Python client


(Nate) #1

Hi everyone,

I've run into a problem accessing inner_hits data with the Python elasticsearch_dsl client, and I'm hoping someone here can help. I'll let my code do the talking: the important parts are in the comments after the test data has been generated, and the script is completely self-contained, so anyone should be able to reproduce the same result:

from elasticsearch import Elasticsearch
from elasticsearch_dsl import Index, Mapping, Nested, Search, Q
from elasticsearch_dsl.connections import connections

# Name of the scratch index used by every step of this reproduction.
index = "test_index"

# Register the connection with elasticsearch_dsl and keep a handle on it
# for the direct create() calls made further down.
es_con = connections.create_connection(hosts=["localhost:9200"])
es_index = Index(index)
# Start from a clean slate: drop any index left over from a previous run
# before creating it again.
if es_index.exists():
    es_index.delete()
es_index.create()

# Declare "sentences" as a Nested field on the "paragraph" doc type and
# save that mapping to the index; the nested query below targets this path.
para_mapping = Mapping("paragraph")
para_mapping.field("sentences", Nested())
para_mapping.save(index)

# Build two paragraphs keyed by number, each carrying two nested sentences
# whose text encodes its paragraph/sentence position (e.g. "p0s1").
test_paras = {
    para_num: {
        "label": "Test Paragraph p{}".format(para_num),
        "sentences": [
            {"text": "Test Sentence p{}s{}".format(para_num, sent_num)}
            for sent_num in range(2)
        ],
    }
    for para_num in range(2)
}

# Store every generated paragraph under an id derived from its number
# (para_id_p0, para_id_p1, ...).
for para_num, para_body in test_paras.iteritems():
    es_con.create(
        index=index,
        doc_type="paragraph",
        id="para_id_p{}".format(para_num),
        body=para_body,
    )

# Flush the index once all paragraphs have been written, before querying.
es_index.flush()

# Build a nested query on the "sentences" path for the single sentence
# containing the term "p0s1", and ask for inner hits.
q = Search(using=es_con).index(index).doc_type('paragraph')
# The field name contains a dot, so it cannot be written as a keyword
# argument and is passed through dict unpacking instead. inner_hits={} is an
# empty options dict (presumably requesting inner hits with default settings).
q = q.query('nested', path='sentences', query=Q('term', **{"sentences.text": "p0s1"}), inner_hits={})
# Keep the Search object: `q` is rebound to the response on the next line,
# and the same query body is reused with the low-level client afterwards.
saved_q = q
q = q.execute()
# We got the expected paragraph
print "PARA_ID:                            ", q.hits[0].meta.id
# With all sentences
print "PARA[SENTENCES]:                    ", q.hits[0].sentences
# We can see inner_hits is included in para.meta
print "DIR PARA.META:                      ", dir(q.hits[0].meta)
# And it contains a "sentences" object
print "DIR PARA.META.INNER_HITS:           ", dir(q.hits[0].meta.inner_hits)
# Of type elasticsearch_dsl.result.Response
print "TYPE PARA.META.INNER_HITS.SENTENCES:", type(q.hits[0].meta.inner_hits.sentences)
# That contains a "hits" object
print "DIR PARA.META.INNER_HITS.SENTENCES: ", dir(q.hits[0].meta.inner_hits.sentences)
# But every attempted action yields a KeyError: '_type' in result.AttrList()
# (this final print is the statement that reproduces the failure)
print q.hits[0].meta.inner_hits.sentences

# So let's try Elasticsearch()
# Cross-check with the low-level client: send the exact same query body
# (serialized from the saved Search object) and read the inner hit directly
# out of the raw response.
es = Elasticsearch()

results = es.search(index=index, body=saved_q.to_dict())
# Drill down to the text of the first inner hit of the first matching paragraph.
print "\nES RESULT:", results["hits"]["hits"][0]["inner_hits"]["sentences"]["hits"]["hits"][0]["_source"]["text"]

I'm new to Elasticsearch, so I can't be sure I'm not doing something incorrectly with the mappings, but I do doubt it since the Elasticsearch query works just fine. Any ideas?

Thanks in advance for any help!


(system) #2

This topic was automatically closed 28 days after the last reply. New replies are no longer allowed.