Hello All,
I am trying to access index from databricks via es query and wanted a result set based on query filter, add to data frame.
Requirement: Get output with :cd.ID=1 and cd.s_D<>0.
I get: All the cd.ID both 0 and one.
When I run this in elastic console , I get exact output where as with DF I get all . Any help would be appreciated.
query="""
{
"size" : 1000,
"query" : {
"bool" : {
"must" : [
{
"bool" : {
"must" : [
{
"term" : {
"PtK" : {
"value" : "123",
"boost" : 1.0
}
}
},
{
"nested" : {
"query" : {
"term" : {
"cd.ID" : {
"value" : 1,
"boost" : 1.0
}
}
},
"path" : "cd",
"ignore_unmapped" : false,
"score_mode" : "none",
"boost" : 1.0,
"inner_hits" : {
"name" : "cd_61",
"ignore_unmapped" : false,
"from" : 0,
"size" : 99,
"version" : false,
"seq_no_primary_term" : false,
"explain" : false,
"track_scores" : false,
"_source" : false,
"stored_fields" : "_none_"
}
}
}
],
"boost" : 1.0
}
},
{
"nested" : {
"query" : {
"bool" : {
"must_not" : [
{
"term" : {
"cd.s_D" : {
"value" : 0,
"boost" : 1.0
}
}
}
],
"boost" : 1.0
}
},
"path" : "cd",
"ignore_unmapped" : false,
"score_mode" : "none",
"boost" : 1.0,
"inner_hits" : {
"name" : "cd_62",
"ignore_unmapped" : false,
"from" : 0,
"size" : 99,
"version" : false,
"seq_no_primary_term" : false,
"explain" : false,
"track_scores" : false,
"_source" : false,
"stored_fields" : "_none_",
"fields" : [
{
"field" : "cd.s_D"
}
]
}
}
}
],
"boost" : 1.0
}
},
"_source" : false,
"fields" : [
{
"field" : "PtK"
}
]
}
"""
reader = spark.read \
.format("org.elasticsearch.spark.sql") \
.option("es.read.metadata","true") \
.option("es.nodes.wan.only","true") \
.option("es.port","XXX") \
.option("es.net.ssl","true") \
.option("es.net.http.auth.user",user) \
.option("es.net.http.auth.pass",pw) \
.option("es.nodes", url) \
.option("es.query", query)
df = reader.load("myindex")
df.createOrReplaceTempView('Finalresult')