Connecting to Elasticsearch through PySpark


from pyspark import SparkConf, SparkContext
from pyspark.sql import SQLContext


def main():
    # Connection settings for the elasticsearch-hadoop connector
    es_read_conf = {
        "es.nodes": "ES_LINK",
        "es.port": "9200",
        "es.net.http.auth.user": "ES_USERNAME",
        "es.net.http.auth.pass": "ES_PASSWORD",
        # "es.resource": "my_index",  # the index to read from is also required
    }
    print("ES_CONF_START")
    # Read the index as a Hadoop-style RDD of (docid, document) pairs
    es_rdd = sc.newAPIHadoopRDD(
        inputFormatClass="org.elasticsearch.hadoop.mr.EsInputFormat",
        keyClass="org.apache.hadoop.io.NullWritable",
        valueClass="org.elasticsearch.hadoop.mr.LinkedMapWritable",
        conf=es_read_conf)
    print("ES_CONF_DONE")

if __name__ == '__main__':
    conf = SparkConf().setAppName("ESTest")
    print('conf done')
    # sc is created at module level, so main() can use it as a global
    sc = SparkContext(conf=conf)
    print('sc done')
    sqlContext = SQLContext(sc)
    print('sql done')
    main()

java.lang.ClassNotFoundException: org.elasticsearch.hadoop.mr.LinkedMapWritable
at java.net.URLClassLoader.findClass(URLClassLoader.java:382)
at java.lang.ClassLoader.loadClass(ClassLoader.java:419)
at java.lang.ClassLoader.loadClass(ClassLoader.java:352)
at java.lang.Class.forName0(Native Method)
at java.lang.Class.forName(Class.java:348)
at org.apache.spark.util.Utils$.classForName(Utils.scala:237)
at org.apache.spark.api.python.PythonRDD$.newAPIHadoopRDDFromClassNames(PythonRDD.scala:313)
I have been trying to connect to Elasticsearch through Python and am facing this issue. Can anyone please help?


Hi @Destructord007. How did you start pyspark? It looks like the es-spark library was not available in your PySpark session. Make sure that when you start pyspark you pass it the location of your es-spark library, something like --jars /tmp/elasticsearch-spark-30_2.12-7.15.0.jar
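
For example (a sketch: the jar path and connector version are placeholders, so match them to your Elasticsearch, Spark, and Scala versions):

    # interactive PySpark shell
    pyspark --jars /tmp/elasticsearch-spark-30_2.12-7.15.0.jar

    # or when submitting the script non-interactively
    spark-submit --jars /tmp/elasticsearch-spark-30_2.12-7.15.0.jar es_test.py

    # or, if the cluster has internet access, pull the connector from Maven Central
    spark-submit --packages org.elasticsearch:elasticsearch-spark-30_2.12:7.15.0 es_test.py

The --jars option ships the connector to both the driver and the executors, which is what makes classes like org.elasticsearch.hadoop.mr.LinkedMapWritable resolvable and avoids the ClassNotFoundException above.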