I am using dockers for spark and elasticsearch. I wrote the following code from tutorial:
import org.apache.spark.SparkConf
import org.elasticsearch.spark._
val conf = sc.getConf
conf.setMaster("local")
conf.set("es.index.auto.create", "true")
conf.set("es.nodes", "0.0.0.0")
conf.set("es.port","9200")
conf.set("es.http.timeout","5m")
conf.set("es.scroll.size","50")
// conf.set("es.nodes.wan.only","true")
// conf.set("es.nodes.client.only","true")
// conf.set("es.nodes.discovery","true")
val rdd = sc.esRDD("index/type", "?q=me*")
val size = rdd.collect().size
but I got the following error:
import org.apache.spark.SparkConf
import org.elasticsearch.spark._
conf: org.apache.spark.SparkConf = org.apache.spark.SparkConf@64cac11e
res56: org.apache.spark.SparkConf = org.apache.spark.SparkConf@64cac11e
res57: org.apache.spark.SparkConf = org.apache.spark.SparkConf@64cac11e
res58: org.apache.spark.SparkConf = org.apache.spark.SparkConf@64cac11e
res59: org.apache.spark.SparkConf = org.apache.spark.SparkConf@64cac11e
res60: org.apache.spark.SparkConf = org.apache.spark.SparkConf@64cac11e
res61: org.apache.spark.SparkConf = org.apache.spark.SparkConf@64cac11e
res62: org.apache.spark.SparkConf = org.apache.spark.SparkConf@64cac11e
rdd: org.apache.spark.rdd.RDD[(String, scala.collection.Map[String,AnyRef])] = ScalaEsRDD[3] at RDD at AbstractEsRDD.scala:17
org.elasticsearch.hadoop.EsHadoopIllegalArgumentException: Cannot detect ES version - typically this happens when accessing a WAN/Cloud instance without the proper setting 'es.nodes.wan.only'
at org.elasticsearch.hadoop.rest.InitializationUtils.discoverEsVersion(InitializationUtils.java:190)
at org.elasticsearch.hadoop.rest.RestService.findPartitions(RestService.java:231)
at org.elasticsearch.spark.rdd.AbstractEsRDD.esPartitions$lzycompute(AbstractEsRDD.scala:61)
at org.elasticsearch.spark.rdd.AbstractEsRDD.esPartitions(AbstractEsRDD.scala:60)
at org.elasticsearch.spark.rdd.AbstractEsRDD.getPartitions(AbstractEsRDD.scala:27)
at org.apache.spark.rdd.RDD$$anonfun$partitions$2.apply(RDD.scala:239)
at org.apache.spark.rdd.RDD$$anonfun$partitions$2.apply(RDD.scala:237)
at scala.Option.getOrElse(Option.scala:120)
at org.apache.spark.rdd.RDD.partitions(RDD.scala:237)
at org.apache.spark.SparkContext.runJob(SparkContext.scala:1929)
at org.apache.spark.rdd.RDD$$anonfun$collect$1.apply(RDD.scala:927)
at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:150)
at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:111)
at org.apache.spark.rdd.RDD.withScope(RDD.scala:316)
at org.apache.spark.rdd.RDD.collect(RDD.scala:926)
at $iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC.(:47)
at $iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC.(:52)
at $iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC.(:54)
at $iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC.(:56)
at $iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC.(:58)
at $iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC.(:60)
at $iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC.(:62)
at $iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC.(:64)
at $iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC.(:66)
at $iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC.(:68)
at $iwC$$iwC$$iwC$$iwC$$iwC$$iwC.(:70)
at $iwC$$iwC$$iwC$$iwC$$iwC.(:72)
at $iwC$$iwC$$iwC$$iwC.(:74)
at $iwC$$iwC$$iwC.(:76)
at $iwC$$iwC.(:78)
at $iwC.(:80)
at (:82)
at .(:86)
at .()
at .(:7)
at .()
at $print()
at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)
...
As mentioned in the code (commented) I set several parameters based on suggestions on web, but non of them worked.
This is my docker-compose file for spark and elasticsearch containers:
spark:
container_name: spark
hostname: spark
image: ###
ports:
- "8088:8088"
- "8042:8042"
- "7077:7077"
- "5666:5666"
command: /opt/src/analytics/src/main/spark/etc/bootstrap.sh -d
links:
- elasticsearch
...
elasticsearch:
container_name: elasticsearch
image: ###
command: elasticsearch -Des.network.host=0.0.0.0
hostname: elasticsearch
ports:
- "9200:9200"
- "9300:9300"
I'll be thankful if anyone can help me.