I want to be able to read from and write to Elasticsearch (ES) from Spark. I've created an elasticsearch-hadoop-with-dependencies library, but it's 188 MB!
I've stripped out some things that I don't think I need - see my Gradle configuration below. What else can be removed?
dependencies {
    // the elastic search library
    compile('org.elasticsearch:elasticsearch-hadoop:2.3.0') {
        // FIXME: there was an issue zipping up this library so excluding it
        exclude group: 'org.apache.curator', module: 'apache-curator'
    }
}

// create jar file with all dependencies
//
// Builds a single "fat" jar: every file resolved by the `compile`
// configuration (minus the filtered-out jars below) is unpacked and merged
// into one archive, together with whatever the standard `jar` task packages.
task('SetupElasticSearchLibs', type: Jar) {
    baseName = 'elastic-search-with-dependencies'

    // The closure passed to `from` is evaluated lazily, so the `compile`
    // configuration is only resolved when the task actually needs its inputs.
    from {
        configurations.compile.filter({
            // Drop jars whose file name matches any of these patterns.
            // `=~` yields a Matcher, which Groovy treats as true when the
            // pattern is found anywhere in the name.
            !(it.name =~ /spark.*\.jar/ ) &&
            !(it.name =~ /jetty-all.*\.jar/ ) &&
            !(it.name =~ /servlet-api.*\.jar/ ) &&
            !(it.name =~ /pig.*\.jar/ )
        }).collect {
            // Log each dependency that ends up in the archive; handy when
            // hunting for what makes the jar so large.
            println it
            // Directories are copied as-is; jar files are expanded so their
            // contents are merged into the output jar.
            it.isDirectory() ? it : zipTree(it)
        }
    }

    // Enable Zip64 extensions so the archive may exceed the classic zip
    // limits (more than 65535 entries or larger than 4 GB).
    zip64 = true

    // Also include everything the project's own `jar` task would package.
    with jar
}