Java API - SearchHit.getVersion() method returns -1


(Mauri) #1

I want to use the ES Java API from a Servlet and want to leverage ES
version numbers. The test class below tests the basic indexing, get
and search operations from Java.
The index and get operations return the expected version numbers but
the SearchHit.getVersion() method returns -1. I am expecting this to
also return the version number, or do I have to use a get operation to
retrieve this?
Note that the class has a hardcoded path which needs to be set for the
environment it is run in. A sample of the output I get

Added: contacts/contact/contact0(ver 3)
Added: contacts/contact/contact1(ver 3)
...
Average add time: 115704536ns
Getting: contacts/contact/contact0(ver 3) {"firstName":"Steven", ...
Getting: contacts/contact/contact1(ver 3) {"firstName":"Joe", ...
...
Average get time: 541599ns
Search 'Steven': took 62ms, shards 5:0, hits 2
hit: 1.000000 contacts/contact/contact0(ver -1)
{"firstName":"Steven", ...
hit: 1.000000 contacts/contact/contact6(ver -1)
{"firstName":"Steven", ...
Search 'Joe': took 2ms, shards 5:0, hits 1
hit: 1.000000 contacts/contact/contact1(ver -1)
{"firstName":"Joe", ...
...
Average search time: 9326807ns

Some further questions
On the Java API

  1. The Java API provides the Client and TransportClient classes for
    submitting requests. Is it advisable to set up a pool of these (like
    JDBC connection pooling) or are these classes sufficiently performant
    and thread safe to be used from multiple threads, eg in a servlet that
    makes calls into ES using one of these Client classes.

On indexing
2. Does each indexing operation incur a prior index search, to check
for an existing version of a document, potentially followed by a
delete if one is found?
3. If so, is it possible to disable this if the document is guaranteed
not to exist and document IDs are guaranteed to be unique, e.g. for a bulk
import from another system that collects data and allocates unique
document IDs?

On cluster management with a large number of indexes (eg index per
user, index per month, etc)
4. Do all servers in a cluster maintain synchronized maps containing
details of all other servers in the cluster, the indexes in the
cluster and all shards/replicas of all indexes?
5. In the situation where there are many unrelated small indexes, e.g.
per user with 1000's of users (hypothetically), is there a point where
it is better to have several ES clusters rather than one large one, e.g.
4 x 5-server clusters rather than 1 x 20-server cluster? In particular,
I am wondering if there is a point where the overheads associated with
maintaining cluster configuration data, cluster management messaging,
etc. become excessive and it is better to split out to multiple clusters.

Regards
Mauri

------ Test class follows

package es_test;

import org.elasticsearch.common.settings.Settings;
import org.elasticsearch.common.settings.ImmutableSettings;
import org.elasticsearch.node.NodeBuilder;
import org.elasticsearch.node.Node;
import org.elasticsearch.client.Client;
import org.elasticsearch.common.xcontent.XContentFactory;
import org.elasticsearch.action.index.IndexResponse;
import org.elasticsearch.action.get.GetResponse;
import org.elasticsearch.action.search.SearchResponse;
import org.elasticsearch.action.search.SearchType;
import org.elasticsearch.action.admin.indices.refresh.RefreshRequest;
import org.elasticsearch.action.admin.indices.refresh.RefreshResponse;

import org.elasticsearch.index.query.FilterBuilders;
import org.elasticsearch.index.query.QueryBuilders;

import org.elasticsearch.search.SearchHits;
import org.elasticsearch.search.SearchHit;

import org.elasticsearch.common.io.stream.BytesStreamOutput;

import java.util.Date;
import java.io.ByteArrayOutputStream;

import java.io.File;
import java.io.FileReader;
import java.io.BufferedReader;

/**
*

  • @author Mauri
    */
    public class EsTest3
    {
    Node node;
    Client client;
    String indexName;
    StringBuilder builder;

/**

  • @param args the command line arguments
    */

public static void main(String[] args)
{
try
{
// Define data to index, this is just made up test data
String[][] data =
{{"Steven","Boutte", "4 Souttar Terrace", "Claremont
North","WA", "6010","AU","Steven.A.Boutte@dodgit.com", "(08) 9213
7143","3/23/1945"},
{"Joe", "Hunter", "52 Villeneuve Street", "Woolshed",
"VIC","3747","AU","Joe.C.Hunter@trashymail.com", "(03) 5331
0188","8/24/1973"},
{"Joseph","Wells", "23 Gaffney Street", "Braeside",
"VIC","3195","AU","Joseph.J.Wells@dodgit.com", "(03) 9307
7751","2/15/1952"},
{"Albert","Brown", "40 Boobialla Street", "Sandigo",
"NSW","2700","AU","Albert.L.Brown@spambob.com", "(02) 6193
4948","2/27/1943"},
{"Mike", "Rivera", "43 Edward Bennett Drive","Pemulwuy",
"NSW","2145","AU","Mike.L.Rivera@pookmail.com", "(02) 8718
3397","1/11/1944"},
{"Sandra","Brown", "84 Railway Avenue", "Taripta",
"VIC","3620","AU","Sandra.T.Hohl@dodgit.com", "(03) 5381
8994","6/23/1954"},
{"Steven","Adamski","65 Albacore Crescent", "Razorback",
"NSW","2571","AU","Curt.E.Adamski@mailinator.com","(02) 4653
4020","10/9/1954"},
{"Jane", "Quandt", "86 Gadd Avenue", "Beaufort",
"SA", "5550","AU","Darlene.E.Quandt@dodgit.com", "(08) 8315
2393","11/24/1953"},
{"Gary", "Gomez", "19 Sydney Road", "Mogo",
"NSW","2850","AU","Gary.S.Gomez@pookmail.com", "(02) 4084
2274","11/7/1956"},
{"Paul", "Roth", "99 Kogil Street", "Myall Creek",
"NSW","2403","AU","Paul.P.Roth@dodgit.com", "(02) 6788
8110","9/23/1961"}};

  // Create index and add records
  String path = "/C:/temp/es_test";
  EsTest3 esTest = new EsTest3("Cluster3", path, "contacts");
  long time = 0;

  for (int n = 0 ; n < data.length ; n++)
    time += esTest.addDoc("contact", "contact" + n,

data[n]);

  System.out.printf("Average add time: %dns \n", time /

data.length);

  // get documents
  time = 0;
  for (int n = 0 ; n < data.length ; n++)
    time += esTest.getDoc("contact", "contact" + n);

  System.out.printf("Average get time: %dns \n", time /

data.length);

  // Refresh so that updates propagate


  // do search
  Thread.sleep(2000);  // Allows index updates to propagate
  time = 0;
  for (int n = 0 ; n < data.length ; n++)
    time += esTest.search("firstName", data[n][0]);

  System.out.printf("Average search time: %dns \n", time /

data.length);

  // Finished
  esTest.close();

  System.out.printf("Done\n");

}
catch (Exception ex)
{
  ex.printStackTrace(System.out);
}

}

//---------------------------------------------------------------------------------------------

public EsTest3(String clusterName, String homePath, String
indexName)
{
// Create node and client
this.indexName = indexName;
Settings settings = ImmutableSettings.settingsBuilder()
.put("cluster.name", clusterName)
.put("path.conf", homePath)
.put("path.data", homePath + "/data")
.put("path.work", homePath + "/work")
.put("path.logs", homePath + "/logs")
.build();

node =

NodeBuilder.nodeBuilder().settings(settings).local(true).node();
client = node.client();
builder = new StringBuilder();
}

//---------------------------------------------------------------------------------------------

public void close()
{
node.close();
}

//---------------------------------------------------------------------------------------------

private long addDoc(String type, String id, String[] fields)
{

// Build JSON
builder.setLength(0);
builder.append("{");
builder.append("\"firstName\":

"").append(fields[0]).append("",");
builder.append(""lastName":"").append(fields[1]).append("",");
builder.append(""street":"").append(fields[2]).append("",");
builder.append(""city":"").append(fields[3]).append("",");
builder.append(""state":"").append(fields[4]).append("",");
builder.append(""postcode":"").append(fields[5]).append("",");
builder.append(""country":"").append(fields[6]).append("",");
builder.append(""email":"").append(fields[7]).append("",");
builder.append(""telephone":
"").append(fields[8]).append("",");
builder.append(""dob":"").append(fields[9]).append(""");
builder.append("}");

// Index the document
long dur = System.nanoTime();
IndexResponse indexResponse = client.prepareIndex(indexName, type,

id)
.setSource(builder.toString())
.execute()
.actionGet();
dur = System.nanoTime() - dur;

System.out.printf("Added: %s/%s/%s(ver %d)\n",

indexResponse.index(), indexResponse.type(), indexResponse.id(),
indexResponse.version());

return(dur);

}

//---------------------------------------------------------------------------------------------

private long getDoc(String type, String id)
{
// get document
long dur = System.nanoTime();
GetResponse getResponse = client.prepareGet(indexName, type, id)
.execute()
.actionGet();
dur = System.nanoTime() - dur;

System.out.printf("Getting:  %s/%s/%s(ver %d)  %s\n",

getResponse.index(), getResponse.type(), getResponse.id(),
getResponse.version(), getResponse.sourceAsString());

return(dur);

}

//---------------------------------------------------------------------------------------------

private long search(String field, String value)
{
// do search
long dur = System.nanoTime();
SearchResponse searchResponse = client.prepareSearch(indexName)
.setSearchType(SearchType.QUERY_THEN_FETCH)
.setQuery(QueryBuilders.termQuery(field,
value.toLowerCase()))
.setFrom(0).setSize(60) //.setExplain(true)
.execute()
.actionGet();
dur = System.nanoTime() - dur;

SearchHits hits = searchResponse.hits();

System.out.printf("Search '%s':  took %dms, shards %d:%d, hits %d

\n", value, searchResponse.tookInMillis(),
searchResponse.successfulShards(), searchResponse.failedShards(),
hits.getTotalHits());

for (int n = 0 ; n < hits.getTotalHits() && n < 10 ; n++)
{
  SearchHit hit = hits.getAt(n);
  System.out.printf("  hit:  %f %s/%s/%s(ver %d)  %s \n",

hit.score(), hit.index(), hit.type(), hit.id(), hit.getVersion(),
hit.sourceAsString());
}

return(dur);

}

}


(Shay Banon) #2

You need to add SearchRequestBuilder#setVersion(true) to get version numbers
back (it takes a bit more processing on the search / fetch execution, so it's
not enabled by default).

On Sun, Oct 23, 2011 at 2:35 AM, Mauri mauri@proactive-edge.com.au wrote:

I want to use the ES Java API from a Servlet and want to leverage ES
version numbers. The test class below tests the basic indexing, get
and search operations from Java.
The index and get operations return the expected version numbers but
the SearchHit.getVersion() method returns -1. I am expecting this to
also return the version number, or do I have to use a get operation to
retrieve this?
Note that the class has a hardcoded path which needs to be set for the
environment it is run in. A sample of the output I get

Added: contacts/contact/contact0(ver 3)
Added: contacts/contact/contact1(ver 3)
...
Average add time: 115704536ns
Getting: contacts/contact/contact0(ver 3) {"firstName":"Steven", ...
Getting: contacts/contact/contact1(ver 3) {"firstName":"Joe", ...
...
Average get time: 541599ns
Search 'Steven': took 62ms, shards 5:0, hits 2
hit: 1.000000 contacts/contact/contact0(ver -1)
{"firstName":"Steven", ...
hit: 1.000000 contacts/contact/contact6(ver -1)
{"firstName":"Steven", ...
Search 'Joe': took 2ms, shards 5:0, hits 1
hit: 1.000000 contacts/contact/contact1(ver -1)
{"firstName":"Joe", ...
...
Average search time: 9326807ns

Some further questions
On the Java API

  1. The Java API provides the Client and TransportClient classes for
    submitting requests. Is it advisable to set up a pool of these (like
    JDBC connection pooling) or are these classes sufficiently performant
    and thread safe to be used from multiple threads, eg in a servlet that
    makes calls into ES using one of these Client classes.

On indexing
2. Does each indexing operation incur a prior index search, to check
for an existing version of a document, potentially followed by a
delete if one is found?
3. If so, is it possible to disable this if the document is guaranteed
not to exist and document IDs are guaranteed to be unique, e.g. for a bulk
import from another system that collects data and allocates unique
document IDs?

On cluster management with a large number of indexes (eg index per
user, index per month, etc)
4. Do all servers in a cluster maintain synchronized maps containing
details of all other servers in the cluster, the indexes in the
cluster and all shards/replicas of all indexes?
5. In the situation where there are many unrelated small indexes, e.g.
per user with 1000's of users (hypothetically), is there a point where
it is better to have several ES clusters rather than one large one, e.g.
4 x 5-server clusters rather than 1 x 20-server cluster? In particular,
I am wondering if there is a point where the overheads associated with
maintaining cluster configuration data, cluster management messaging,
etc. become excessive and it is better to split out to multiple clusters.

Regards
Mauri

------ Test class follows

package es_test;

import org.elasticsearch.common.settings.Settings;
import org.elasticsearch.common.settings.ImmutableSettings;
import org.elasticsearch.node.NodeBuilder;
import org.elasticsearch.node.Node;
import org.elasticsearch.client.Client;
import org.elasticsearch.common.xcontent.XContentFactory;
import org.elasticsearch.action.index.IndexResponse;
import org.elasticsearch.action.get.GetResponse;
import org.elasticsearch.action.search.SearchResponse;
import org.elasticsearch.action.search.SearchType;
import org.elasticsearch.action.admin.indices.refresh.RefreshRequest;
import org.elasticsearch.action.admin.indices.refresh.RefreshResponse;

import org.elasticsearch.index.query.FilterBuilders;
import org.elasticsearch.index.query.QueryBuilders;

import org.elasticsearch.search.SearchHits;
import org.elasticsearch.search.SearchHit;

import org.elasticsearch.common.io.stream.BytesStreamOutput;

import java.util.Date;
import java.io.ByteArrayOutputStream;

import java.io.File;
import java.io.FileReader;
import java.io.BufferedReader;

/**
*

  • @author Mauri
    */
    public class EsTest3
    {
    Node node;
    Client client;
    String indexName;
    StringBuilder builder;

/**

  • @param args the command line arguments
    */

public static void main(String[] args)
{
try
{
// Define data to index, this is just made up test data
String[][] data =
{{"Steven","Boutte", "4 Souttar Terrace", "Claremont
North","WA", "6010","AU","Steven.A.Boutte@dodgit.com", "(08) 9213
7143","3/23/1945"},
{"Joe", "Hunter", "52 Villeneuve Street", "Woolshed",
"VIC","3747","AU","Joe.C.Hunter@trashymail.com", "(03) 5331
0188","8/24/1973"},
{"Joseph","Wells", "23 Gaffney Street", "Braeside",
"VIC","3195","AU","Joseph.J.Wells@dodgit.com", "(03) 9307
7751","2/15/1952"},
{"Albert","Brown", "40 Boobialla Street", "Sandigo",
"NSW","2700","AU","Albert.L.Brown@spambob.com", "(02) 6193
4948","2/27/1943"},
{"Mike", "Rivera", "43 Edward Bennett Drive","Pemulwuy",
"NSW","2145","AU","Mike.L.Rivera@pookmail.com", "(02) 8718
3397","1/11/1944"},
{"Sandra","Brown", "84 Railway Avenue", "Taripta",
"VIC","3620","AU","Sandra.T.Hohl@dodgit.com", "(03) 5381
8994","6/23/1954"},
{"Steven","Adamski","65 Albacore Crescent", "Razorback",
"NSW","2571","AU","Curt.E.Adamski@mailinator.com","(02) 4653
4020","10/9/1954"},
{"Jane", "Quandt", "86 Gadd Avenue", "Beaufort",
"SA", "5550","AU","Darlene.E.Quandt@dodgit.com", "(08) 8315
2393","11/24/1953"},
{"Gary", "Gomez", "19 Sydney Road", "Mogo",
"NSW","2850","AU","Gary.S.Gomez@pookmail.com", "(02) 4084
2274","11/7/1956"},
{"Paul", "Roth", "99 Kogil Street", "Myall Creek",
"NSW","2403","AU","Paul.P.Roth@dodgit.com", "(02) 6788
8110","9/23/1961"}};

 // Create index and add records
 String path = "/C:/temp/es_test";
 EsTest3 esTest = new EsTest3("Cluster3", path, "contacts");
 long time = 0;

 for (int n = 0 ; n < data.length ; n++)
   time += esTest.addDoc("contact", "contact" + n,

data[n]);

 System.out.printf("Average add time: %dns \n", time /

data.length);

 // get documents
 time = 0;
 for (int n = 0 ; n < data.length ; n++)
   time += esTest.getDoc("contact", "contact" + n);

 System.out.printf("Average get time: %dns \n", time /

data.length);

 // Refresh so that updates propagate


 // do search
 Thread.sleep(2000);  // Allows index updates to propagate
 time = 0;
 for (int n = 0 ; n < data.length ; n++)
   time += esTest.search("firstName", data[n][0]);

 System.out.printf("Average search time: %dns \n", time /

data.length);

 // Finished
 esTest.close();

 System.out.printf("Done\n");

}
catch (Exception ex)
{
ex.printStackTrace(System.out);
}

}

//---------------------------------------------------------------------------------------------

public EsTest3(String clusterName, String homePath, String
indexName)
{
// Create node and client
this.indexName = indexName;
Settings settings = ImmutableSettings.settingsBuilder()
.put("cluster.name", clusterName)
.put("path.conf", homePath)
.put("path.data", homePath + "/data")
.put("path.work", homePath + "/work")
.put("path.logs", homePath + "/logs")
.build();

node =
NodeBuilder.nodeBuilder().settings(settings).local(true).node();
client = node.client();
builder = new StringBuilder();
}

//---------------------------------------------------------------------------------------------

public void close()
{
node.close();
}

//---------------------------------------------------------------------------------------------

private long addDoc(String type, String id, String[] fields)
{

// Build JSON
builder.setLength(0);
builder.append("{");
builder.append(""firstName":
"").append(fields[0]).append("",");
builder.append(""lastName":"").append(fields[1]).append("",");
builder.append(""street":"").append(fields[2]).append("",");
builder.append(""city":"").append(fields[3]).append("",");
builder.append(""state":"").append(fields[4]).append("",");
builder.append(""postcode":"").append(fields[5]).append("",");
builder.append(""country":"").append(fields[6]).append("",");
builder.append(""email":"").append(fields[7]).append("",");
builder.append(""telephone":
"").append(fields[8]).append("",");
builder.append(""dob":"").append(fields[9]).append(""");
builder.append("}");

// Index the document
long dur = System.nanoTime();
IndexResponse indexResponse = client.prepareIndex(indexName, type,
id)
.setSource(builder.toString())
.execute()
.actionGet();
dur = System.nanoTime() - dur;

System.out.printf("Added: %s/%s/%s(ver %d)\n",
indexResponse.index(), indexResponse.type(), indexResponse.id(),
indexResponse.version());

return(dur);
}

//---------------------------------------------------------------------------------------------

private long getDoc(String type, String id)
{
// get document
long dur = System.nanoTime();
GetResponse getResponse = client.prepareGet(indexName, type, id)
.execute()
.actionGet();
dur = System.nanoTime() - dur;

System.out.printf("Getting: %s/%s/%s(ver %d) %s\n",
getResponse.index(), getResponse.type(), getResponse.id(),
getResponse.version(), getResponse.sourceAsString());

return(dur);
}

//---------------------------------------------------------------------------------------------

private long search(String field, String value)
{
// do search
long dur = System.nanoTime();
SearchResponse searchResponse = client.prepareSearch(indexName)

.setSearchType(SearchType.QUERY_THEN_FETCH)

.setQuery(QueryBuilders.termQuery(field,
value.toLowerCase()))
.setFrom(0).setSize(60)
//.setExplain(true)
.execute()
.actionGet();
dur = System.nanoTime() - dur;

SearchHits hits = searchResponse.hits();

System.out.printf("Search '%s': took %dms, shards %d:%d, hits %d
\n", value, searchResponse.tookInMillis(),
searchResponse.successfulShards(), searchResponse.failedShards(),
hits.getTotalHits());

for (int n = 0 ; n < hits.getTotalHits() && n < 10 ; n++)
{
SearchHit hit = hits.getAt(n);
System.out.printf(" hit: %f %s/%s/%s(ver %d) %s \n",
hit.score(), hit.index(), hit.type(), hit.id(), hit.getVersion(),
hit.sourceAsString());
}

return(dur);
}

}


(system) #3