Bulk Uploading


(prashant5375) #1

Hi there,
I have just started using Elasticsearch. My problem is that I have to
insert 50 million rows of data into Elasticsearch. To do
this I have used the following code, which adds the documents in a loop.
public static void main(String[] args) throws Exception {
// TODO Auto-generated method stub

	 Node node = nodeBuilder().local(true).
				settings(ImmutableSettings.settingsBuilder().
				put("index.number_of_shards", 1).
				put("index.number_of_replicas", 1).
				build()).build().start();
	Client	client = node.client();
		 String mapping =

XContentFactory.jsonBuilder().startObject().startObject("type1")
.startObject("properties").startObject("location").field("type",
"geo_point").field("lat_lon", true).endObject().endObject()
.endObject().endObject().string();

client.admin().indices().prepareCreate("test").addMapping("type1",
mapping).setSettings(settingsBuilder().put("number_of_shards",
"1")).execute().actionGet();
BulkRequestBuilder brb = client.prepareBulk();

	  FileInputStream fstream = new FileInputStream("e:\\yp_CA.txt");
	  // Get the object of DataInputStream
	  DataInputStream in = new DataInputStream(fstream);
	  BufferedReader br = new BufferedReader(new InputStreamReader(in));
	  String strLine;
	  long start=System.currentTimeMillis();
	  int i=0;
	  //Read File Line By Line
	  try{
		  XContentBuilder person = null;
	  while ((strLine = br.readLine()) != null)   {
		  String tmp[] =strLine.replaceAll("\"", "").split("~\\^\\~");
			//  System.out.println(tmp[4].split("\\|"));
			  String mergeField[]=tmp[4].split("\\|");
			  String lati = mergeField[5];
			  String longi = mergeField[6];
			  i++;
			 // System.out.println(i);
		  brb.add(client.prepareIndex("test", "type1", i+"")
					.setSource(jsonBuilder().startObject()
				            // .field("MERGE_FIELD", tmp[4])
				             .field("MAPPED_FSN", tmp[0].replaceAll("\"", ""))
				             .startObject("location").field("lat",

Double.parseDouble(lati)).field("lon",
Double.parseDouble(longi)).endObject()
.endObject()));

			  person = jsonBuilder().startObject()

				        .field("gender", tmp[4]) //aleaList(genders) returns "M"

or "F"
// [....] creation of a random person with some
attributes
.endObject();

			  //brb.execute().actionGet();
			//  brb.setRefresh(true);

			  brb.add(client.prepareIndex("toto", "tata")
        				.setRefresh(false)
        				.setReplicationType(ReplicationType.ASYNC)
				        .setSource(person));
  if(i==100000){
			  i=0;
			brb.execute().actionGet();


			 // break;
		  }




	  }

	  brb.execute().actionGet();
	  }catch(Exception e){
		  brb.execute().actionGet();
	  }
	  brb.execute().actionGet();




	System.out.println("done!!!");

}

After every 100000 records I call "brb.execute().actionGet();" to get
the data inserted into the Elasticsearch indexes.
But after the first 100000 records are inserted, when it goes on to insert another 100000
records it fails with an OutOfMemory heap-size error.
My question is: where am I going wrong? And can anyone share a complete Java
example of inserting a large amount of data into ES?
Thanks in advance
Regards
Prashant


(Craig Brown) #2

I'd probably try inserting something less than 100k records at a time. We
usually do blocks of 10K at a time. Even with multiple threads running, I
haven't seen any problems over many millions of records.

  • Craig

On Thu, Jan 26, 2012 at 11:31 PM, BeyondLimit prashant.vicky@gmail.comwrote:

Hi , there
I have just started using Elastic Search , my problem is i have to
insert 50 million of data rows in the Elastic Search . While doing
this i have used such code in the loop.
public static void main(String[] args) throws Exception {
// TODO Auto-generated method stub

            Node node = nodeBuilder().local(true).

settings(ImmutableSettings.settingsBuilder().
put("index.number_of_shards", 1).
put("index.number_of_replicas", 1).
build()).build().start();
Client client = node.client();
String mapping =
XContentFactory.jsonBuilder().startObject().startObject("type1")

.startObject("properties").startObject("location").field("type",
"geo_point").field("lat_lon", true).endObject().endObject()
.endObject().endObject().string();

client.admin().indices().prepareCreate("test").addMapping("type1",
mapping).setSettings(settingsBuilder().put("number_of_shards",
"1")).execute().actionGet();
BulkRequestBuilder brb = client.prepareBulk();

             FileInputStream fstream = new

FileInputStream("e:\yp_CA.txt");
// Get the object of DataInputStream
DataInputStream in = new DataInputStream(fstream);
BufferedReader br = new BufferedReader(new
InputStreamReader(in));
String strLine;
long start=System.currentTimeMillis();
int i=0;
//Read File Line By Line
try{
XContentBuilder person = null;
while ((strLine = br.readLine()) != null) {
String tmp[] =strLine.replaceAll(""",
"").split("~\^\~");
// System.out.println(tmp[4].split("\|"));
String mergeField[]=tmp[4].split("\|");
String lati = mergeField[5];
String longi = mergeField[6];
i++;
// System.out.println(i);
brb.add(client.prepareIndex("test", "type1", i+"")

.setSource(jsonBuilder().startObject()
//
.field("MERGE_FIELD", tmp[4])
.field("MAPPED_FSN",
tmp[0].replaceAll(""", ""))

.startObject("location").field("lat",
Double.parseDouble(lati)).field("lon",
Double.parseDouble(longi)).endObject()
.endObject()));

                             person = jsonBuilder().startObject()

                                           .field("gender", tmp[4])

//aleaList(genders) returns "M"
or "F"
// [....] creation of a
random person with some
attributes
.endObject();

                             //brb.execute().actionGet();
                           //  brb.setRefresh(true);

                             brb.add(client.prepareIndex("toto",

"tata")
.setRefresh(false)

.setReplicationType(ReplicationType.ASYNC)
.setSource(person));
if(i==100000){
i=0;
brb.execute().actionGet();

                            // break;
                     }




             }

             brb.execute().actionGet();
             }catch(Exception e){
                     brb.execute().actionGet();
             }
             brb.execute().actionGet();




           System.out.println("done!!!");

   }

After every 100000 record i call "brb.execute().actionGet();" to get
data inserted to the ElasticSearch indexes.
But after 100000 record inserted when it goes to insert another 100000
record its goes OutOfMemory Heap size error.
My question where i am wrong .And can any one share complete java
example to insert large data in ES.
Thanks in advance
Regards
Prashant

--

CRAIG BROWN
chief architect
youwho, Inc.

www.youwho.com http://www.youwho.com/

T: 801.855. 0921
M: 801.913. 0939


(prashant5375) #3

Thanks for the reply. But I have another problem: when I call "
brb.execute().actionGet(); " again, I lose the data that I
inserted previously.
I mean to say, suppose I loop over 10k records and then call "
brb.execute().actionGet(); ", and after that I try to insert another
10k by repeating the same action " brb.execute().actionGet(); "; now
I lose the previous data.
It would be a great help if you could send me some sample Java code.
Thanks in advance.
Regards
Prashant

On Jan 27, 9:21 pm, Craig Brown cbr...@youwho.com wrote:

I'd probably try inserting something less that 100k records at a time. We
usually do blocks of 10K at a time. Even with multiple threads running, I
haven't seen any problems over many millions of records.

  • Craig

On Thu, Jan 26, 2012 at 11:31 PM, BeyondLimit prashant.vi...@gmail.comwrote:

Hi , there
I have just started using Elastic Search , my problem is i have to
insert 50 million of data rows in the Elastic Search . While doing
this i have used such code in the loop.
public static void main(String[] args) throws Exception {
// TODO Auto-generated method stub

            Node node = nodeBuilder().local(true).

settings(ImmutableSettings.settingsBuilder().
put("index.number_of_shards", 1).
put("index.number_of_replicas", 1).
build()).build().start();
Client client = node.client();
String mapping =
XContentFactory.jsonBuilder().startObject().startObject("type1")

.startObject("properties").startObject("location").field("type",
"geo_point").field("lat_lon", true).endObject().endObject()
.endObject().endObject().string();

client.admin().indices().prepareCreate("test").addMapping("type1",
mapping).setSettings(settingsBuilder().put("number_of_shards",
"1")).execute().actionGet();
BulkRequestBuilder brb = client.prepareBulk();

             FileInputStream fstream = new

FileInputStream("e:\yp_CA.txt");
// Get the object of DataInputStream
DataInputStream in = new DataInputStream(fstream);
BufferedReader br = new BufferedReader(new
InputStreamReader(in));
String strLine;
long start=System.currentTimeMillis();
int i=0;
//Read File Line By Line
try{
XContentBuilder person = null;
while ((strLine = br.readLine()) != null) {
String tmp[] =strLine.replaceAll(""",
"").split("~\^\~");
// System.out.println(tmp[4].split("\|"));
String mergeField[]=tmp[4].split("\|");
String lati = mergeField[5];
String longi = mergeField[6];
i++;
// System.out.println(i);
brb.add(client.prepareIndex("test", "type1", i+"")

.setSource(jsonBuilder().startObject()
//
.field("MERGE_FIELD", tmp[4])
.field("MAPPED_FSN",
tmp[0].replaceAll(""", ""))

.startObject("location").field("lat",
Double.parseDouble(lati)).field("lon",
Double.parseDouble(longi)).endObject()
.endObject()));

                             person = jsonBuilder().startObject()
                                           .field("gender", tmp[4])

//aleaList(genders) returns "M"
or "F"
// [....] creation of a
random person with some
attributes
.endObject();

                             //brb.execute().actionGet();
                           //  brb.setRefresh(true);
                             brb.add(client.prepareIndex("toto",

"tata")
.setRefresh(false)

.setReplicationType(ReplicationType.ASYNC)
.setSource(person));
if(i==100000){
i=0;
brb.execute().actionGet();

                            // break;
                     }
             }
             brb.execute().actionGet();
             }catch(Exception e){
                     brb.execute().actionGet();
             }
             brb.execute().actionGet();
           System.out.println("done!!!");
   }

After every 100000 record i call "brb.execute().actionGet();" to get
data inserted to the ElasticSearch indexes.
But after 100000 record inserted when it goes to insert another 100000
record its goes OutOfMemory Heap size error.
My question where i am wrong .And can any one share complete java
example to insert large data in ES.
Thanks in advance
Regards
Prashant

--

CRAIG BROWN
chief architect
youwho, Inc.

www.youwho.comhttp://www.youwho.com/

T: 801.855. 0921
M: 801.913. 0939


(Berkay Mollamustafaoglu-2) #4

What do you mean by "loose previous data"? Do you mean you don't see it in
the index? How do you check it, run a query? It may take a while for the
docs to be indexed, depending on the size of your docs and resources
available.

Even 10K is a lot. You don't have to do such large chunks with ES. Try
starting with 100 docs, and see how the performance is.

Regards,
Berkay Mollamustafaoglu
mberkay on yahoo, google and skype

On Fri, Jan 27, 2012 at 11:29 AM, BeyondLimit prashant.vicky@gmail.comwrote:

Thanks for the reply. But i have another problem when i call "
brb.execute().actionGet(); " again i loose previous data what i
inserted previously.
I mean to say , Suppose i looped 10k record and then call "
brb.execute().actionGet(); " and after that i try to insert another
10k by repeating the same action " brb.execute().actionGet(); " , now
i loose previous data.
It will be great help if you can send me some sample Java Code.
Thanks in advance.
Regards
Prashant

On Jan 27, 9:21 pm, Craig Brown cbr...@youwho.com wrote:

I'd probably try inserting something less that 100k records at a time. We
usually do blocks of 10K at a time. Even with multiple threads running, I
haven't seen any problems over many millions of records.

  • Craig

On Thu, Jan 26, 2012 at 11:31 PM, BeyondLimit <prashant.vi...@gmail.com
wrote:

Hi , there
I have just started using Elastic Search , my problem is i have to
insert 50 million of data rows in the Elastic Search . While doing
this i have used such code in the loop.
public static void main(String[] args) throws Exception {
// TODO Auto-generated method stub

            Node node = nodeBuilder().local(true).

settings(ImmutableSettings.settingsBuilder().
put("index.number_of_shards",
1).

                                   put("index.number_of_replicas",

1).

                                   build()).build().start();
           Client  client = node.client();
                    String mapping =

XContentFactory.jsonBuilder().startObject().startObject("type1")

.startObject("properties").startObject("location").field("type",
"geo_point").field("lat_lon", true).endObject().endObject()
.endObject().endObject().string();

client.admin().indices().prepareCreate("test").addMapping("type1",
mapping).setSettings(settingsBuilder().put("number_of_shards",
"1")).execute().actionGet();
BulkRequestBuilder brb = client.prepareBulk();

             FileInputStream fstream = new

FileInputStream("e:\yp_CA.txt");
// Get the object of DataInputStream
DataInputStream in = new DataInputStream(fstream);
BufferedReader br = new BufferedReader(new
InputStreamReader(in));
String strLine;
long start=System.currentTimeMillis();
int i=0;
//Read File Line By Line
try{
XContentBuilder person = null;
while ((strLine = br.readLine()) != null) {
String tmp[] =strLine.replaceAll(""",
"").split("~\^\~");
//
System.out.println(tmp[4].split("\|"));

                             String

mergeField[]=tmp[4].split("\|");

                             String lati = mergeField[5];
                             String longi = mergeField[6];
                             i++;
                            // System.out.println(i);
                     brb.add(client.prepareIndex("test", "type1",

i+"")

.setSource(jsonBuilder().startObject()
//
.field("MERGE_FIELD", tmp[4])

.field("MAPPED_FSN",

tmp[0].replaceAll(""", ""))

.startObject("location").field("lat",
Double.parseDouble(lati)).field("lon",
Double.parseDouble(longi)).endObject()
.endObject()));

                             person = jsonBuilder().startObject()
                                           .field("gender", tmp[4])

//aleaList(genders) returns "M"
or "F"
// [....] creation of a
random person with some
attributes
.endObject();

                             //brb.execute().actionGet();
                           //  brb.setRefresh(true);
                             brb.add(client.prepareIndex("toto",

"tata")
.setRefresh(false)

.setReplicationType(ReplicationType.ASYNC)
.setSource(person));
if(i==100000){
i=0;
brb.execute().actionGet();

                            // break;
                     }
             }
             brb.execute().actionGet();
             }catch(Exception e){
                     brb.execute().actionGet();
             }
             brb.execute().actionGet();
           System.out.println("done!!!");
   }

After every 100000 record i call "brb.execute().actionGet();" to get
data inserted to the ElasticSearch indexes.
But after 100000 record inserted when it goes to insert another 100000
record its goes OutOfMemory Heap size error.
My question where i am wrong .And can any one share complete java
example to insert large data in ES.
Thanks in advance
Regards
Prashant

--

CRAIG BROWN
chief architect
youwho, Inc.

www.youwho.comhttp://www.youwho.com/

T: 801.855. 0921
M: 801.913. 0939


(Craig Brown) #5

Strange. Let me look through what you have and compare to what we're doing.
Ours runs very stable and we're inserting as many as 10k records/sec for a
number of hours.

  • Craig

On Fri, Jan 27, 2012 at 9:29 AM, BeyondLimit prashant.vicky@gmail.comwrote:

Thanks for the reply. But i have another problem when i call "
brb.execute().actionGet(); " again i loose previous data what i
inserted previously.
I mean to say , Suppose i looped 10k record and then call "
brb.execute().actionGet(); " and after that i try to insert another
10k by repeating the same action " brb.execute().actionGet(); " , now
i loose previous data.
It will be great help if you can send me some sample Java Code.
Thanks in advance.
Regards
Prashant

On Jan 27, 9:21 pm, Craig Brown cbr...@youwho.com wrote:

I'd probably try inserting something less that 100k records at a time. We
usually do blocks of 10K at a time. Even with multiple threads running, I
haven't seen any problems over many millions of records.

  • Craig

On Thu, Jan 26, 2012 at 11:31 PM, BeyondLimit <prashant.vi...@gmail.com
wrote:

Hi , there
I have just started using Elastic Search , my problem is i have to
insert 50 million of data rows in the Elastic Search . While doing
this i have used such code in the loop.
public static void main(String[] args) throws Exception {
// TODO Auto-generated method stub

            Node node = nodeBuilder().local(true).

settings(ImmutableSettings.settingsBuilder().
put("index.number_of_shards",
1).

                                   put("index.number_of_replicas",

1).

                                   build()).build().start();
           Client  client = node.client();
                    String mapping =

XContentFactory.jsonBuilder().startObject().startObject("type1")

.startObject("properties").startObject("location").field("type",
"geo_point").field("lat_lon", true).endObject().endObject()
.endObject().endObject().string();

client.admin().indices().prepareCreate("test").addMapping("type1",
mapping).setSettings(settingsBuilder().put("number_of_shards",
"1")).execute().actionGet();
BulkRequestBuilder brb = client.prepareBulk();

             FileInputStream fstream = new

FileInputStream("e:\yp_CA.txt");
// Get the object of DataInputStream
DataInputStream in = new DataInputStream(fstream);
BufferedReader br = new BufferedReader(new
InputStreamReader(in));
String strLine;
long start=System.currentTimeMillis();
int i=0;
//Read File Line By Line
try{
XContentBuilder person = null;
while ((strLine = br.readLine()) != null) {
String tmp[] =strLine.replaceAll(""",
"").split("~\^\~");
//
System.out.println(tmp[4].split("\|"));

                             String

mergeField[]=tmp[4].split("\|");

                             String lati = mergeField[5];
                             String longi = mergeField[6];
                             i++;
                            // System.out.println(i);
                     brb.add(client.prepareIndex("test", "type1",

i+"")

.setSource(jsonBuilder().startObject()
//
.field("MERGE_FIELD", tmp[4])

.field("MAPPED_FSN",

tmp[0].replaceAll(""", ""))

.startObject("location").field("lat",
Double.parseDouble(lati)).field("lon",
Double.parseDouble(longi)).endObject()
.endObject()));

                             person = jsonBuilder().startObject()
                                           .field("gender", tmp[4])

//aleaList(genders) returns "M"
or "F"
// [....] creation of a
random person with some
attributes
.endObject();

                             //brb.execute().actionGet();
                           //  brb.setRefresh(true);
                             brb.add(client.prepareIndex("toto",

"tata")
.setRefresh(false)

.setReplicationType(ReplicationType.ASYNC)
.setSource(person));
if(i==100000){
i=0;
brb.execute().actionGet();

                            // break;
                     }
             }
             brb.execute().actionGet();
             }catch(Exception e){
                     brb.execute().actionGet();
             }
             brb.execute().actionGet();
           System.out.println("done!!!");
   }

After every 100000 record i call "brb.execute().actionGet();" to get
data inserted to the ElasticSearch indexes.
But after 100000 record inserted when it goes to insert another 100000
record its goes OutOfMemory Heap size error.
My question where i am wrong .And can any one share complete java
example to insert large data in ES.
Thanks in advance
Regards
Prashant

--

CRAIG BROWN
chief architect
youwho, Inc.

www.youwho.comhttp://www.youwho.com/

T: 801.855. 0921
M: 801.913. 0939

--

CRAIG BROWN
chief architect
youwho, Inc.

www.youwho.com http://www.youwho.com/

T: 801.855. 0921
M: 801.913. 0939


(prashant5375) #6

I mean to say, after the first insert I see the size of the index
change to 13 MB; then after the 2nd insert the size changes to some 12
MB or so. Logically, the size should grow to 25 MB or
more... it should keep growing.
Regards
Prashant

On Jan 27, 9:37 pm, Berkay Mollamustafaoglu mber...@gmail.com wrote:

What do you mean by "loose previous data". Do you mean you don't see it in
the index? How do you check it, run a query? It may take a while for the
docs to be indexed, depending on the size of your docs and resources
available.

Even 10K is a lot. You don't have to do such large chunks with ES. Try
starting with 100 docs, and see how the performance is.

Regards,
Berkay Mollamustafaoglu
mberkay on yahoo, google and skype

On Fri, Jan 27, 2012 at 11:29 AM, BeyondLimit prashant.vi...@gmail.comwrote:

Thanks for the reply. But i have another problem when i call "
brb.execute().actionGet(); " again i loose previous data what i
inserted previously.
I mean to say , Suppose i looped 10k record and then call "
brb.execute().actionGet(); " and after that i try to insert another
10k by repeating the same action " brb.execute().actionGet(); " , now
i loose previous data.
It will be great help if you can send me some sample Java Code.
Thanks in advance.
Regards
Prashant

On Jan 27, 9:21 pm, Craig Brown cbr...@youwho.com wrote:

I'd probably try inserting something less that 100k records at a time. We
usually do blocks of 10K at a time. Even with multiple threads running, I
haven't seen any problems over many millions of records.

  • Craig

On Thu, Jan 26, 2012 at 11:31 PM, BeyondLimit <prashant.vi...@gmail.com
wrote:

Hi , there
I have just started using Elastic Search , my problem is i have to
insert 50 million of data rows in the Elastic Search . While doing
this i have used such code in the loop.
public static void main(String[] args) throws Exception {
// TODO Auto-generated method stub

            Node node = nodeBuilder().local(true).

settings(ImmutableSettings.settingsBuilder().
put("index.number_of_shards",
1).

                                   put("index.number_of_replicas",

1).

                                   build()).build().start();
           Client  client = node.client();
                    String mapping =

XContentFactory.jsonBuilder().startObject().startObject("type1")

.startObject("properties").startObject("location").field("type",
"geo_point").field("lat_lon", true).endObject().endObject()
.endObject().endObject().string();

client.admin().indices().prepareCreate("test").addMapping("type1",
mapping).setSettings(settingsBuilder().put("number_of_shards",
"1")).execute().actionGet();
BulkRequestBuilder brb = client.prepareBulk();

             FileInputStream fstream = new

FileInputStream("e:\yp_CA.txt");
// Get the object of DataInputStream
DataInputStream in = new DataInputStream(fstream);
BufferedReader br = new BufferedReader(new
InputStreamReader(in));
String strLine;
long start=System.currentTimeMillis();
int i=0;
//Read File Line By Line
try{
XContentBuilder person = null;
while ((strLine = br.readLine()) != null) {
String tmp[] =strLine.replaceAll(""",
"").split("~\^\~");
//
System.out.println(tmp[4].split("\|"));

                             String

mergeField[]=tmp[4].split("\|");

                             String lati = mergeField[5];
                             String longi = mergeField[6];
                             i++;
                            // System.out.println(i);
                     brb.add(client.prepareIndex("test", "type1",

i+"")

.setSource(jsonBuilder().startObject()
//
.field("MERGE_FIELD", tmp[4])

.field("MAPPED_FSN",

tmp[0].replaceAll(""", ""))

.startObject("location").field("lat",
Double.parseDouble(lati)).field("lon",
Double.parseDouble(longi)).endObject()
.endObject()));

                             person = jsonBuilder().startObject()
                                           .field("gender", tmp[4])

//aleaList(genders) returns "M"
or "F"
// [....] creation of a
random person with some
attributes
.endObject();

                             //brb.execute().actionGet();
                           //  brb.setRefresh(true);
                             brb.add(client.prepareIndex("toto",

"tata")
.setRefresh(false)

.setReplicationType(ReplicationType.ASYNC)
.setSource(person));
if(i==100000){
i=0;
brb.execute().actionGet();

                            // break;
                     }
             }
             brb.execute().actionGet();
             }catch(Exception e){
                     brb.execute().actionGet();
             }
             brb.execute().actionGet();
           System.out.println("done!!!");
   }

After every 100000 record i call "brb.execute().actionGet();" to get
data inserted to the ElasticSearch indexes.
But after 100000 record inserted when it goes to insert another 100000
record its goes OutOfMemory Heap size error.
My question where i am wrong .And can any one share complete java
example to insert large data in ES.
Thanks in advance
Regards
Prashant

--

CRAIG BROWN
chief architect
youwho, Inc.

www.youwho.comhttp://www.youwho.com/

T: 801.855. 0921
M: 801.913. 0939


(Berkay Mollamustafaoglu-2) #7

You can check the number of documents via the API or using BigDesk to get a
better indication of how many docs are indexed.

Regards,
Berkay Mollamustafaoglu
mberkay on yahoo, google and skype

On Fri, Jan 27, 2012 at 11:43 AM, BeyondLimit prashant.vicky@gmail.comwrote:

I mean to say , while doing first insert i see the size of index
changes to 13 mb , then after 2nd insert the size changes to some 12
mb of so.According to the logic it should grow the size to 25 mb or
more .... it should keep growing.
Regards
Prashant

On Jan 27, 9:37 pm, Berkay Mollamustafaoglu mber...@gmail.com wrote:

What do you mean by "loose previous data". Do you mean you don't see it
in
the index? How do you check it, run a query? It may take a while for the
docs to be indexed, depending on the size of your docs and resources
available.

Even 10K is a lot. You don't have to do such large chunks with ES. Try
starting with 100 docs, and see how the performance is.

Regards,
Berkay Mollamustafaoglu
mberkay on yahoo, google and skype

On Fri, Jan 27, 2012 at 11:29 AM, BeyondLimit <prashant.vi...@gmail.com
wrote:

Thanks for the reply. But i have another problem when i call "
brb.execute().actionGet(); " again i loose previous data what i
inserted previously.
I mean to say , Suppose i looped 10k record and then call "
brb.execute().actionGet(); " and after that i try to insert another
10k by repeating the same action " brb.execute().actionGet(); " , now
i loose previous data.
It will be great help if you can send me some sample Java Code.
Thanks in advance.
Regards
Prashant

On Jan 27, 9:21 pm, Craig Brown cbr...@youwho.com wrote:

I'd probably try inserting something less that 100k records at a
time. We

usually do blocks of 10K at a time. Even with multiple threads
running, I

haven't seen any problems over many millions of records.

  • Craig

On Thu, Jan 26, 2012 at 11:31 PM, BeyondLimit <
prashant.vi...@gmail.com

wrote:

Hi , there
I have just started using Elastic Search , my problem is i have to
insert 50 million of data rows in the Elastic Search . While doing
this i have used such code in the loop.
public static void main(String[] args) throws Exception {
// TODO Auto-generated method stub

            Node node = nodeBuilder().local(true).

settings(ImmutableSettings.settingsBuilder().

put("index.number_of_shards",

1).

put("index.number_of_replicas",

1).

                                   build()).build().start();
           Client  client = node.client();
                    String mapping =

XContentFactory.jsonBuilder().startObject().startObject("type1")

.startObject("properties").startObject("location").field("type",
"geo_point").field("lat_lon", true).endObject().endObject()

.endObject().endObject().string();

client.admin().indices().prepareCreate("test").addMapping("type1",
mapping).setSettings(settingsBuilder().put("number_of_shards",
"1")).execute().actionGet();
BulkRequestBuilder brb = client.prepareBulk();

             FileInputStream fstream = new

FileInputStream("e:\yp_CA.txt");
// Get the object of DataInputStream
DataInputStream in = new DataInputStream(fstream);
BufferedReader br = new BufferedReader(new
InputStreamReader(in));
String strLine;
long start=System.currentTimeMillis();
int i=0;
//Read File Line By Line
try{
XContentBuilder person = null;
while ((strLine = br.readLine()) != null) {
String tmp[] =strLine.replaceAll(""",
"").split("~\^\~");
//
System.out.println(tmp[4].split("\|"));

                             String

mergeField[]=tmp[4].split("\|");

                             String lati = mergeField[5];
                             String longi = mergeField[6];
                             i++;
                            // System.out.println(i);
                     brb.add(client.prepareIndex("test",

"type1",

i+"")

.setSource(jsonBuilder().startObject()
//
.field("MERGE_FIELD", tmp[4])

.field("MAPPED_FSN",

tmp[0].replaceAll(""", ""))

.startObject("location").field("lat",
Double.parseDouble(lati)).field("lon",
Double.parseDouble(longi)).endObject()
.endObject()));

                             person =

jsonBuilder().startObject()

                                           .field("gender",

tmp[4])

//aleaList(genders) returns "M"
or "F"
// [....] creation
of a

random person with some
attributes
.endObject();

                             //brb.execute().actionGet();
                           //  brb.setRefresh(true);

brb.add(client.prepareIndex("toto",

"tata")
.setRefresh(false)

.setReplicationType(ReplicationType.ASYNC)
.setSource(person));
if(i==100000){
i=0;
brb.execute().actionGet();

                            // break;
                     }
             }
             brb.execute().actionGet();
             }catch(Exception e){
                     brb.execute().actionGet();
             }
             brb.execute().actionGet();
           System.out.println("done!!!");
   }

After every 100000 record i call "brb.execute().actionGet();" to
get

data inserted to the ElasticSearch indexes.
But after 100000 record inserted when it goes to insert another
100000

record its goes OutOfMemory Heap size error.
My question where i am wrong .And can any one share complete java
example to insert large data in ES.
Thanks in advance
Regards
Prashant

--

CRAIG BROWN
chief architect
youwho, Inc.

www.youwho.comhttp://www.youwho.com/

T: 801.855. 0921
M: 801.913. 0939


(prashant5375) #8

I know what you are trying to say, but if I have 2 GB of data which
I am trying to insert into ES, it is not possible that the index
size will remain only 13 MB.
I think I am doing something wrong, but I don't know what; it's as if I
am not adding data in append mode, and it replaces the old data instead.
Regards
Prashant

On Jan 27, 9:53 pm, Berkay Mollamustafaoglu mber...@gmail.com wrote:

You can check the number of documents via the API or using BigDesk to get a
better indication of how many docs are indexed.

Regards,
Berkay Mollamustafaoglu
mberkay on yahoo, google and skype

On Fri, Jan 27, 2012 at 11:43 AM, BeyondLimit prashant.vi...@gmail.comwrote:

I mean to say , while doing first insert i see the size of index
changes to 13 mb , then after 2nd insert the size changes to some 12
mb of so.According to the logic it should grow the size to 25 mb or
more .... it should keep growing.
Regards
Prashant

On Jan 27, 9:37 pm, Berkay Mollamustafaoglu mber...@gmail.com wrote:

What do you mean by "loose previous data". Do you mean you don't see it
in
the index? How do you check it, run a query? It may take a while for the
docs to be indexed, depending on the size of your docs and resources
available.

Even 10K is a lot. You don't have to do such large chunks with ES. Try
starting with 100 docs, and see how the performance is.

Regards,
Berkay Mollamustafaoglu
mberkay on yahoo, google and skype

On Fri, Jan 27, 2012 at 11:29 AM, BeyondLimit <prashant.vi...@gmail.com
wrote:

Thanks for the reply. But i have another problem when i call "
brb.execute().actionGet(); " again i loose previous data what i
inserted previously.
I mean to say , Suppose i looped 10k record and then call "
brb.execute().actionGet(); " and after that i try to insert another
10k by repeating the same action " brb.execute().actionGet(); " , now
i loose previous data.
It will be great help if you can send me some sample Java Code.
Thanks in advance.
Regards
Prashant

On Jan 27, 9:21 pm, Craig Brown cbr...@youwho.com wrote:

I'd probably try inserting something less that 100k records at a
time. We

usually do blocks of 10K at a time. Even with multiple threads
running, I

haven't seen any problems over many millions of records.

  • Craig

On Thu, Jan 26, 2012 at 11:31 PM, BeyondLimit <
prashant.vi...@gmail.com

wrote:

Hi , there
I have just started using Elastic Search , my problem is i have to
insert 50 million of data rows in the Elastic Search . While doing
this i have used such code in the loop.
public static void main(String[] args) throws Exception {
// TODO Auto-generated method stub

            Node node = nodeBuilder().local(true).

settings(ImmutableSettings.settingsBuilder().

put("index.number_of_shards",

1).

put("index.number_of_replicas",

1).

                                   build()).build().start();
           Client  client = node.client();
                    String mapping =

XContentFactory.jsonBuilder().startObject().startObject("type1")

.startObject("properties").startObject("location").field("type",
"geo_point").field("lat_lon", true).endObject().endObject()

.endObject().endObject().string();

client.admin().indices().prepareCreate("test").addMapping("type1",
mapping).setSettings(settingsBuilder().put("number_of_shards",
"1")).execute().actionGet();
BulkRequestBuilder brb = client.prepareBulk();

             FileInputStream fstream = new

FileInputStream("e:\yp_CA.txt");
// Get the object of DataInputStream
DataInputStream in = new DataInputStream(fstream);
BufferedReader br = new BufferedReader(new
InputStreamReader(in));
String strLine;
long start=System.currentTimeMillis();
int i=0;
//Read File Line By Line
try{
XContentBuilder person = null;
while ((strLine = br.readLine()) != null) {
String tmp[] =strLine.replaceAll(""",
"").split("~\^\~");
//
System.out.println(tmp[4].split("\|"));

                             String

mergeField[]=tmp[4].split("\|");

                             String lati = mergeField[5];
                             String longi = mergeField[6];
                             i++;
                            // System.out.println(i);
                     brb.add(client.prepareIndex("test",

"type1",

i+"")

.setSource(jsonBuilder().startObject()
//
.field("MERGE_FIELD", tmp[4])

.field("MAPPED_FSN",

tmp[0].replaceAll(""", ""))

.startObject("location").field("lat",
Double.parseDouble(lati)).field("lon",
Double.parseDouble(longi)).endObject()
.endObject()));

                             person =

jsonBuilder().startObject()

                                           .field("gender",

tmp[4])

//aleaList(genders) returns "M"
or "F"
// [....] creation
of a

random person with some
attributes
.endObject();

                             //brb.execute().actionGet();
                           //  brb.setRefresh(true);

brb.add(client.prepareIndex("toto",

"tata")
.setRefresh(false)

.setReplicationType(ReplicationType.ASYNC)
.setSource(person));
if(i==100000){
i=0;
brb.execute().actionGet();

                            // break;
                     }
             }
             brb.execute().actionGet();
             }catch(Exception e){
                     brb.execute().actionGet();
             }
             brb.execute().actionGet();
           System.out.println("done!!!");
   }

After every 100000 records I call "brb.execute().actionGet();" to get the
data inserted into the ElasticSearch indexes.
But after 100000 records are inserted, when it goes to insert another 100000
records it fails with an OutOfMemory heap size error.
My question is: where am I going wrong? And can anyone share a complete Java
example to insert large data into ES?
Thanks in advance
Regards
Prashant

--

CRAIG BROWN
chief architect
youwho, Inc.

www.youwho.com <http://www.youwho.com/>

T: 801.855. 0921
M: 801.913. 0939


(prashant5375) #9

If you run the given code you will see it's not adding to the index.

import static
org.elasticsearch.common.settings.ImmutableSettings.settingsBuilder;
import static
org.elasticsearch.common.xcontent.XContentFactory.jsonBuilder;
import static org.elasticsearch.node.NodeBuilder.nodeBuilder;

import java.io.BufferedReader;
import java.io.DataInputStream;
import java.io.FileInputStream;
import java.io.InputStreamReader;
import java.util.Date;

import org.elasticsearch.action.support.replication.ReplicationType;
import org.elasticsearch.client.Client;
import org.elasticsearch.client.action.bulk.BulkRequestBuilder;
import org.elasticsearch.common.settings.ImmutableSettings;
import org.elasticsearch.common.xcontent.XContentBuilder;
import org.elasticsearch.common.xcontent.XContentFactory;
import org.elasticsearch.node.Node;

/**
 * Stand-alone driver posted to reproduce the "index is not growing" problem: it starts a
 * local node, creates the index "test" with a geo_point mapping for "type1", and then adds
 * generated documents to a single bulk request that is executed after every 1000 additions.
 */
public class NewTest2 {

/**
 * Starts the node, creates the index and runs the bulk-indexing loop.
 *
 * @param args command-line arguments; not read by this method
 * @throws Exception declared for the node start-up, mapping construction and index creation
 *         calls, which sit outside the try block below
 */
public static void main(String[] args) throws Exception {
	// Start a local node configured with one shard and one replica per index.

	 Node node = nodeBuilder().local(true).
				settings(ImmutableSettings.settingsBuilder().
				put("index.number_of_shards", 1).
				put("index.number_of_replicas", 1).
				build()).build().start();
	Client	client = node.client();
	// Mapping for "type1": a single "location" property of type geo_point with lat_lon enabled.
		 String mapping =

XContentFactory.jsonBuilder().startObject().startObject("type1")
.startObject("properties").startObject("location").field("type",
"geo_point").field("lat_lon", true).endObject().endObject()
.endObject().endObject().string();

// Create the "test" index with that mapping (blocks until the create call returns).
client.admin().indices().prepareCreate("test").addMapping("type1",
mapping).setSettings(settingsBuilder().put("number_of_shards",
"1")).execute().actionGet();
// One bulk builder is created here and used for every batch below.
// NOTE(review): it is never re-created after execute(); confirm whether requests that were
// already sent stay queued in it, since that would make each batch larger than the last.
BulkRequestBuilder brb = client.prepareBulk();
// Start time, used only for the elapsed-milliseconds printout after each batch.
long start=System.currentTimeMillis();
// Number of requests added since the last execute(); reset to 0 after each batch.
int i=0;
// Generate documents in a loop (no file is read in this version of the program).
try{
// Declared but never assigned or read again in this method.
XContentBuilder person = null;
for (int j=0;j<999999999;j++) {

			  i++;
		  // Every request targets index "test", type "type1" and the SAME document id "1";
		  // only the MAPPED_FSN value changes with j. As the replies in this thread explain,
		  // re-indexing under an existing id overwrites the previous document.
		  // NOTE(review): lat is given as -117.40 and lon as 32.00; these look swapped,
		  // confirm the intended coordinates.
		  brb.add(client.prepareIndex("test", "type1", "1")
					.setSource(jsonBuilder().startObject()
				            // .field("MERGE_FIELD", tmp[4])
				             .field("MAPPED_FSN", "Just a tem data"+j)
				             .startObject("location").field("lat",

Double.parseDouble("-117.40")).field("lon",
Double.parseDouble("32.00")).endObject()
.endObject()));

		  // Send the bulk request after every 1000 additions and print the elapsed time.
		  if(i==1000){
			  i=0;
			  System.out.println("in 1000");
			  brb.execute().actionGet();
			  System.out.println("cominted..."+(System.currentTimeMillis()-

start));
}

	  }
	  // There is no execute() after the loop: the loop runs 999,999,999 times, so the final
	  // 999 additions (after the last multiple of 1000) are never sent.

	  }catch(Exception e){
	  // Empty handler: any exception thrown inside the loop is silently discarded.
	  }


	// The node is not closed before the method returns.
	System.out.println("done!!!");

}

}

On Jan 27, 10:00 pm, BeyondLimit prashant.vi...@gmail.com wrote:

I know what you are trying to say, but if I have 2 GB of data which
I am trying to insert into ES, it's not possible that the index
size will remain only 13 MB.
I think I am doing something wrong, but don't know what; it's as if I
am adding data not in append mode, and it replaces the old data.
Regards
Prashant

On Jan 27, 9:53 pm, Berkay Mollamustafaoglu mber...@gmail.com wrote:

You can check the number of documents via the API or using BigDesk to get a
better indication of how many docs are indexed.

Regards,
Berkay Mollamustafaoglu
mberkay on yahoo, google and skype

On Fri, Jan 27, 2012 at 11:43 AM, BeyondLimit prashant.vi...@gmail.com wrote:

I mean to say , while doing first insert i see the size of index
changes to 13 mb , then after 2nd insert the size changes to some 12
mb of so.According to the logic it should grow the size to 25 mb or
more .... it should keep growing.
Regards
Prashant

On Jan 27, 9:37 pm, Berkay Mollamustafaoglu mber...@gmail.com wrote:

What do you mean by "loose previous data". Do you mean you don't see it
in
the index? How do you check it, run a query? It may take a while for the
docs to be indexed, depending on the size of your docs and resources
available.

Even 10K is a lot. You don't have to do such large chunks with ES. Try
starting with 100 docs, and see how the performance is.

Regards,
Berkay Mollamustafaoglu
mberkay on yahoo, google and skype

On Fri, Jan 27, 2012 at 11:29 AM, BeyondLimit <prashant.vi...@gmail.com
wrote:

Thanks for the reply. But i have another problem when i call "
brb.execute().actionGet(); " again i loose previous data what i
inserted previously.
I mean to say , Suppose i looped 10k record and then call "
brb.execute().actionGet(); " and after that i try to insert another
10k by repeating the same action " brb.execute().actionGet(); " , now
i loose previous data.
It will be great help if you can send me some sample Java Code.
Thanks in advance.
Regards
Prashant

On Jan 27, 9:21 pm, Craig Brown cbr...@youwho.com wrote:

I'd probably try inserting something less that 100k records at a
time. We

usually do blocks of 10K at a time. Even with multiple threads
running, I

haven't seen any problems over many millions of records.

  • Craig

On Thu, Jan 26, 2012 at 11:31 PM, BeyondLimit <
prashant.vi...@gmail.com

wrote:

Hi , there
I have just started using Elastic Search , my problem is i have to
insert 50 million of data rows in the Elastic Search . While doing
this i have used such code in the loop.
public static void main(String[] args) throws Exception {
// TODO Auto-generated method stub

            Node node = nodeBuilder().local(true).

settings(ImmutableSettings.settingsBuilder().

put("index.number_of_shards",

1).

put("index.number_of_replicas",

1).

                                   build()).build().start();
           Client  client = node.client();
                    String mapping =

XContentFactory.jsonBuilder().startObject().startObject("type1")

.startObject("properties").startObject("location").field("type",
"geo_point").field("lat_lon", true).endObject().endObject()

.endObject().endObject().string();

client.admin().indices().prepareCreate("test").addMapping("type1",
mapping).setSettings(settingsBuilder().put("number_of_shards",
"1")).execute().actionGet();
BulkRequestBuilder brb = client.prepareBulk();

             FileInputStream fstream = new

FileInputStream("e:\yp_CA.txt");
// Get the object of DataInputStream
DataInputStream in = new DataInputStream(fstream);
BufferedReader br = new BufferedReader(new
InputStreamReader(in));
String strLine;
long start=System.currentTimeMillis();
int i=0;
//Read File Line By Line
try{
XContentBuilder person = null;
while ((strLine = br.readLine()) != null) {
String tmp[] =strLine.replaceAll(""",
"").split("~\^\~");
//
System.out.println(tmp[4].split("\|"));

                             String

mergeField[]=tmp[4].split("\|");

                             String lati = mergeField[5];
                             String longi = mergeField[6];
                             i++;
                            // System.out.println(i);
                     brb.add(client.prepareIndex("test",

"type1",

i+"")

.setSource(jsonBuilder().startObject()
//
.field("MERGE_FIELD", tmp[4])

.field("MAPPED_FSN",

tmp[0].replaceAll(""", ""))

.startObject("location").field("lat",
Double.parseDouble(lati)).field("lon",
Double.parseDouble(longi)).endObject()
.endObject()));

                             person =

jsonBuilder().startObject()

                                           .field("gender",

tmp[4])

//aleaList(genders) returns "M"
or "F"
// [....] creation
of a

random person with some
attributes
.endObject();

                             //brb.execute().actionGet();
                           //  brb.setRefresh(true);

brb.add(client.prepareIndex("toto",

"tata")
.setRefresh(false)

.setReplicationType(ReplicationType.ASYNC)
.setSource(person));
if(i==100000){
i=0;
brb.execute().actionGet();

                            // break;
                     }
             }
             brb.execute().actionGet();
             }catch(Exception e){
                     brb.execute().actionGet();
             }
             brb.execute().actionGet();
           System.out.println("done!!!");
   }

After every 100000 record i call "brb.execute().actionGet();" to
get

data inserted to the ElasticSearch indexes.
But after 100000 record inserted when it goes to insert another
100000

record its goes OutOfMemory Heap size error.
My question where i am wrong .And can any one share complete java
example to insert large data in ES.
Thanks in advance
Regards
Prashant

--

CRAIG BROWN
chief architect
youwho, Inc.

www.youwho.com <http://www.youwho.com/>

T: 801.855. 0921
M: 801.913. 0939


(Ivan Brusic) #10

First, please gist your code since it is quite long. https://gist.github.com/

Pay attention to the prepareIndex method signature:
https://github.com/elasticsearch/elasticsearch/blob/master/src/main/java/org/elasticsearch/client/Client.java#L185

IndexRequestBuilder prepareIndex(String index, String type, @Nullable
String id);

The last param is the unique id. If you call prepareIndex with the
exact same params, then your last update will be overwritten. The size
of the index might be going down because the index was optimized after
you re-added the same documents.

Your original code has
brb.add(client.prepareIndex("test", "type1", i+"")
...
if(i==100000) {
i=0;
}

You are overwriting the documents during the next pass. Try:
if( i % 100000 == 0)

--
Ivan

On Fri, Jan 27, 2012 at 9:28 AM, BeyondLimit prashant.vicky@gmail.com wrote:

If you run the given code you will see its not adding index.

import static
org.elasticsearch.common.settings.ImmutableSettings.settingsBuilder;
import static
org.elasticsearch.common.xcontent.XContentFactory.jsonBuilder;
import static org.elasticsearch.node.NodeBuilder.nodeBuilder;

import java.io.BufferedReader;
import java.io.DataInputStream;
import java.io.FileInputStream;
import java.io.InputStreamReader;
import java.util.Date;

import org.elasticsearch.action.support.replication.ReplicationType;
import org.elasticsearch.client.Client;
import org.elasticsearch.client.action.bulk.BulkRequestBuilder;
import org.elasticsearch.common.settings.ImmutableSettings;
import org.elasticsearch.common.xcontent.XContentBuilder;
import org.elasticsearch.common.xcontent.XContentFactory;
import org.elasticsearch.node.Node;

/**
 * Stand-alone driver posted to reproduce the "index is not growing" problem: it starts a
 * local node, creates the index "test" with a geo_point mapping for "type1", and then adds
 * generated documents to a single bulk request that is executed after every 1000 additions.
 */
public class NewTest2 {

   /**
    * Starts the node, creates the index and runs the bulk-indexing loop.
    *
    * @param args command-line arguments; not read by this method
    * @throws Exception declared for the node start-up, mapping construction and index
    *         creation calls, which sit outside the try block below
    */
   public static void main(String[] args) throws Exception {
           // Start a local node configured with one shard and one replica per index.

            Node node = nodeBuilder().local(true).
                                   settings(ImmutableSettings.settingsBuilder().
                                   put("index.number_of_shards", 1).
                                   put("index.number_of_replicas", 1).
                                   build()).build().start();
           Client  client = node.client();
           // Mapping for "type1": a single "location" property of type geo_point with lat_lon enabled.
                    String mapping =

XContentFactory.jsonBuilder().startObject().startObject("type1")
.startObject("properties").startObject("location").field("type",
"geo_point").field("lat_lon", true).endObject().endObject()
.endObject().endObject().string();

// Create the "test" index with that mapping (blocks until the create call returns).
client.admin().indices().prepareCreate("test").addMapping("type1",
mapping).setSettings(settingsBuilder().put("number_of_shards",
"1")).execute().actionGet();
// One bulk builder is created here and used for every batch below.
// NOTE(review): it is never re-created after execute(); confirm whether requests that were
// already sent stay queued in it, since that would make each batch larger than the last.
BulkRequestBuilder brb = client.prepareBulk();
// Start time, used only for the elapsed-milliseconds printout after each batch.
long start=System.currentTimeMillis();
// Number of requests added since the last execute(); reset to 0 after each batch.
int i=0;
// Generate documents in a loop (no file is read in this version of the program).
try{
// Declared but never assigned or read again in this method.
XContentBuilder person = null;
for (int j=0;j<999999999;j++) {

                             i++;
                     // Every request targets index "test", type "type1" and the SAME document
                     // id "1"; only the MAPPED_FSN value changes with j. As the replies in this
                     // thread explain, re-indexing under an existing id overwrites the previous
                     // document.
                     // NOTE(review): lat is given as -117.40 and lon as 32.00; these look
                     // swapped, confirm the intended coordinates.
                     brb.add(client.prepareIndex("test", "type1", "1")
                                           .setSource(jsonBuilder().startObject()
                                               // .field("MERGE_FIELD", tmp[4])
                                                .field("MAPPED_FSN", "Just a tem data"+j)
                                                .startObject("location").field("lat",

Double.parseDouble("-117.40")).field("lon",
Double.parseDouble("32.00")).endObject()
.endObject()));

                     // Send the bulk request after every 1000 additions and print the elapsed time.
                     if(i==1000){
                             i=0;
                             System.out.println("in 1000");
                             brb.execute().actionGet();
                             System.out.println("cominted..."+(System.currentTimeMillis()-

start));
}

             }
             // There is no execute() after the loop: the loop runs 999,999,999 times, so the
             // final 999 additions (after the last multiple of 1000) are never sent.

             }catch(Exception e){
             // Empty handler: any exception thrown inside the loop is silently discarded.
             }


           // The node is not closed before the method returns.
           System.out.println("done!!!");

   }

}

On Jan 27, 10:00 pm, BeyondLimit prashant.vi...@gmail.com wrote:

I know what you are trying to say , but if i have a data of 2 gb which
i am trying to insert in to ES , its not be possible that the index
size will remain only 13 mb.
I thing i am doing something wrong , but dont know what , its like i
am adding data not in append mode , it replaces the old data.
Regards
Prashant

On Jan 27, 9:53 pm, Berkay Mollamustafaoglu mber...@gmail.com wrote:

You can check the number of documents via the API or using BigDesk to get a
better indication of how many docs are indexed.

Regards,
Berkay Mollamustafaoglu
mberkay on yahoo, google and skype

On Fri, Jan 27, 2012 at 11:43 AM, BeyondLimit prashant.vi...@gmail.com wrote:

I mean to say , while doing first insert i see the size of index
changes to 13 mb , then after 2nd insert the size changes to some 12
mb of so.According to the logic it should grow the size to 25 mb or
more .... it should keep growing.
Regards
Prashant

On Jan 27, 9:37 pm, Berkay Mollamustafaoglu mber...@gmail.com wrote:

What do you mean by "loose previous data". Do you mean you don't see it
in
the index? How do you check it, run a query? It may take a while for the
docs to be indexed, depending on the size of your docs and resources
available.

Even 10K is a lot. You don't have to do such large chunks with ES. Try
starting with 100 docs, and see how the performance is.

Regards,
Berkay Mollamustafaoglu
mberkay on yahoo, google and skype

On Fri, Jan 27, 2012 at 11:29 AM, BeyondLimit <prashant.vi...@gmail.com
wrote:

Thanks for the reply. But i have another problem when i call "
brb.execute().actionGet(); " again i loose previous data what i
inserted previously.
I mean to say , Suppose i looped 10k record and then call "
brb.execute().actionGet(); " and after that i try to insert another
10k by repeating the same action " brb.execute().actionGet(); " , now
i loose previous data.
It will be great help if you can send me some sample Java Code.
Thanks in advance.
Regards
Prashant

On Jan 27, 9:21 pm, Craig Brown cbr...@youwho.com wrote:

I'd probably try inserting something less that 100k records at a
time. We

usually do blocks of 10K at a time. Even with multiple threads
running, I

haven't seen any problems over many millions of records.

  • Craig

On Thu, Jan 26, 2012 at 11:31 PM, BeyondLimit <
prashant.vi...@gmail.com

wrote:

Hi , there
I have just started using Elastic Search , my problem is i have to
insert 50 million of data rows in the Elastic Search . While doing
this i have used such code in the loop.
public static void main(String[] args) throws Exception {
// TODO Auto-generated method stub

            Node node = nodeBuilder().local(true).

settings(ImmutableSettings.settingsBuilder().

put("index.number_of_shards",

1).

put("index.number_of_replicas",

1).

                                   build()).build().start();
           Client  client = node.client();
                    String mapping =

XContentFactory.jsonBuilder().startObject().startObject("type1")

.startObject("properties").startObject("location").field("type",
"geo_point").field("lat_lon", true).endObject().endObject()

.endObject().endObject().string();

client.admin().indices().prepareCreate("test").addMapping("type1",
mapping).setSettings(settingsBuilder().put("number_of_shards",
"1")).execute().actionGet();
BulkRequestBuilder brb = client.prepareBulk();

             FileInputStream fstream = new

FileInputStream("e:\yp_CA.txt");
// Get the object of DataInputStream
DataInputStream in = new DataInputStream(fstream);
BufferedReader br = new BufferedReader(new
InputStreamReader(in));
String strLine;
long start=System.currentTimeMillis();
int i=0;
//Read File Line By Line
try{
XContentBuilder person = null;
while ((strLine = br.readLine()) != null) {
String tmp[] =strLine.replaceAll(""",
"").split("~\^\~");
//
System.out.println(tmp[4].split("\|"));

                             String

mergeField[]=tmp[4].split("\|");

                             String lati = mergeField[5];
                             String longi = mergeField[6];
                             i++;
                            // System.out.println(i);
                     brb.add(client.prepareIndex("test",

"type1",

i+"")

.setSource(jsonBuilder().startObject()
//
.field("MERGE_FIELD", tmp[4])

.field("MAPPED_FSN",

tmp[0].replaceAll(""", ""))

.startObject("location").field("lat",
Double.parseDouble(lati)).field("lon",
Double.parseDouble(longi)).endObject()
.endObject()));

                             person =

jsonBuilder().startObject()

                                           .field("gender",

tmp[4])

//aleaList(genders) returns "M"
or "F"
// [....] creation
of a

random person with some
attributes
.endObject();

                             //brb.execute().actionGet();
                           //  brb.setRefresh(true);

brb.add(client.prepareIndex("toto",

"tata")
.setRefresh(false)

.setReplicationType(ReplicationType.ASYNC)
.setSource(person));
if(i==100000){
i=0;
brb.execute().actionGet();

                            // break;
                     }
             }
             brb.execute().actionGet();
             }catch(Exception e){
                     brb.execute().actionGet();
             }
             brb.execute().actionGet();
           System.out.println("done!!!");
   }

After every 100000 record i call "brb.execute().actionGet();" to
get

data inserted to the ElasticSearch indexes.
But after 100000 record inserted when it goes to insert another
100000

record its goes OutOfMemory Heap size error.
My question where i am wrong .And can any one share complete java
example to insert large data in ES.
Thanks in advance
Regards
Prashant

--

CRAIG BROWN
chief architect
youwho, Inc.

www.youwho.com <http://www.youwho.com/>

T: 801.855. 0921
M: 801.913. 0939


(prashant5375) #11

Hi Ivan ,
Thanks a lot for the help. its working now.
Regards
Prashant

On Jan 28, 5:33 am, Ivan Brusic i...@brusic.com wrote:

First, please gist your code since it is quite long. https://gist.github.com/

Pay attention to the prepareIndex method signature: https://github.com/elasticsearch/elasticsearch/blob/master/src/main/j...

IndexRequestBuilder prepareIndex(String index, String type, @Nullable
String id);

The last param is the unique id. If you call prepareIndex with the
exact same params, then your last update will be overwritten. The size
of the index might be going down because the index was optimized after
you re-added the same documents.

Your original code has
brb.add(client.prepareIndex("test", "type1", i+"")
...
if(i==100000) {
i=0;

}

You are overwriting the documents during the next pass. Try:
if( i % 100000 == 0)

--
Ivan

On Fri, Jan 27, 2012 at 9:28 AM, BeyondLimit prashant.vi...@gmail.com wrote:

If you run the given code you will see its not adding index.

import static
org.elasticsearch.common.settings.ImmutableSettings.settingsBuilder;
import static
org.elasticsearch.common.xcontent.XContentFactory.jsonBuilder;
import static org.elasticsearch.node.NodeBuilder.nodeBuilder;

import java.io.BufferedReader;
import java.io.DataInputStream;
import java.io.FileInputStream;
import java.io.InputStreamReader;
import java.util.Date;

import org.elasticsearch.action.support.replication.ReplicationType;
import org.elasticsearch.client.Client;
import org.elasticsearch.client.action.bulk.BulkRequestBuilder;
import org.elasticsearch.common.settings.ImmutableSettings;
import org.elasticsearch.common.xcontent.XContentBuilder;
import org.elasticsearch.common.xcontent.XContentFactory;
import org.elasticsearch.node.Node;

/**
 * Stand-alone driver posted to reproduce the "index is not growing" problem: it starts a
 * local node, creates the index "test" with a geo_point mapping for "type1", and then adds
 * generated documents to a single bulk request that is executed after every 1000 additions.
 */
public class NewTest2 {

   /**
    * Starts the node, creates the index and runs the bulk-indexing loop.
    *
    * @param args command-line arguments; not read by this method
    * @throws Exception declared for the node start-up, mapping construction and index
    *         creation calls, which sit outside the try block below
    */
   public static void main(String[] args) throws Exception {
           // Start a local node configured with one shard and one replica per index.
            Node node = nodeBuilder().local(true).
                                   settings(ImmutableSettings.settingsBuilder().
                                   put("index.number_of_shards", 1).
                                   put("index.number_of_replicas", 1).
                                   build()).build().start();
           Client  client = node.client();
           // Mapping for "type1": a single "location" property of type geo_point with lat_lon enabled.
                    String mapping =

XContentFactory.jsonBuilder().startObject().startObject("type1")
.startObject("properties").startObject("location").field("type",
"geo_point").field("lat_lon", true).endObject().endObject()
.endObject().endObject().string();

// Create the "test" index with that mapping (blocks until the create call returns).
client.admin().indices().prepareCreate("test").addMapping("type1",
mapping).setSettings(settingsBuilder().put("number_of_shards",
"1")).execute().actionGet();
// One bulk builder is created here and used for every batch below.
// NOTE(review): it is never re-created after execute(); confirm whether requests that were
// already sent stay queued in it, since that would make each batch larger than the last.
BulkRequestBuilder brb = client.prepareBulk();
// Start time, used only for the elapsed-milliseconds printout after each batch.
long start=System.currentTimeMillis();
// Number of requests added since the last execute(); reset to 0 after each batch.
int i=0;
// Generate documents in a loop (no file is read in this version of the program).
try{
// Declared but never assigned or read again in this method.
XContentBuilder person = null;
for (int j=0;j<999999999;j++) {

                             i++;
                     // Every request targets index "test", type "type1" and the SAME document
                     // id "1"; only the MAPPED_FSN value changes with j. As the replies in this
                     // thread explain, re-indexing under an existing id overwrites the previous
                     // document.
                     // NOTE(review): lat is given as -117.40 and lon as 32.00; these look
                     // swapped, confirm the intended coordinates.
                     brb.add(client.prepareIndex("test", "type1", "1")
                                           .setSource(jsonBuilder().startObject()
                                               // .field("MERGE_FIELD", tmp[4])
                                                .field("MAPPED_FSN", "Just a tem data"+j)
                                                .startObject("location").field("lat",

Double.parseDouble("-117.40")).field("lon",
Double.parseDouble("32.00")).endObject()
.endObject()));

                     // Send the bulk request after every 1000 additions and print the elapsed time.
                     if(i==1000){
                             i=0;
                             System.out.println("in 1000");
                             brb.execute().actionGet();
                             System.out.println("cominted..."+(System.currentTimeMillis()-

start));
}

             }
             // There is no execute() after the loop: the loop runs 999,999,999 times, so the
             // final 999 additions (after the last multiple of 1000) are never sent.
             }catch(Exception e){
             // Empty handler: any exception thrown inside the loop is silently discarded.
             }
           // The node is not closed before the method returns.
           System.out.println("done!!!");
   }

}

On Jan 27, 10:00 pm, BeyondLimit prashant.vi...@gmail.com wrote:

I know what you are trying to say , but if i have a data of 2 gb which
i am trying to insert in to ES , its not be possible that the index
size will remain only 13 mb.
I thing i am doing something wrong , but dont know what , its like i
am adding data not in append mode , it replaces the old data.
Regards
Prashant

On Jan 27, 9:53 pm, Berkay Mollamustafaoglu mber...@gmail.com wrote:

You can check the number of documents via the API or using BigDesk to get a
better indication of how many docs are indexed.

Regards,
Berkay Mollamustafaoglu
mberkay on yahoo, google and skype

On Fri, Jan 27, 2012 at 11:43 AM, BeyondLimit prashant.vi...@gmail.com wrote:

I mean to say , while doing first insert i see the size of index
changes to 13 mb , then after 2nd insert the size changes to some 12
mb of so.According to the logic it should grow the size to 25 mb or
more .... it should keep growing.
Regards
Prashant

On Jan 27, 9:37 pm, Berkay Mollamustafaoglu mber...@gmail.com wrote:

What do you mean by "loose previous data". Do you mean you don't see it
in
the index? How do you check it, run a query? It may take a while for the
docs to be indexed, depending on the size of your docs and resources
available.

Even 10K is a lot. You don't have to do such large chunks with ES. Try
starting with 100 docs, and see how the performance is.

Regards,
Berkay Mollamustafaoglu
mberkay on yahoo, google and skype

On Fri, Jan 27, 2012 at 11:29 AM, BeyondLimit <prashant.vi...@gmail.com
wrote:

Thanks for the reply. But i have another problem when i call "
brb.execute().actionGet(); " again i loose previous data what i
inserted previously.
I mean to say , Suppose i looped 10k record and then call "
brb.execute().actionGet(); " and after that i try to insert another
10k by repeating the same action " brb.execute().actionGet(); " , now
i loose previous data.
It will be great help if you can send me some sample Java Code.
Thanks in advance.
Regards
Prashant

On Jan 27, 9:21 pm, Craig Brown cbr...@youwho.com wrote:

I'd probably try inserting something less that 100k records at a
time. We

usually do blocks of 10K at a time. Even with multiple threads
running, I

haven't seen any problems over many millions of records.

  • Craig

On Thu, Jan 26, 2012 at 11:31 PM, BeyondLimit <
prashant.vi...@gmail.com

wrote:

Hi , there
I have just started using Elastic Search , my problem is i have to
insert 50 million of data rows in the Elastic Search . While doing
this i have used such code in the loop.
public static void main(String[] args) throws Exception {
// TODO Auto-generated method stub

            Node node = nodeBuilder().local(true).

settings(ImmutableSettings.settingsBuilder().

put("index.number_of_shards",

1).

put("index.number_of_replicas",

1).

                                   build()).build().start();
           Client  client = node.client();
                    String mapping =

XContentFactory.jsonBuilder().startObject().startObject("type1")

.startObject("properties").startObject("location").field("type",
"geo_point").field("lat_lon", true).endObject().endObject()

.endObject().endObject().string();

client.admin().indices().prepareCreate("test").addMapping("type1",
mapping).setSettings(settingsBuilder().put("number_of_shards",
"1")).execute().actionGet();
BulkRequestBuilder brb = client.prepareBulk();

             FileInputStream fstream = new

FileInputStream("e:\yp_CA.txt");
// Get the object of DataInputStream
DataInputStream in = new DataInputStream(fstream);
BufferedReader br = new BufferedReader(new
InputStreamReader(in));
String strLine;
long start=System.currentTimeMillis();
int i=0;
//Read File Line By Line
try{
...

read more »


(system) #12