Seeing OOM in 0.19.0.RC2


(ajan) #1

Hi!

I'm seeing OOM errors while using 0.19.0.RC2 with default startup
script with just about 4K docs indexed.

Is there anything wrong in what I'm doing?

Here is the _stats output.

{"ok":true,"_shards":{"total":10,"successful":9,"failed":0},"_all":
{"primaries":{"docs":{"count":37775,"deleted":33},"store":
{"size":"348.5mb","size_in_bytes":365472567},"indexing":{"index_total":
35257,"index_time":"1d","index_time_in_millis":
93317704,"index_current":0,"delete_total":
1,"delete_time":"0s","delete_time_in_millis":0,"delete_current":
0},"get":{"total":3,"time":"4ms","time_in_millis":4,"exists_total":
1,"exists_time":"1ms","exists_time_in_millis":1,"missing_total":
2,"missing_time":"3ms","missing_time_in_millis":3,"current":
0},"search":{"query_total":
12882,"query_time":"34.9s","query_time_in_millis":
34972,"query_current":0,"fetch_total":
3217,"fetch_time":"691ms","fetch_time_in_millis":691,"fetch_current":
0}},"total":{"docs":{"count":74962,"deleted":54},"store":
{"size":"691.8mb","size_in_bytes":725415500},"indexing":{"index_total":
37269,"index_time":"2d","index_time_in_millis":
175828485,"index_current":0,"delete_total":
1,"delete_time":"0s","delete_time_in_millis":0,"delete_current":
0},"get":{"total":3,"time":"4ms","time_in_millis":4,"exists_total":
1,"exists_time":"1ms","exists_time_in_millis":1,"missing_total":
2,"missing_time":"3ms","missing_time_in_millis":3,"current":
0},"search":{"query_total":
16103,"query_time":"42.8s","query_time_in_millis":
42868,"query_current":0,"fetch_total":
6433,"fetch_time":"1.6s","fetch_time_in_millis":1671,"fetch_current":
0}},"indices":{"managedobject":{"primaries":{"docs":{"count":
37775,"deleted":33},"store":{"size":"348.5mb","size_in_bytes":
365472567},"indexing":{"index_total":
35257,"index_time":"1d","index_time_in_millis":
93317704,"index_current":0,"delete_total":
1,"delete_time":"0s","delete_time_in_millis":0,"delete_current":
0},"get":{"total":3,"time":"4ms","time_in_millis":4,"exists_total":
1,"exists_time":"1ms","exists_time_in_millis":1,"missing_total":
2,"missing_time":"3ms","missing_time_in_millis":3,"current":
0},"search":{"query_total":
12882,"query_time":"34.9s","query_time_in_millis":
34972,"query_current":0,"fetch_total":
3217,"fetch_time":"691ms","fetch_time_in_millis":691,"fetch_current":
0}},"total":{"docs":{"count":74962,"deleted":54},"store":
{"size":"691.8mb","size_in_bytes":725415500},"indexing":{"index_total":
37269,"index_time":"2d","index_time_in_millis":
175828485,"index_current":0,"delete_total":
1,"delete_time":"0s","delete_time_in_millis":0,"delete_current":
0},"get":{"total":3,"time":"4ms","time_in_millis":4,"exists_total":
1,"exists_time":"1ms","exists_time_in_millis":1,"missing_total":
2,"missing_time":"3ms","missing_time_in_millis":3,"current":
0},"search":{"query_total":
16103,"query_time":"42.8s","query_time_in_millis":
42868,"query_current":0,"fetch_total":
6433,"fetch_time":"1.6s","fetch_time_in_millis":1671,"fetch_current":
0}}}}}}

With best regards,
Afzal Jan


(Shay Banon) #2

Can you gist the OOM you got from the logs?

On Tuesday, February 21, 2012 at 3:02 PM, ajan wrote:

Hi!

I'm seeing OOM errors while using 0.19.0.RC2 with default startup
script with just about 4K docs indexed.

Is there anything wrong in what I'm doing?

Here is the _stats output.

{"ok":true,"_shards":{"total":10,"successful":9,"failed":0},"_all":
{"primaries":{"docs":{"count":37775,"deleted":33},"store":
{"size":"348.5mb","size_in_bytes":365472567},"indexing":{"index_total":
35257,"index_time":"1d","index_time_in_millis":
93317704,"index_current":0,"delete_total":
1,"delete_time":"0s","delete_time_in_millis":0,"delete_current":
0},"get":{"total":3,"time":"4ms","time_in_millis":4,"exists_total":
1,"exists_time":"1ms","exists_time_in_millis":1,"missing_total":
2,"missing_time":"3ms","missing_time_in_millis":3,"current":
0},"search":{"query_total":
12882,"query_time":"34.9s","query_time_in_millis":
34972,"query_current":0,"fetch_total":
3217,"fetch_time":"691ms","fetch_time_in_millis":691,"fetch_current":
0}},"total":{"docs":{"count":74962,"deleted":54},"store":
{"size":"691.8mb","size_in_bytes":725415500},"indexing":{"index_total":
37269,"index_time":"2d","index_time_in_millis":
175828485,"index_current":0,"delete_total":
1,"delete_time":"0s","delete_time_in_millis":0,"delete_current":
0},"get":{"total":3,"time":"4ms","time_in_millis":4,"exists_total":
1,"exists_time":"1ms","exists_time_in_millis":1,"missing_total":
2,"missing_time":"3ms","missing_time_in_millis":3,"current":
0},"search":{"query_total":
16103,"query_time":"42.8s","query_time_in_millis":
42868,"query_current":0,"fetch_total":
6433,"fetch_time":"1.6s","fetch_time_in_millis":1671,"fetch_current":
0}},"indices":{"managedobject":{"primaries":{"docs":{"count":
37775,"deleted":33},"store":{"size":"348.5mb","size_in_bytes":
365472567},"indexing":{"index_total":
35257,"index_time":"1d","index_time_in_millis":
93317704,"index_current":0,"delete_total":
1,"delete_time":"0s","delete_time_in_millis":0,"delete_current":
0},"get":{"total":3,"time":"4ms","time_in_millis":4,"exists_total":
1,"exists_time":"1ms","exists_time_in_millis":1,"missing_total":
2,"missing_time":"3ms","missing_time_in_millis":3,"current":
0},"search":{"query_total":
12882,"query_time":"34.9s","query_time_in_millis":
34972,"query_current":0,"fetch_total":
3217,"fetch_time":"691ms","fetch_time_in_millis":691,"fetch_current":
0}},"total":{"docs":{"count":74962,"deleted":54},"store":
{"size":"691.8mb","size_in_bytes":725415500},"indexing":{"index_total":
37269,"index_time":"2d","index_time_in_millis":
175828485,"index_current":0,"delete_total":
1,"delete_time":"0s","delete_time_in_millis":0,"delete_current":
0},"get":{"total":3,"time":"4ms","time_in_millis":4,"exists_total":
1,"exists_time":"1ms","exists_time_in_millis":1,"missing_total":
2,"missing_time":"3ms","missing_time_in_millis":3,"current":
0},"search":{"query_total":
16103,"query_time":"42.8s","query_time_in_millis":
42868,"query_current":0,"fetch_total":
6433,"fetch_time":"1.6s","fetch_time_in_millis":1671,"fetch_current":
0}}}}}}

With best regards,
Afzal Jan


(ajan) #3

On Feb 21, 9:10 pm, Shay Banon kim...@gmail.com wrote:

Can you gist the OOM you got from the logs?

On Tuesday, February 21, 2012 at 3:02 PM, ajan wrote:

Hi!

I'm seeing OOM errors while using 0.19.0.RC2 with default startup
script with just about 4K docs indexed.

Is there anything wrong in what I'm doing?

Here is the _stats output.

{"ok":true,"_shards":{"total":10,"successful":9,"failed":0},"_all":
{"primaries":{"docs":{"count":37775,"deleted":33},"store":
{"size":"348.5mb","size_in_bytes":365472567},"indexing":{"index_total":
35257,"index_time":"1d","index_time_in_millis":
93317704,"index_current":0,"delete_total":
1,"delete_time":"0s","delete_time_in_millis":0,"delete_current":
0},"get":{"total":3,"time":"4ms","time_in_millis":4,"exists_total":
1,"exists_time":"1ms","exists_time_in_millis":1,"missing_total":
2,"missing_time":"3ms","missing_time_in_millis":3,"current":
0},"search":{"query_total":
12882,"query_time":"34.9s","query_time_in_millis":
34972,"query_current":0,"fetch_total":
3217,"fetch_time":"691ms","fetch_time_in_millis":691,"fetch_current":
0}},"total":{"docs":{"count":74962,"deleted":54},"store":
{"size":"691.8mb","size_in_bytes":725415500},"indexing":{"index_total":
37269,"index_time":"2d","index_time_in_millis":
175828485,"index_current":0,"delete_total":
1,"delete_time":"0s","delete_time_in_millis":0,"delete_current":
0},"get":{"total":3,"time":"4ms","time_in_millis":4,"exists_total":
1,"exists_time":"1ms","exists_time_in_millis":1,"missing_total":
2,"missing_time":"3ms","missing_time_in_millis":3,"current":
0},"search":{"query_total":
16103,"query_time":"42.8s","query_time_in_millis":
42868,"query_current":0,"fetch_total":
6433,"fetch_time":"1.6s","fetch_time_in_millis":1671,"fetch_current":
0}},"indices":{"managedobject":{"primaries":{"docs":{"count":
37775,"deleted":33},"store":{"size":"348.5mb","size_in_bytes":
365472567},"indexing":{"index_total":
35257,"index_time":"1d","index_time_in_millis":
93317704,"index_current":0,"delete_total":
1,"delete_time":"0s","delete_time_in_millis":0,"delete_current":
0},"get":{"total":3,"time":"4ms","time_in_millis":4,"exists_total":
1,"exists_time":"1ms","exists_time_in_millis":1,"missing_total":
2,"missing_time":"3ms","missing_time_in_millis":3,"current":
0},"search":{"query_total":
12882,"query_time":"34.9s","query_time_in_millis":
34972,"query_current":0,"fetch_total":
3217,"fetch_time":"691ms","fetch_time_in_millis":691,"fetch_current":
0}},"total":{"docs":{"count":74962,"deleted":54},"store":
{"size":"691.8mb","size_in_bytes":725415500},"indexing":{"index_total":
37269,"index_time":"2d","index_time_in_millis":
175828485,"index_current":0,"delete_total":
1,"delete_time":"0s","delete_time_in_millis":0,"delete_current":
0},"get":{"total":3,"time":"4ms","time_in_millis":4,"exists_total":
1,"exists_time":"1ms","exists_time_in_millis":1,"missing_total":
2,"missing_time":"3ms","missing_time_in_millis":3,"current":
0},"search":{"query_total":
16103,"query_time":"42.8s","query_time_in_millis":
42868,"query_current":0,"fetch_total":
6433,"fetch_time":"1.6s","fetch_time_in_millis":1671,"fetch_current":
0}}}}}}

With best regards,
Afzal Jan


(ajan) #4

I see a lot of these errors too in the logs

[2012-02-22 15:55:33,891][WARN ][index.translog ] [Man-Ape]
[managedobject][2] failed to flush shard on translog threshold
org.elasticsearch.index.shard.IllegalIndexShardStateException:
[managedobject][2] CurrentState[CREATED] write operation only allowed
when started/recovering
at
org.elasticsearch.index.shard.service.InternalIndexShard.verifyStartedOrRecovering(InternalIndexShard.java:
678)
at
org.elasticsearch.index.shard.service.InternalIndexShard.flush(InternalIndexShard.java:
497)
at org.elasticsearch.index.translog.TranslogService
$TranslogBasedFlush$1.run(TranslogService.java:173)
at java.util.concurrent.ThreadPoolExecutor
$Worker.runTask(ThreadPoolExecutor.java:886)
at java.util.concurrent.ThreadPoolExecutor
$Worker.run(ThreadPoolExecutor.java:908)
at java.lang.Thread.run(Thread.java:662)

Are they related to the OOM?

On Feb 22, 3:52 pm, ajan jan.af...@gmail.com wrote:

https://gist.github.com/1884109

On Feb 21, 9:10 pm, Shay Banon kim...@gmail.com wrote:

Can you gist the OOM you got from the logs?

On Tuesday, February 21, 2012 at 3:02 PM, ajan wrote:

Hi!

I'm seeing OOM errors while using 0.19.0.RC2 with default startup
script with just about 4K docs indexed.

Is there anything wrong in what I'm doing?

Here is the _stats output.

{"ok":true,"_shards":{"total":10,"successful":9,"failed":0},"_all":
{"primaries":{"docs":{"count":37775,"deleted":33},"store":
{"size":"348.5mb","size_in_bytes":365472567},"indexing":{"index_total":
35257,"index_time":"1d","index_time_in_millis":
93317704,"index_current":0,"delete_total":
1,"delete_time":"0s","delete_time_in_millis":0,"delete_current":
0},"get":{"total":3,"time":"4ms","time_in_millis":4,"exists_total":
1,"exists_time":"1ms","exists_time_in_millis":1,"missing_total":
2,"missing_time":"3ms","missing_time_in_millis":3,"current":
0},"search":{"query_total":
12882,"query_time":"34.9s","query_time_in_millis":
34972,"query_current":0,"fetch_total":
3217,"fetch_time":"691ms","fetch_time_in_millis":691,"fetch_current":
0}},"total":{"docs":{"count":74962,"deleted":54},"store":
{"size":"691.8mb","size_in_bytes":725415500},"indexing":{"index_total":
37269,"index_time":"2d","index_time_in_millis":
175828485,"index_current":0,"delete_total":
1,"delete_time":"0s","delete_time_in_millis":0,"delete_current":
0},"get":{"total":3,"time":"4ms","time_in_millis":4,"exists_total":
1,"exists_time":"1ms","exists_time_in_millis":1,"missing_total":
2,"missing_time":"3ms","missing_time_in_millis":3,"current":
0},"search":{"query_total":
16103,"query_time":"42.8s","query_time_in_millis":
42868,"query_current":0,"fetch_total":
6433,"fetch_time":"1.6s","fetch_time_in_millis":1671,"fetch_current":
0}},"indices":{"managedobject":{"primaries":{"docs":{"count":
37775,"deleted":33},"store":{"size":"348.5mb","size_in_bytes":
365472567},"indexing":{"index_total":
35257,"index_time":"1d","index_time_in_millis":
93317704,"index_current":0,"delete_total":
1,"delete_time":"0s","delete_time_in_millis":0,"delete_current":
0},"get":{"total":3,"time":"4ms","time_in_millis":4,"exists_total":
1,"exists_time":"1ms","exists_time_in_millis":1,"missing_total":
2,"missing_time":"3ms","missing_time_in_millis":3,"current":
0},"search":{"query_total":
12882,"query_time":"34.9s","query_time_in_millis":
34972,"query_current":0,"fetch_total":
3217,"fetch_time":"691ms","fetch_time_in_millis":691,"fetch_current":
0}},"total":{"docs":{"count":74962,"deleted":54},"store":
{"size":"691.8mb","size_in_bytes":725415500},"indexing":{"index_total":
37269,"index_time":"2d","index_time_in_millis":
175828485,"index_current":0,"delete_total":
1,"delete_time":"0s","delete_time_in_millis":0,"delete_current":
0},"get":{"total":3,"time":"4ms","time_in_millis":4,"exists_total":
1,"exists_time":"1ms","exists_time_in_millis":1,"missing_total":
2,"missing_time":"3ms","missing_time_in_millis":3,"current":
0},"search":{"query_total":
16103,"query_time":"42.8s","query_time_in_millis":
42868,"query_current":0,"fetch_total":
6433,"fetch_time":"1.6s","fetch_time_in_millis":1671,"fetch_current":
0}}}}}}

With best regards,
Afzal Jan


(Shay Banon) #5

No, you can safely ignore those errors in the log (I will fix it so they won't be logged).

Regarding the OOM, there really shouldn't be a reason why you can't index 4k docs with the default settings. how do you index the data? Is it a single bulk request? Multiple concurrent ones? Do you open a connection for each index request? How big are the docs?

On Wednesday, February 22, 2012 at 12:55 PM, ajan wrote:

I see a lot of these errors too in the logs

[2012-02-22 15:55:33,891][WARN ][index.translog ] [Man-Ape]
[managedobject][2] failed to flush shard on translog threshold
org.elasticsearch.index.shard.IllegalIndexShardStateException:
[managedobject][2] CurrentState[CREATED] write operation only allowed
when started/recovering
at
org.elasticsearch.index.shard.service.InternalIndexShard.verifyStartedOrRecovering(InternalIndexShard.java:
678)
at
org.elasticsearch.index.shard.service.InternalIndexShard.flush(InternalIndexShard.java:
497)
at org.elasticsearch.index.translog.TranslogService
$TranslogBasedFlush$1.run(TranslogService.java:173)
at java.util.concurrent.ThreadPoolExecutor
$Worker.runTask(ThreadPoolExecutor.java:886)
at java.util.concurrent.ThreadPoolExecutor
$Worker.run(ThreadPoolExecutor.java:908)
at java.lang.Thread.run(Thread.java:662)

Are they related to the OOM?

On Feb 22, 3:52 pm, ajan <jan.af...@gmail.com (http://gmail.com)> wrote:

https://gist.github.com/1884109

On Feb 21, 9:10 pm, Shay Banon <kim...@gmail.com (http://gmail.com)> wrote:

Can you gist the OOM you got from the logs?

On Tuesday, February 21, 2012 at 3:02 PM, ajan wrote:

Hi!

I'm seeing OOM errors while using 0.19.0.RC2 with default startup
script with just about 4K docs indexed.

Is there anything wrong in what I'm doing?

Here is the _stats output.

{"ok":true,"_shards":{"total":10,"successful":9,"failed":0},"_all":
{"primaries":{"docs":{"count":37775,"deleted":33},"store":
{"size":"348.5mb","size_in_bytes":365472567},"indexing":{"index_total":
35257,"index_time":"1d","index_time_in_millis":
93317704,"index_current":0,"delete_total":
1,"delete_time":"0s","delete_time_in_millis":0,"delete_current":
0},"get":{"total":3,"time":"4ms","time_in_millis":4,"exists_total":
1,"exists_time":"1ms","exists_time_in_millis":1,"missing_total":
2,"missing_time":"3ms","missing_time_in_millis":3,"current":
0},"search":{"query_total":
12882,"query_time":"34.9s","query_time_in_millis":
34972,"query_current":0,"fetch_total":
3217,"fetch_time":"691ms","fetch_time_in_millis":691,"fetch_current":
0}},"total":{"docs":{"count":74962,"deleted":54},"store":
{"size":"691.8mb","size_in_bytes":725415500},"indexing":{"index_total":
37269,"index_time":"2d","index_time_in_millis":
175828485,"index_current":0,"delete_total":
1,"delete_time":"0s","delete_time_in_millis":0,"delete_current":
0},"get":{"total":3,"time":"4ms","time_in_millis":4,"exists_total":
1,"exists_time":"1ms","exists_time_in_millis":1,"missing_total":
2,"missing_time":"3ms","missing_time_in_millis":3,"current":
0},"search":{"query_total":
16103,"query_time":"42.8s","query_time_in_millis":
42868,"query_current":0,"fetch_total":
6433,"fetch_time":"1.6s","fetch_time_in_millis":1671,"fetch_current":
0}},"indices":{"managedobject":{"primaries":{"docs":{"count":
37775,"deleted":33},"store":{"size":"348.5mb","size_in_bytes":
365472567},"indexing":{"index_total":
35257,"index_time":"1d","index_time_in_millis":
93317704,"index_current":0,"delete_total":
1,"delete_time":"0s","delete_time_in_millis":0,"delete_current":
0},"get":{"total":3,"time":"4ms","time_in_millis":4,"exists_total":
1,"exists_time":"1ms","exists_time_in_millis":1,"missing_total":
2,"missing_time":"3ms","missing_time_in_millis":3,"current":
0},"search":{"query_total":
12882,"query_time":"34.9s","query_time_in_millis":
34972,"query_current":0,"fetch_total":
3217,"fetch_time":"691ms","fetch_time_in_millis":691,"fetch_current":
0}},"total":{"docs":{"count":74962,"deleted":54},"store":
{"size":"691.8mb","size_in_bytes":725415500},"indexing":{"index_total":
37269,"index_time":"2d","index_time_in_millis":
175828485,"index_current":0,"delete_total":
1,"delete_time":"0s","delete_time_in_millis":0,"delete_current":
0},"get":{"total":3,"time":"4ms","time_in_millis":4,"exists_total":
1,"exists_time":"1ms","exists_time_in_millis":1,"missing_total":
2,"missing_time":"3ms","missing_time_in_millis":3,"current":
0},"search":{"query_total":
16103,"query_time":"42.8s","query_time_in_millis":
42868,"query_current":0,"fetch_total":
6433,"fetch_time":"1.6s","fetch_time_in_millis":1671,"fetch_current":
0}}}}}}

With best regards,
Afzal Jan


(ajan) #6

Firstly, apologies, it's not 4K it is actually ~40K docs.

how do you index the data?
Using native Java API

Is it a single bulk request?
Single requests

Multiple concurrent ones?
Yes, multiple requests are made, ~50 requests per second.

Do you open a connection for each index request?
I have tests that reuse the connection and other tests that create a
new connection every time. I have executed both these tests. Not sure
which one would have caused this error.

How big are the docs?
each doc is ~40kb

On Feb 22, 5:38 pm, Shay Banon kim...@gmail.com wrote:

No, you can safely ignore those errors in the log (I will fix it so they won't be logged).

Regarding the OOM, there really shouldn't be a reason why you can't index 4k docs with the default settings. how do you index the data? Is it a single bulk request? Multiple concurrent ones? Do you open a connection for each index request? How big are the docs?

On Wednesday, February 22, 2012 at 12:55 PM, ajan wrote:

I see a lot of these errors too in the logs

[2012-02-22 15:55:33,891][WARN ][index.translog ] [Man-Ape]
[managedobject][2] failed to flush shard on translog threshold
org.elasticsearch.index.shard.IllegalIndexShardStateException:
[managedobject][2] CurrentState[CREATED] write operation only allowed
when started/recovering
at
org.elasticsearch.index.shard.service.InternalIndexShard.verifyStartedOrRecovering(InternalIndexShard.java:
678)
at
org.elasticsearch.index.shard.service.InternalIndexShard.flush(InternalIndexShard.java:
497)
at org.elasticsearch.index.translog.TranslogService
$TranslogBasedFlush$1.run(TranslogService.java:173)
at java.util.concurrent.ThreadPoolExecutor
$Worker.runTask(ThreadPoolExecutor.java:886)
at java.util.concurrent.ThreadPoolExecutor
$Worker.run(ThreadPoolExecutor.java:908)
at java.lang.Thread.run(Thread.java:662)

Are they related to the OOM?

On Feb 22, 3:52 pm, ajan <jan.af...@gmail.com (http://gmail.com)> wrote:

https://gist.github.com/1884109

On Feb 21, 9:10 pm, Shay Banon <kim...@gmail.com (http://gmail.com)> wrote:

Can you gist the OOM you got from the logs?

On Tuesday, February 21, 2012 at 3:02 PM, ajan wrote:

Hi!

I'm seeing OOM errors while using 0.19.0.RC2 with default startup
script with just about 4K docs indexed.

Is there anything wrong in what I'm doing?

Here is the _stats output.

{"ok":true,"_shards":{"total":10,"successful":9,"failed":0},"_all":
{"primaries":{"docs":{"count":37775,"deleted":33},"store":
{"size":"348.5mb","size_in_bytes":365472567},"indexing":{"index_total":
35257,"index_time":"1d","index_time_in_millis":
93317704,"index_current":0,"delete_total":
1,"delete_time":"0s","delete_time_in_millis":0,"delete_current":
0},"get":{"total":3,"time":"4ms","time_in_millis":4,"exists_total":
1,"exists_time":"1ms","exists_time_in_millis":1,"missing_total":
2,"missing_time":"3ms","missing_time_in_millis":3,"current":
0},"search":{"query_total":
12882,"query_time":"34.9s","query_time_in_millis":
34972,"query_current":0,"fetch_total":
3217,"fetch_time":"691ms","fetch_time_in_millis":691,"fetch_current":
0}},"total":{"docs":{"count":74962,"deleted":54},"store":
{"size":"691.8mb","size_in_bytes":725415500},"indexing":{"index_total":
37269,"index_time":"2d","index_time_in_millis":
175828485,"index_current":0,"delete_total":
1,"delete_time":"0s","delete_time_in_millis":0,"delete_current":
0},"get":{"total":3,"time":"4ms","time_in_millis":4,"exists_total":
1,"exists_time":"1ms","exists_time_in_millis":1,"missing_total":
2,"missing_time":"3ms","missing_time_in_millis":3,"current":
0},"search":{"query_total":
16103,"query_time":"42.8s","query_time_in_millis":
42868,"query_current":0,"fetch_total":
6433,"fetch_time":"1.6s","fetch_time_in_millis":1671,"fetch_current":
0}},"indices":{"managedobject":{"primaries":{"docs":{"count":
37775,"deleted":33},"store":{"size":"348.5mb","size_in_bytes":
365472567},"indexing":{"index_total":
35257,"index_time":"1d","index_time_in_millis":
93317704,"index_current":0,"delete_total":
1,"delete_time":"0s","delete_time_in_millis":0,"delete_current":
0},"get":{"total":3,"time":"4ms","time_in_millis":4,"exists_total":
1,"exists_time":"1ms","exists_time_in_millis":1,"missing_total":
2,"missing_time":"3ms","missing_time_in_millis":3,"current":
0},"search":{"query_total":
12882,"query_time":"34.9s","query_time_in_millis":
34972,"query_current":0,"fetch_total":
3217,"fetch_time":"691ms","fetch_time_in_millis":691,"fetch_current":
0}},"total":{"docs":{"count":74962,"deleted":54},"store":
{"size":"691.8mb","size_in_bytes":725415500},"indexing":{"index_total":
37269,"index_time":"2d","index_time_in_millis":
175828485,"index_current":0,"delete_total":
1,"delete_time":"0s","delete_time_in_millis":0,"delete_current":
0},"get":{"total":3,"time":"4ms","time_in_millis":4,"exists_total":
1,"exists_time":"1ms","exists_time_in_millis":1,"missing_total":
2,"missing_time":"3ms","missing_time_in_millis":3,"current":
0},"search":{"query_total":
16103,"query_time":"42.8s","query_time_in_millis":
42868,"query_current":0,"fetch_total":
6433,"fetch_time":"1.6s","fetch_time_in_millis":1671,"fetch_current":
0}}}}}}

With best regards,
Afzal Jan


(ajan) #7

I tried fresh with 0.18.7 and got the same OOM

More errors at https://gist.github.com/75255df466d9c779ce30

With best regards,
Afzal Jan

On Feb 23, 11:04 am, ajan jan.af...@gmail.com wrote:

Firstly, apologies, it's not 4K it is actually ~40K docs.

how do you index the data?
Using native Java API

Is it a single bulk request?
Single requests

Multiple concurrent ones?
Yes, multiple requests are made, ~50 requests per second.

Do you open a connection for each index request?
I have tests that reuse the connection and other tests that create a
new connection every time. I have executed both these tests. Not sure
which one would have caused this error.

How big are the docs?
each doc is ~40kb

On Feb 22, 5:38 pm, Shay Banon kim...@gmail.com wrote:

No, you can safely ignore those errors in the log (I will fix it so they won't be logged).

Regarding the OOM, there really shouldn't be a reason why you can't index 4k docs with the default settings. how do you index the data? Is it a single bulk request? Multiple concurrent ones? Do you open a connection for each index request? How big are the docs?

On Wednesday, February 22, 2012 at 12:55 PM, ajan wrote:

I see a lot of these errors too in the logs

[2012-02-22 15:55:33,891][WARN ][index.translog ] [Man-Ape]
[managedobject][2] failed to flush shard on translog threshold
org.elasticsearch.index.shard.IllegalIndexShardStateException:
[managedobject][2] CurrentState[CREATED] write operation only allowed
when started/recovering
at
org.elasticsearch.index.shard.service.InternalIndexShard.verifyStartedOrRecovering(InternalIndexShard.java:
678)
at
org.elasticsearch.index.shard.service.InternalIndexShard.flush(InternalIndexShard.java:
497)
at org.elasticsearch.index.translog.TranslogService
$TranslogBasedFlush$1.run(TranslogService.java:173)
at java.util.concurrent.ThreadPoolExecutor
$Worker.runTask(ThreadPoolExecutor.java:886)
at java.util.concurrent.ThreadPoolExecutor
$Worker.run(ThreadPoolExecutor.java:908)
at java.lang.Thread.run(Thread.java:662)

Are they related to the OOM?

On Feb 22, 3:52 pm, ajan <jan.af...@gmail.com (http://gmail.com)> wrote:

https://gist.github.com/1884109

On Feb 21, 9:10 pm, Shay Banon <kim...@gmail.com (http://gmail.com)> wrote:

Can you gist the OOM you got from the logs?

On Tuesday, February 21, 2012 at 3:02 PM, ajan wrote:

Hi!

I'm seeing OOM errors while using 0.19.0.RC2 with default startup
script with just about 4K docs indexed.

Is there anything wrong in what I'm doing?

Here is the _stats output.

{"ok":true,"_shards":{"total":10,"successful":9,"failed":0},"_all":
{"primaries":{"docs":{"count":37775,"deleted":33},"store":
{"size":"348.5mb","size_in_bytes":365472567},"indexing":{"index_total":
35257,"index_time":"1d","index_time_in_millis":
93317704,"index_current":0,"delete_total":
1,"delete_time":"0s","delete_time_in_millis":0,"delete_current":
0},"get":{"total":3,"time":"4ms","time_in_millis":4,"exists_total":
1,"exists_time":"1ms","exists_time_in_millis":1,"missing_total":
2,"missing_time":"3ms","missing_time_in_millis":3,"current":
0},"search":{"query_total":
12882,"query_time":"34.9s","query_time_in_millis":
34972,"query_current":0,"fetch_total":
3217,"fetch_time":"691ms","fetch_time_in_millis":691,"fetch_current":
0}},"total":{"docs":{"count":74962,"deleted":54},"store":
{"size":"691.8mb","size_in_bytes":725415500},"indexing":{"index_total":
37269,"index_time":"2d","index_time_in_millis":
175828485,"index_current":0,"delete_total":
1,"delete_time":"0s","delete_time_in_millis":0,"delete_current":
0},"get":{"total":3,"time":"4ms","time_in_millis":4,"exists_total":
1,"exists_time":"1ms","exists_time_in_millis":1,"missing_total":
2,"missing_time":"3ms","missing_time_in_millis":3,"current":
0},"search":{"query_total":
16103,"query_time":"42.8s","query_time_in_millis":
42868,"query_current":0,"fetch_total":
6433,"fetch_time":"1.6s","fetch_time_in_millis":1671,"fetch_current":
0}},"indices":{"managedobject":{"primaries":{"docs":{"count":
37775,"deleted":33},"store":{"size":"348.5mb","size_in_bytes":
365472567},"indexing":{"index_total":
35257,"index_time":"1d","index_time_in_millis":
93317704,"index_current":0,"delete_total":
1,"delete_time":"0s","delete_time_in_millis":0,"delete_current":
0},"get":{"total":3,"time":"4ms","time_in_millis":4,"exists_total":
1,"exists_time":"1ms","exists_time_in_millis":1,"missing_total":
2,"missing_time":"3ms","missing_time_in_millis":3,"current":
0},"search":{"query_total":
12882,"query_time":"34.9s","query_time_in_millis":
34972,"query_current":0,"fetch_total":
3217,"fetch_time":"691ms","fetch_time_in_millis":691,"fetch_current":
0}},"total":{"docs":{"count":74962,"deleted":54},"store":
{"size":"691.8mb","size_in_bytes":725415500},"indexing":{"index_total":
37269,"index_time":"2d","index_time_in_millis":
175828485,"index_current":0,"delete_total":
1,"delete_time":"0s","delete_time_in_millis":0,"delete_current":
0},"get":{"total":3,"time":"4ms","time_in_millis":4,"exists_total":
1,"exists_time":"1ms","exists_time_in_millis":1,"missing_total":
2,"missing_time":"3ms","missing_time_in_millis":3,"current":
0},"search":{"query_total":
16103,"query_time":"42.8s","query_time_in_millis":
42868,"query_current":0,"fetch_total":
6433,"fetch_time":"1.6s","fetch_time_in_millis":1671,"fetch_current":
0}}}}}}

With best regards,
Afzal Jan


(Shay Banon) #8

Do you wait for a response for each index request, or do you just fire them without "actionGet"/"get" the response? If so, you might end up overloading ES with many concurrent requests. You can configure ES to throttle such cases, but let's see if that's the case first.

On Thursday, February 23, 2012 at 11:34 AM, ajan wrote:

I tried fresh with 0.18.7 and got the same OOM

More errors at https://gist.github.com/75255df466d9c779ce30

With best regards,
Afzal Jan

On Feb 23, 11:04 am, ajan <jan.af...@gmail.com (http://gmail.com)> wrote:

Firstly, apologies, it's not 4K it is actually ~40K docs.

how do you index the data?
Using native Java API

Is it a single bulk request?
Single requests

Multiple concurrent ones?
Yes, multiple requests are made, ~50 requests per second.

Do you open a connection for each index request?
I have tests that reuse the connection and other tests that create a
new connection every time. I have executed both these tests. Not sure
which one would have caused this error.

How big are the docs?
each doc is ~40kb

On Feb 22, 5:38 pm, Shay Banon <kim...@gmail.com (http://gmail.com)> wrote:

No, you can safely ignore those errors in the log (I will fix it so they won't be logged).

Regarding the OOM, there really shouldn't be a reason why you can't index 4k docs with the default settings. how do you index the data? Is it a single bulk request? Multiple concurrent ones? Do you open a connection for each index request? How big are the docs?

On Wednesday, February 22, 2012 at 12:55 PM, ajan wrote:

I see a lot of these errors too in the logs

[2012-02-22 15:55:33,891][WARN ][index.translog ] [Man-Ape]
[managedobject][2] failed to flush shard on translog threshold
org.elasticsearch.index.shard.IllegalIndexShardStateException:
[managedobject][2] CurrentState[CREATED] write operation only allowed
when started/recovering
at
org.elasticsearch.index.shard.service.InternalIndexShard.verifyStartedOrRecovering(InternalIndexShard.java:
678)
at
org.elasticsearch.index.shard.service.InternalIndexShard.flush(InternalIndexShard.java:
497)
at org.elasticsearch.index.translog.TranslogService
$TranslogBasedFlush$1.run(TranslogService.java:173)
at java.util.concurrent.ThreadPoolExecutor
$Worker.runTask(ThreadPoolExecutor.java:886)
at java.util.concurrent.ThreadPoolExecutor
$Worker.run(ThreadPoolExecutor.java:908)
at java.lang.Thread.run(Thread.java:662)

Are they related to the OOM?

On Feb 22, 3:52 pm, ajan <jan.af...@gmail.com (http://gmail.com)> wrote:

https://gist.github.com/1884109

On Feb 21, 9:10 pm, Shay Banon <kim...@gmail.com (http://gmail.com)> wrote:

Can you gist the OOM you got from the logs?

On Tuesday, February 21, 2012 at 3:02 PM, ajan wrote:

Hi!

I'm seeing OOM errors while using 0.19.0.RC2 with default startup
script with just about 4K docs indexed.

Is there anything wrong in what I'm doing?

Here is the _stats output.

{"ok":true,"_shards":{"total":10,"successful":9,"failed":0},"_all":
{"primaries":{"docs":{"count":37775,"deleted":33},"store":
{"size":"348.5mb","size_in_bytes":365472567},"indexing":{"index_total":
35257,"index_time":"1d","index_time_in_millis":
93317704,"index_current":0,"delete_total":
1,"delete_time":"0s","delete_time_in_millis":0,"delete_current":
0},"get":{"total":3,"time":"4ms","time_in_millis":4,"exists_total":
1,"exists_time":"1ms","exists_time_in_millis":1,"missing_total":
2,"missing_time":"3ms","missing_time_in_millis":3,"current":
0},"search":{"query_total":
12882,"query_time":"34.9s","query_time_in_millis":
34972,"query_current":0,"fetch_total":
3217,"fetch_time":"691ms","fetch_time_in_millis":691,"fetch_current":
0}},"total":{"docs":{"count":74962,"deleted":54},"store":
{"size":"691.8mb","size_in_bytes":725415500},"indexing":{"index_total":
37269,"index_time":"2d","index_time_in_millis":
175828485,"index_current":0,"delete_total":
1,"delete_time":"0s","delete_time_in_millis":0,"delete_current":
0},"get":{"total":3,"time":"4ms","time_in_millis":4,"exists_total":
1,"exists_time":"1ms","exists_time_in_millis":1,"missing_total":
2,"missing_time":"3ms","missing_time_in_millis":3,"current":
0},"search":{"query_total":
16103,"query_time":"42.8s","query_time_in_millis":
42868,"query_current":0,"fetch_total":
6433,"fetch_time":"1.6s","fetch_time_in_millis":1671,"fetch_current":
0}},"indices":{"managedobject":{"primaries":{"docs":{"count":
37775,"deleted":33},"store":{"size":"348.5mb","size_in_bytes":
365472567},"indexing":{"index_total":
35257,"index_time":"1d","index_time_in_millis":
93317704,"index_current":0,"delete_total":
1,"delete_time":"0s","delete_time_in_millis":0,"delete_current":
0},"get":{"total":3,"time":"4ms","time_in_millis":4,"exists_total":
1,"exists_time":"1ms","exists_time_in_millis":1,"missing_total":
2,"missing_time":"3ms","missing_time_in_millis":3,"current":
0},"search":{"query_total":
12882,"query_time":"34.9s","query_time_in_millis":
34972,"query_current":0,"fetch_total":
3217,"fetch_time":"691ms","fetch_time_in_millis":691,"fetch_current":
0}},"total":{"docs":{"count":74962,"deleted":54},"store":
{"size":"691.8mb","size_in_bytes":725415500},"indexing":{"index_total":
37269,"index_time":"2d","index_time_in_millis":
175828485,"index_current":0,"delete_total":
1,"delete_time":"0s","delete_time_in_millis":0,"delete_current":
0},"get":{"total":3,"time":"4ms","time_in_millis":4,"exists_total":
1,"exists_time":"1ms","exists_time_in_millis":1,"missing_total":
2,"missing_time":"3ms","missing_time_in_millis":3,"current":
0},"search":{"query_total":
16103,"query_time":"42.8s","query_time_in_millis":
42868,"query_current":0,"fetch_total":
6433,"fetch_time":"1.6s","fetch_time_in_millis":1671,"fetch_current":
0}}}}}}

With best regards,
Afzal Jan


(ajan) #9

No, I don't wait for response. The part of source is gist here;

With best regards,
Afzal Jan

On Feb 26, 2:58 pm, Shay Banon kim...@gmail.com wrote:

Do you wait for a response for each index request, or do you just fire them without "actionGet"/"get" the response? If so, you might end up overloading ES with many concurrent requests. You can configure ES to throttle such cases, but lets see if thats the case first.

On Thursday, February 23, 2012 at 11:34 AM, ajan wrote:

I tried fresh with 0.18.7 and got the same OOM

More errors at https://gist.github.com/75255df466d9c779ce30

With best regards,
Afzal Jan

On Feb 23, 11:04 am, ajan <jan.af...@gmail.com (http://gmail.com)> wrote:

Firstly, apologies, it's not 4K it is actually ~40K docs.

how do you index the data?
Using native Java API

Is it a single bulk request?
Single requests

Multiple concurrent ones?
Yes, multiple requests are made, ~50 requests per second.

Do you open a connection for each index request?
I have tests that reuse the connection and other tests that create a
new connection everytime. I have executed both these tests. Not sure
which one would have caused this error.

How big are the docs?
each doc is ~40kb

On Feb 22, 5:38 pm, Shay Banon <kim...@gmail.com (http://gmail.com)> wrote:

No, you can safely ignore those errors in the log (I will fix it so they won't be logged).

Regarding the OOM, there really shouldn't be a reason why you can't index 4k docs with the default settings. how do you index the data? Is it a single bulk request? Multiple concurrent ones? Do you open a connection for each index request? How big are the docs?

On Wednesday, February 22, 2012 at 12:55 PM, ajan wrote:

I see a lot of these errors too in the logs

[2012-02-22 15:55:33,891][WARN ][index.translog ] [Man-Ape]
[managedobject][2] failed to flush shard on translog threshold
org.elasticsearch.index.shard.IllegalIndexShardStateException:
[managedobject][2] CurrentState[CREATED] write operation only allowed
when started/recovering
at
org.elasticsearch.index.shard.service.InternalIndexShard.verifyStartedOrRecovering(InternalIndexShard.java:678)
at
org.elasticsearch.index.shard.service.InternalIndexShard.flush(InternalIndexShard.java:497)
at org.elasticsearch.index.translog.TranslogService
$TranslogBasedFlush$1.run(TranslogService.java:173)
at java.util.concurrent.ThreadPoolExecutor
$Worker.runTask(ThreadPoolExecutor.java:886)
at java.util.concurrent.ThreadPoolExecutor
$Worker.run(ThreadPoolExecutor.java:908)
at java.lang.Thread.run(Thread.java:662)

Are they related to the OOM?

On Feb 22, 3:52 pm, ajan <jan.af...@gmail.com (http://gmail.com)> wrote:

https://gist.github.com/1884109

On Feb 21, 9:10 pm, Shay Banon <kim...@gmail.com (http://gmail.com)> wrote:

Can you gist the OOM you got from the logs?

On Tuesday, February 21, 2012 at 3:02 PM, ajan wrote:

Hi!

I'm seeing OOM errors while using 0.19.0.RC2 with default startup
script with just about 4K docs indexed.

Is there anything wrong in what I'm doing?

Here is the _stats output.

{"ok":true,"_shards":{"total":10,"successful":9,"failed":0},"_all":
{"primaries":{"docs":{"count":37775,"deleted":33},"store":
{"size":"348.5mb","size_in_bytes":365472567},"indexing":{"index_total":
35257,"index_time":"1d","index_time_in_millis":
93317704,"index_current":0,"delete_total":
1,"delete_time":"0s","delete_time_in_millis":0,"delete_current":
0},"get":{"total":3,"time":"4ms","time_in_millis":4,"exists_total":
1,"exists_time":"1ms","exists_time_in_millis":1,"missing_total":
2,"missing_time":"3ms","missing_time_in_millis":3,"current":
0},"search":{"query_total":
12882,"query_time":"34.9s","query_time_in_millis":
34972,"query_current":0,"fetch_total":
3217,"fetch_time":"691ms","fetch_time_in_millis":691,"fetch_current":
0}},"total":{"docs":{"count":74962,"deleted":54},"store":
{"size":"691.8mb","size_in_bytes":725415500},"indexing":{"index_total":
37269,"index_time":"2d","index_time_in_millis":
175828485,"index_current":0,"delete_total":
1,"delete_time":"0s","delete_time_in_millis":0,"delete_current":
0},"get":{"total":3,"time":"4ms","time_in_millis":4,"exists_total":
1,"exists_time":"1ms","exists_time_in_millis":1,"missing_total":
2,"missing_time":"3ms","missing_time_in_millis":3,"current":
0},"search":{"query_total":
16103,"query_time":"42.8s","query_time_in_millis":
42868,"query_current":0,"fetch_total":
6433,"fetch_time":"1.6s","fetch_time_in_millis":1671,"fetch_current":
0}},"indices":{"managedobject":{"primaries":{"docs":{"count":
37775,"deleted":33},"store":{"size":"348.5mb","size_in_bytes":
365472567},"indexing":{"index_total":
35257,"index_time":"1d","index_time_in_millis":
93317704,"index_current":0,"delete_total":
1,"delete_time":"0s","delete_time_in_millis":0,"delete_current":
0},"get":{"total":3,"time":"4ms","time_in_millis":4,"exists_total":
1,"exists_time":"1ms","exists_time_in_millis":1,"missing_total":
2,"missing_time":"3ms","missing_time_in_millis":3,"current":
0},"search":{"query_total":
12882,"query_time":"34.9s","query_time_in_millis":
34972,"query_current":0,"fetch_total":
3217,"fetch_time":"691ms","fetch_time_in_millis":691,"fetch_current":
0}},"total":{"docs":{"count":74962,"deleted":54},"store":
{"size":"691.8mb","size_in_bytes":725415500},"indexing":{"index_total":
37269,"index_time":"2d","index_time_in_millis":
175828485,"index_current":0,"delete_total":
1,"delete_time":"0s","delete_time_in_millis":0,"delete_current":
0},"get":{"total":3,"time":"4ms","time_in_millis":4,"exists_total":
1,"exists_time":"1ms","exists_time_in_millis":1,"missing_total":
2,"missing_time":"3ms","missing_time_in_millis":3,"current":
0},"search":{"query_total":
16103,"query_time":"42.8s","query_time_in_millis":
42868,"query_current":0,"fetch_total":
6433,"fetch_time":"1.6s","fetch_time_in_millis":1671,"fetch_current":
0}}}}}}

With best regards,
Afzal Jan


(Shay Banon) #10

So that will cause it, you are overloading ES with many concurrent index requests. If you already do threading, then wait for a response. Btw, you send a refresh right after sending the index request, you shouldn't do it, since its both inefficient, as well as meaningless since you don't wait for the index request to happen.

On Sunday, February 26, 2012 at 4:48 PM, ajan wrote:

No, I don't wait for response. The part of source is gist here;

https://gist.github.com/56a1587a8384fc5ca364

With best regards,
Afzal Jan

On Feb 26, 2:58 pm, Shay Banon <kim...@gmail.com (http://gmail.com)> wrote:

Do you wait for a response for each index request, or do you just fire them without "actionGet"/"get" the response? If so, you might end up overloading ES with many concurrent requests. You can configure ES to throttle such cases, but lets see if thats the case first.

On Thursday, February 23, 2012 at 11:34 AM, ajan wrote:

I tried fresh with 0.18.7 and got the same OOM

More errors at https://gist.github.com/75255df466d9c779ce30 (http://gist.github.com/75255df466d9c779ce30)

With best regards,
Afzal Jan

On Feb 23, 11:04 am, ajan <jan.af...@gmail.com (http://gmail.com)> wrote:

Firstly, apologies, it's not 4K it is actually ~40K docs.

how do you index the data?
Using native Java API

Is it a single bulk request?
Single requests

Multiple concurrent ones?
Yes, multiple requests are made, ~50 requests per second.

Do you open a connection for each index request?
I have tests that reuse the connection and other tests that create a
new connection everytime. I have executed both these tests. Not sure
which one would have caused this error.

How big are the docs?
each doc is ~40kb

On Feb 22, 5:38 pm, Shay Banon <kim...@gmail.com (http://gmail.com)> wrote:

No, you can safely ignore those errors in the log (I will fix it so they won't be logged).

Regarding the OOM, there really shouldn't be a reason why you can't index 4k docs with the default settings. how do you index the data? Is it a single bulk request? Multiple concurrent ones? Do you open a connection for each index request? How big are the docs?

On Wednesday, February 22, 2012 at 12:55 PM, ajan wrote:

I see a lot of these errors too in the logs

[2012-02-22 15:55:33,891][WARN ][index.translog ] [Man-Ape]
[managedobject][2] failed to flush shard on translog threshold
org.elasticsearch.index.shard.IllegalIndexShardStateException:
[managedobject][2] CurrentState[CREATED] write operation only allowed
when started/recovering
at
org.elasticsearch.index.shard.service.InternalIndexShard.verifyStartedOrRecovering(InternalIndexShard.java:678)
at
org.elasticsearch.index.shard.service.InternalIndexShard.flush(InternalIndexShard.java:497)
at org.elasticsearch.index.translog.TranslogService
$TranslogBasedFlush$1.run(TranslogService.java:173)
at java.util.concurrent.ThreadPoolExecutor
$Worker.runTask(ThreadPoolExecutor.java:886)
at java.util.concurrent.ThreadPoolExecutor
$Worker.run(ThreadPoolExecutor.java:908)
at java.lang.Thread.run(Thread.java:662)

Are they related to the OOM?

On Feb 22, 3:52 pm, ajan <jan.af...@gmail.com (http://gmail.com)> wrote:

https://gist.github.com/1884109

On Feb 21, 9:10 pm, Shay Banon <kim...@gmail.com (http://gmail.com)> wrote:

Can you gist the OOM you got from the logs?

On Tuesday, February 21, 2012 at 3:02 PM, ajan wrote:

Hi!

I'm seeing OOM errors while using 0.19.0.RC2 with default startup
script with just about 4K docs indexed.

Is there anything wrong in what I'm doing?

Here is the _stats output.

{"ok":true,"_shards":{"total":10,"successful":9,"failed":0},"_all":
{"primaries":{"docs":{"count":37775,"deleted":33},"store":
{"size":"348.5mb","size_in_bytes":365472567},"indexing":{"index_total":
35257,"index_time":"1d","index_time_in_millis":
93317704,"index_current":0,"delete_total":
1,"delete_time":"0s","delete_time_in_millis":0,"delete_current":
0},"get":{"total":3,"time":"4ms","time_in_millis":4,"exists_total":
1,"exists_time":"1ms","exists_time_in_millis":1,"missing_total":
2,"missing_time":"3ms","missing_time_in_millis":3,"current":
0},"search":{"query_total":
12882,"query_time":"34.9s","query_time_in_millis":
34972,"query_current":0,"fetch_total":
3217,"fetch_time":"691ms","fetch_time_in_millis":691,"fetch_current":
0}},"total":{"docs":{"count":74962,"deleted":54},"store":
{"size":"691.8mb","size_in_bytes":725415500},"indexing":{"index_total":
37269,"index_time":"2d","index_time_in_millis":
175828485,"index_current":0,"delete_total":
1,"delete_time":"0s","delete_time_in_millis":0,"delete_current":
0},"get":{"total":3,"time":"4ms","time_in_millis":4,"exists_total":
1,"exists_time":"1ms","exists_time_in_millis":1,"missing_total":
2,"missing_time":"3ms","missing_time_in_millis":3,"current":
0},"search":{"query_total":
16103,"query_time":"42.8s","query_time_in_millis":
42868,"query_current":0,"fetch_total":
6433,"fetch_time":"1.6s","fetch_time_in_millis":1671,"fetch_current":
0}},"indices":{"managedobject":{"primaries":{"docs":{"count":
37775,"deleted":33},"store":{"size":"348.5mb","size_in_bytes":
365472567},"indexing":{"index_total":
35257,"index_time":"1d","index_time_in_millis":
93317704,"index_current":0,"delete_total":
1,"delete_time":"0s","delete_time_in_millis":0,"delete_current":
0},"get":{"total":3,"time":"4ms","time_in_millis":4,"exists_total":
1,"exists_time":"1ms","exists_time_in_millis":1,"missing_total":
2,"missing_time":"3ms","missing_time_in_millis":3,"current":
0},"search":{"query_total":
12882,"query_time":"34.9s","query_time_in_millis":
34972,"query_current":0,"fetch_total":
3217,"fetch_time":"691ms","fetch_time_in_millis":691,"fetch_current":
0}},"total":{"docs":{"count":74962,"deleted":54},"store":
{"size":"691.8mb","size_in_bytes":725415500},"indexing":{"index_total":
37269,"index_time":"2d","index_time_in_millis":
175828485,"index_current":0,"delete_total":
1,"delete_time":"0s","delete_time_in_millis":0,"delete_current":
0},"get":{"total":3,"time":"4ms","time_in_millis":4,"exists_total":
1,"exists_time":"1ms","exists_time_in_millis":1,"missing_total":
2,"missing_time":"3ms","missing_time_in_millis":3,"current":
0},"search":{"query_total":
16103,"query_time":"42.8s","query_time_in_millis":
42868,"query_current":0,"fetch_total":
6433,"fetch_time":"1.6s","fetch_time_in_millis":1671,"fetch_current":
0}}}}}}

With best regards,
Afzal Jan


(ajan) #11

Looks like that resolved, however, it has reduced the no of docs
indexed per second ~50 now after doing the "actionGet/Get". Previously
it used to index ~500 docs per second.

I already do threading and try to index docs concurrently. In this
tests I add the docs to the same index. Going forward I would have to
add docs to multiple indices concurrently. I need this concurrency for
our product that has many services who could use ES cluster to index
and search through docs of their and other services.

I will tweak my tests to see how much of concurrency is acceptable. If
you have information on this that would be good.

With best regards,
Afzal Jan

On Feb 27, 12:50 am, Shay Banon kim...@gmail.com wrote:

So that will cause it, you are overloading ES with many concurrent index requests. If you already do threading, then wait for a response. Btw, you send a refresh right after sending the index request, you shouldn't do it, since its both inefficient, as well as meaningless since you don't wait for the index request to happen.

On Sunday, February 26, 2012 at 4:48 PM, ajan wrote:

No, I don't wait for response. The part of source is gist here;

https://gist.github.com/56a1587a8384fc5ca364

With best regards,
Afzal Jan

On Feb 26, 2:58 pm, Shay Banon <kim...@gmail.com (http://gmail.com)> wrote:

Do you wait for a response for each index request, or do you just fire them without "actionGet"/"get" the response? If so, you might end up overloading ES with many concurrent requests. You can configure ES to throttle such cases, but lets see if thats the case first.

On Thursday, February 23, 2012 at 11:34 AM, ajan wrote:

I tried fresh with 0.18.7 and got the same OOM

More errors at https://gist.github.com/75255df466d9c779ce30 (http://gist.github.com/75255df466d9c779ce30)

With best regards,
Afzal Jan

On Feb 23, 11:04 am, ajan <jan.af...@gmail.com (http://gmail.com)> wrote:

Firstly, apologies, it's not 4K it is actually ~40K docs.

how do you index the data?
Using native Java API

Is it a single bulk request?
Single requests

Multiple concurrent ones?
Yes, multiple requests are made, ~50 requests per second.

Do you open a connection for each index request?
I have tests that reuse the connection and other tests that create a
new connection everytime. I have executed both these tests. Not sure
which one would have caused this error.

How big are the docs?
each doc is ~40kb

On Feb 22, 5:38 pm, Shay Banon <kim...@gmail.com (http://gmail.com)> wrote:

No, you can safely ignore those errors in the log (I will fix it so they won't be logged).

Regarding the OOM, there really shouldn't be a reason why you can't index 4k docs with the default settings. how do you index the data? Is it a single bulk request? Multiple concurrent ones? Do you open a connection for each index request? How big are the docs?

On Wednesday, February 22, 2012 at 12:55 PM, ajan wrote:

I see a lot of these errors too in the logs

[2012-02-22 15:55:33,891][WARN ][index.translog ] [Man-Ape]
[managedobject][2] failed to flush shard on translog threshold
org.elasticsearch.index.shard.IllegalIndexShardStateException:
[managedobject][2] CurrentState[CREATED] write operation only allowed
when started/recovering
at
org.elasticsearch.index.shard.service.InternalIndexShard.verifyStartedOrRecovering(InternalIndexShard.java:678)
at
org.elasticsearch.index.shard.service.InternalIndexShard.flush(InternalIndexShard.java:497)
at org.elasticsearch.index.translog.TranslogService
$TranslogBasedFlush$1.run(TranslogService.java:173)
at java.util.concurrent.ThreadPoolExecutor
$Worker.runTask(ThreadPoolExecutor.java:886)
at java.util.concurrent.ThreadPoolExecutor
$Worker.run(ThreadPoolExecutor.java:908)
at java.lang.Thread.run(Thread.java:662)

Are they related to the OOM?

On Feb 22, 3:52 pm, ajan <jan.af...@gmail.com (http://gmail.com)> wrote:

https://gist.github.com/1884109

On Feb 21, 9:10 pm, Shay Banon <kim...@gmail.com (http://gmail.com)> wrote:

Can you gist the OOM you got from the logs?

On Tuesday, February 21, 2012 at 3:02 PM, ajan wrote:

Hi!

I'm seeing OOM errors while using 0.19.0.RC2 with default startup
script with just about 4K docs indexed.

Is there anything wrong in what I'm doing?

Here is the _stats output.

{"ok":true,"_shards":{"total":10,"successful":9,"failed":0},"_all":
{"primaries":{"docs":{"count":37775,"deleted":33},"store":
{"size":"348.5mb","size_in_bytes":365472567},"indexing":{"index_total":
35257,"index_time":"1d","index_time_in_millis":
93317704,"index_current":0,"delete_total":
1,"delete_time":"0s","delete_time_in_millis":0,"delete_current":
0},"get":{"total":3,"time":"4ms","time_in_millis":4,"exists_total":
1,"exists_time":"1ms","exists_time_in_millis":1,"missing_total":
2,"missing_time":"3ms","missing_time_in_millis":3,"current":
0},"search":{"query_total":
12882,"query_time":"34.9s","query_time_in_millis":
34972,"query_current":0,"fetch_total":
3217,"fetch_time":"691ms","fetch_time_in_millis":691,"fetch_current":
0}},"total":{"docs":{"count":74962,"deleted":54},"store":
{"size":"691.8mb","size_in_bytes":725415500},"indexing":{"index_total":
37269,"index_time":"2d","index_time_in_millis":
175828485,"index_current":0,"delete_total":
1,"delete_time":"0s","delete_time_in_millis":0,"delete_current":
0},"get":{"total":3,"time":"4ms","time_in_millis":4,"exists_total":
1,"exists_time":"1ms","exists_time_in_millis":1,"missing_total":
2,"missing_time":"3ms","missing_time_in_millis":3,"current":
0},"search":{"query_total":
16103,"query_time":"42.8s","query_time_in_millis":
42868,"query_current":0,"fetch_total":
6433,"fetch_time":"1.6s","fetch_time_in_millis":1671,"fetch_current":
0}},"indices":{"managedobject":{"primaries":{"docs":{"count":
37775,"deleted":33},"store":{"size":"348.5mb","size_in_bytes":
365472567},"indexing":{"index_total":
35257,"index_time":"1d","index_time_in_millis":
93317704,"index_current":0,"delete_total":
1,"delete_time":"0s","delete_time_in_millis":0,"delete_current":
0},"get":{"total":3,"time":"4ms","time_in_millis":4,"exists_total":
1,"exists_time":"1ms","exists_time_in_millis":1,"missing_total":
2,"missing_time":"3ms","missing_time_in_millis":3,"current":
0},"search":{"query_total":
12882,"query_time":"34.9s","query_time_in_millis":
34972,"query_current":0,"fetch_total":
3217,"fetch_time":"691ms","fetch_time_in_millis":691,"fetch_current":
0}},"total":{"docs":{"count":74962,"deleted":54},"store":
{"size":"691.8mb","size_in_bytes":725415500},"indexing":{"index_total":
37269,"index_time":"2d","index_time_in_millis":
175828485,"index_current":0,"delete_total":
1,"delete_time":"0s","delete_time_in_millis":0,"delete_current":
0},"get":{"total":3,"time":"4ms","time_in_millis":4,"exists_total":
1,"exists_time":"1ms","exists_time_in_millis":1,"missing_total":
2,"missing_time":"3ms","missing_time_in_millis":3,"current":
0},"search":{"query_total":
16103,"query_time":"42.8s","query_time_in_millis":
42868,"query_current":0,"fetch_total":
6433,"fetch_time":"1.6s","fetch_time_in_millis":1671,"fetch_current":
0}}}}}}

With best regards,
Afzal Jan


(Shay Banon) #12

It wasn't really indexing 500 docs per second, it was basically a fire and forget indexing request. In your API, make sure you don't call refresh after each index request, and if you can, use the bulk API instead of indexing each doc separately.

On Monday, February 27, 2012 at 2:35 PM, ajan wrote:

Looks like that resolved, however, it has reduced the no of docs
indexed per second ~50 now after doing the "actionGet/Get". Previously
it used to index ~500 docs per second.

I already do threading and try to index docs concurrently. In this
tests I add the docs to the same index. Going forward I would have to
add docs to multiple indices concurrently. I need this concurrency for
our product that has many services who could use ES cluster to index
and search through docs of their and other services.

I will tweak my tests to see how much of concurrency is acceptable. If
you have information on this that would be good.

With best regards,
Afzal Jan

On Feb 27, 12:50 am, Shay Banon <kim...@gmail.com (http://gmail.com)> wrote:

So that will cause it, you are overloading ES with many concurrent index requests. If you already do threading, then wait for a response. Btw, you send a refresh right after sending the index request, you shouldn't do it, since its both inefficient, as well as meaningless since you don't wait for the index request to happen.

On Sunday, February 26, 2012 at 4:48 PM, ajan wrote:

No, I don't wait for response. The part of source is gist here;

https://gist.github.com/56a1587a8384fc5ca364

With best regards,
Afzal Jan

On Feb 26, 2:58 pm, Shay Banon <kim...@gmail.com (http://gmail.com)> wrote:

Do you wait for a response for each index request, or do you just fire them without "actionGet"/"get" the response? If so, you might end up overloading ES with many concurrent requests. You can configure ES to throttle such cases, but lets see if thats the case first.

On Thursday, February 23, 2012 at 11:34 AM, ajan wrote:

I tried fresh with 0.18.7 and got the same OOM

More errors at https://gist.github.com/75255df466d9c779ce30 (http://gist.github.com/75255df466d9c779ce30)

With best regards,
Afzal Jan

On Feb 23, 11:04 am, ajan <jan.af...@gmail.com (http://gmail.com)> wrote:

Firstly, apologies, it's not 4K it is actually ~40K docs.

how do you index the data?
Using native Java API

Is it a single bulk request?
Single requests

Multiple concurrent ones?
Yes, multiple requests are made, ~50 requests per second.

Do you open a connection for each index request?
I have tests that reuse the connection and other tests that create a
new connection everytime. I have executed both these tests. Not sure
which one would have caused this error.

How big are the docs?
each doc is ~40kb

On Feb 22, 5:38 pm, Shay Banon <kim...@gmail.com (http://gmail.com)> wrote:

No, you can safely ignore those errors in the log (I will fix it so they won't be logged).

Regarding the OOM, there really shouldn't be a reason why you can't index 4k docs with the default settings. how do you index the data? Is it a single bulk request? Multiple concurrent ones? Do you open a connection for each index request? How big are the docs?

On Wednesday, February 22, 2012 at 12:55 PM, ajan wrote:

I see a lot of these errors too in the logs

[2012-02-22 15:55:33,891][WARN ][index.translog ] [Man-Ape]
[managedobject][2] failed to flush shard on translog threshold
org.elasticsearch.index.shard.IllegalIndexShardStateException:
[managedobject][2] CurrentState[CREATED] write operation only allowed
when started/recovering
at
org.elasticsearch.index.shard.service.InternalIndexShard.verifyStartedOrRecovering(InternalIndexShard.java:678)
at
org.elasticsearch.index.shard.service.InternalIndexShard.flush(InternalIndexShard.java:497)
at org.elasticsearch.index.translog.TranslogService
$TranslogBasedFlush$1.run(TranslogService.java:173)
at java.util.concurrent.ThreadPoolExecutor
$Worker.runTask(ThreadPoolExecutor.java:886)
at java.util.concurrent.ThreadPoolExecutor
$Worker.run(ThreadPoolExecutor.java:908)
at java.lang.Thread.run(Thread.java:662)

Are they related to the OOM?

On Feb 22, 3:52 pm, ajan <jan.af...@gmail.com (http://gmail.com)> wrote:

https://gist.github.com/1884109

On Feb 21, 9:10 pm, Shay Banon <kim...@gmail.com (http://gmail.com)> wrote:

Can you gist the OOM you got from the logs?

On Tuesday, February 21, 2012 at 3:02 PM, ajan wrote:

Hi!

I'm seeing OOM errors while using 0.19.0.RC2 with default startup
script with just about 4K docs indexed.

Is there anything wrong in what I'm doing?

Here is the _stats output.

{"ok":true,"_shards":{"total":10,"successful":9,"failed":0},"_all":
{"primaries":{"docs":{"count":37775,"deleted":33},"store":
{"size":"348.5mb","size_in_bytes":365472567},"indexing":{"index_total":
35257,"index_time":"1d","index_time_in_millis":
93317704,"index_current":0,"delete_total":
1,"delete_time":"0s","delete_time_in_millis":0,"delete_current":
0},"get":{"total":3,"time":"4ms","time_in_millis":4,"exists_total":
1,"exists_time":"1ms","exists_time_in_millis":1,"missing_total":
2,"missing_time":"3ms","missing_time_in_millis":3,"current":
0},"search":{"query_total":
12882,"query_time":"34.9s","query_time_in_millis":
34972,"query_current":0,"fetch_total":
3217,"fetch_time":"691ms","fetch_time_in_millis":691,"fetch_current":
0}},"total":{"docs":{"count":74962,"deleted":54},"store":
{"size":"691.8mb","size_in_bytes":725415500},"indexing":{"index_total":
37269,"index_time":"2d","index_time_in_millis":
175828485,"index_current":0,"delete_total":
1,"delete_time":"0s","delete_time_in_millis":0,"delete_current":
0},"get":{"total":3,"time":"4ms","time_in_millis":4,"exists_total":
1,"exists_time":"1ms","exists_time_in_millis":1,"missing_total":
2,"missing_time":"3ms","missing_time_in_millis":3,"current":
0},"search":{"query_total":
16103,"query_time":"42.8s","query_time_in_millis":
42868,"query_current":0,"fetch_total":
6433,"fetch_time":"1.6s","fetch_time_in_millis":1671,"fetch_current":
0}},"indices":{"managedobject":{"primaries":{"docs":{"count":
37775,"deleted":33},"store":{"size":"348.5mb","size_in_bytes":
365472567},"indexing":{"index_total":
35257,"index_time":"1d","index_time_in_millis":
93317704,"index_current":0,"delete_total":
1,"delete_time":"0s","delete_time_in_millis":0,"delete_current":
0},"get":{"total":3,"time":"4ms","time_in_millis":4,"exists_total":
1,"exists_time":"1ms","exists_time_in_millis":1,"missing_total":
2,"missing_time":"3ms","missing_time_in_millis":3,"current":
0},"search":{"query_total":
12882,"query_time":"34.9s","query_time_in_millis":
34972,"query_current":0,"fetch_total":
3217,"fetch_time":"691ms","fetch_time_in_millis":691,"fetch_current":
0}},"total":{"docs":{"count":74962,"deleted":54},"store":
{"size":"691.8mb","size_in_bytes":725415500},"indexing":{"index_total":
37269,"index_time":"2d","index_time_in_millis":
175828485,"index_current":0,"delete_total":
1,"delete_time":"0s","delete_time_in_millis":0,"delete_current":
0},"get":{"total":3,"time":"4ms","time_in_millis":4,"exists_total":
1,"exists_time":"1ms","exists_time_in_millis":1,"missing_total":
2,"missing_time":"3ms","missing_time_in_millis":3,"current":
0},"search":{"query_total":
16103,"query_time":"42.8s","query_time_in_millis":
42868,"query_current":0,"fetch_total":
6433,"fetch_time":"1.6s","fetch_time_in_millis":1671,"fetch_current":
0}}}}}}

With best regards,
Afzal Jan


(ajan) #13

Great, thanks, the issue is resolved.

Now I have about ~200K documents and the cluster is stable and all my
queries are returned really quick. That too with the default startup
options. I didn't have to allocate more memory. This is great.

I tried the bulk api and when I send a bulk request for about ~800
documents it takes about 4 seconds. Is that expected?

With best regards,
Afzal Jan

On Feb 27, 6:05 pm, Shay Banon kim...@gmail.com wrote:

It wasn't really indexing 500 docs per second, it was basically a fire and forget indexing request. In your API, make sure you don't call refresh after each index request, and if you can, use the bulk API instead of indexing each doc separately.

On Monday, February 27, 2012 at 2:35 PM, ajan wrote:

Looks like that resolved, however, it has reduced the no of docs
indexed per second ~50 now after doing the "actionGet/Get". Previously
it used to index ~500 docs per second.

I already do threading and try to index docs concurrently. In this
tests I add the docs to the same index. Going forward I would have to
add docs to multiple indices concurrently. I need this concurrency for
our product that has many services who could use ES cluster to index
and search through docs of their and other services.

I will tweak my tests to see how much of concurrency is acceptable. If
you have information on this that would be good.

With best regards,
Afzal Jan

On Feb 27, 12:50 am, Shay Banon <kim...@gmail.com (http://gmail.com)> wrote:

So that will cause it, you are overloading ES with many concurrent index requests. If you already do threading, then wait for a response. Btw, you send a refresh right after sending the index request, you shouldn't do it, since its both inefficient, as well as meaningless since you don't wait for the index request to happen.

On Sunday, February 26, 2012 at 4:48 PM, ajan wrote:

No, I don't wait for response. The part of source is gist here;

https://gist.github.com/56a1587a8384fc5ca364

With best regards,
Afzal Jan

On Feb 26, 2:58 pm, Shay Banon <kim...@gmail.com (http://gmail.com)> wrote:

Do you wait for a response for each index request, or do you just fire them without "actionGet"/"get" the response? If so, you might end up overloading ES with many concurrent requests. You can configure ES to throttle such cases, but lets see if thats the case first.

On Thursday, February 23, 2012 at 11:34 AM, ajan wrote:

I tried fresh with 0.18.7 and got the same OOM

More errors at https://gist.github.com/75255df466d9c779ce30 (http://gist.github.com/75255df466d9c779ce30)

With best regards,
Afzal Jan

On Feb 23, 11:04 am, ajan <jan.af...@gmail.com (http://gmail.com)> wrote:

Firstly, apologies, it's not 4K it is actually ~40K docs.

how do you index the data?
Using native Java API

Is it a single bulk request?
Single requests

Multiple concurrent ones?
Yes, multiple requests are made, ~50 requests per second.

Do you open a connection for each index request?
I have tests that reuse the connection and other tests that create a
new connection everytime. I have executed both these tests. Not sure
which one would have caused this error.

How big are the docs?
each doc is ~40kb

On Feb 22, 5:38 pm, Shay Banon <kim...@gmail.com (http://gmail.com)> wrote:

No, you can safely ignore those errors in the log (I will fix it so they won't be logged).

Regarding the OOM, there really shouldn't be a reason why you can't index 4k docs with the default settings. how do you index the data? Is it a single bulk request? Multiple concurrent ones? Do you open a connection for each index request? How big are the docs?

On Wednesday, February 22, 2012 at 12:55 PM, ajan wrote:

I see a lot of these errors too in the logs

[2012-02-22 15:55:33,891][WARN ][index.translog ] [Man-Ape]
[managedobject][2] failed to flush shard on translog threshold
org.elasticsearch.index.shard.IllegalIndexShardStateException:
[managedobject][2] CurrentState[CREATED] write operation only allowed
when started/recovering
at
org.elasticsearch.index.shard.service.InternalIndexShard.verifyStartedOrRecovering(InternalIndexShard.java:
678)
at
org.elasticsearch.index.shard.service.InternalIndexShard.flush(InternalIndexShard.java:
497)
at org.elasticsearch.index.translog.TranslogService
$TranslogBasedFlush$1.run(TranslogService.java:173)
at java.util.concurrent.ThreadPoolExecutor
$Worker.runTask(ThreadPoolExecutor.java:886)
at java.util.concurrent.ThreadPoolExecutor
$Worker.run(ThreadPoolExecutor.java:908)
at java.lang.Thread.run(Thread.java:662)

Are they related to the OOM?

On Feb 22, 3:52 pm, ajan <jan.af...@gmail.com (http://gmail.com)> wrote:

https://gist.github.com/1884109

On Feb 21, 9:10 pm, Shay Banon <kim...@gmail.com (http://gmail.com)> wrote:

Can you gist the OOM you got from the logs?

On Tuesday, February 21, 2012 at 3:02 PM, ajan wrote:

Hi!

I'm seeing OOM errors while using 0.19.0.RC2 with default startup
script with just about 4K docs indexed.

Is there anything wrong in what I'm doing?

Here is the _stats output.

{"ok":true,"_shards":{"total":10,"successful":9,"failed":0},"_all":
{"primaries":{"docs":{"count":37775,"deleted":33},"store":
{"size":"348.5mb","size_in_bytes":365472567},"indexing":{"index_total":
35257,"index_time":"1d","index_time_in_millis":
93317704,"index_current":0,"delete_total":
1,"delete_time":"0s","delete_time_in_millis":0,"delete_current":
0},"get":{"total":3,"time":"4ms","time_in_millis":4,"exists_total":
1,"exists_time":"1ms","exists_time_in_millis":1,"missing_total":
2,"missing_time":"3ms","missing_time_in_millis":3,"current":
0},"search":{"query_total":
12882,"query_time":"34.9s","query_time_in_millis":
34972,"query_current":0,"fetch_total":
3217,"fetch_time":"691ms","fetch_time_in_millis":691,"fetch_current":
0}},"total":{"docs":{"count":74962,"deleted":54},"store":
{"size":"691.8mb","size_in_bytes":725415500},"indexing":{"index_total":
37269,"index_time":"2d","index_time_in_millis":
175828485,"index_current":0,"delete_total":
1,"delete_time":"0s","delete_time_in_millis":0,"delete_current":
0},"get":{"total":3,"time":"4ms","time_in_millis":4,"exists_total":
1,"exists_time":"1ms","exists_time_in_millis":1,"missing_total":
2,"missing_time":"3ms","missing_time_in_millis":3,"current":
0},"search":{"query_total":
16103,"query_time":"42.8s","query_time_in_millis":
42868,"query_current":0,"fetch_total":
6433,"fetch_time":"1.6s","fetch_time_in_millis":1671,"fetch_current":
0}},"indices":{"managedobject":{"primaries":{"docs":{"count":
37775,"deleted":33},"store":{"size":"348.5mb","size_in_bytes":
365472567},"indexing":{"index_total":
35257,"index_time":"1d","index_time_in_millis":
93317704,"index_current":0,"delete_total":
1,"delete_time":"0s","delete_time_in_millis":0,"delete_current":
0},"get":{"total":3,"time":"4ms","time_in_millis":4,"exists_total":
1,"exists_time":"1ms","exists_time_in_millis":1,"missing_total":
2,"missing_time":"3ms","missing_time_in_millis":3,"current":
0},"search":{"query_total":
12882,"query_time":"34.9s","query_time_in_millis":
34972,"query_current":0,"fetch_total":
3217,"fetch_time":"691ms","fetch_time_in_millis":691,"fetch_current":
0}},"total":{"docs":{"count":74962,"deleted":54},"store":
{"size":"691.8mb","size_in_bytes":725415500},"indexing":{"index_total":
37269,"index_time":"2d","index_time_in_millis":
175828485,"index_current":0,"delete_total":
1,"delete_time":"0s","delete_time_in_millis":0,"delete_current":
0},"get":{"total":3,"time":"4ms","time_in_millis":4,"exists_total":
1,"exists_time":"1ms","exists_time_in_millis":1,"missing_total":

...

read more »


(Shay Banon) #14

You are asking me if its expected? :).

On Thursday, March 1, 2012 at 6:45 AM, ajan wrote:

Great, thanks, the issue is resolved.

Now I have about ~200K documents and the cluster is stable and all my
queries are returned really quick. That too with the default startup
options. I didn't have to allocate more memory. This is great.

I tried the bulk api and when I send a bulk request for about ~800
documents it takes about 4 seconds. Is that expected?

With best regards,
Afzal Jan

On Feb 27, 6:05 pm, Shay Banon <kim...@gmail.com (http://gmail.com)> wrote:

It wasn't really indexing 500 docs per second, it was basically a fire and forget indexing request. In your API, make sure you don't call refresh after each index request, and if you can, use the bulk API instead of indexing each doc separately.

On Monday, February 27, 2012 at 2:35 PM, ajan wrote:

Looks like that resolved it; however, it has reduced the number of docs
indexed per second to ~50 now after doing the "actionGet/Get". Previously
it used to index ~500 docs per second.

I already do threading and try to index docs concurrently. In these
tests I add the docs to the same index. Going forward I would have to
add docs to multiple indices concurrently. I need this concurrency for
our product that has many services who could use ES cluster to index
and search through docs of their and other services.

I will tweak my tests to see how much of concurrency is acceptable. If
you have information on this that would be good.

With best regards,
Afzal Jan

On Feb 27, 12:50 am, Shay Banon <kim...@gmail.com (http://gmail.com)> wrote:

So that will cause it, you are overloading ES with many concurrent index requests. If you already do threading, then wait for a response. Btw, you send a refresh right after sending the index request, you shouldn't do it, since its both inefficient, as well as meaningless since you don't wait for the index request to happen.

On Sunday, February 26, 2012 at 4:48 PM, ajan wrote:

No, I don't wait for response. The part of source is gist here;

https://gist.github.com/56a1587a8384fc5ca364

With best regards,
Afzal Jan

On Feb 26, 2:58 pm, Shay Banon <kim...@gmail.com (http://gmail.com)> wrote:

Do you wait for a response for each index request, or do you just fire them without "actionGet"/"get" the response? If so, you might end up overloading ES with many concurrent requests. You can configure ES to throttle such cases, but lets see if thats the case first.

On Thursday, February 23, 2012 at 11:34 AM, ajan wrote:

I tried fresh with 0.18.7 and got the same OOM

More errors at https://gist.github.com/75255df466d9c779ce30 (http://gist.github.com/7...)

With best regards,
Afzal Jan

On Feb 23, 11:04 am, ajan <jan.af...@gmail.com (http://gmail.com)> wrote:

Firstly, apologies, it's not 4K it is actually ~40K docs.

how do you index the data?
Using native Java API

Is it a single bulk request?
Single requests

Multiple concurrent ones?
Yes, multiple requests are made, ~50 requests per second.

Do you open a connection for each index request?
I have tests that reuse the connection and other tests that create a
new connection everytime. I have executed both these tests. Not sure
which one would have caused this error.

How big are the docs?
each doc is ~40kb

On Feb 22, 5:38 pm, Shay Banon <kim...@gmail.com (http://gmail.com)> wrote:

No, you can safely ignore those errors in the log (I will fix it so they won't be logged).

Regarding the OOM, there really shouldn't be a reason why you can't index 4k docs with the default settings. how do you index the data? Is it a single bulk request? Multiple concurrent ones? Do you open a connection for each index request? How big are the docs?

On Wednesday, February 22, 2012 at 12:55 PM, ajan wrote:

I see a lot of these errors too in the logs

[2012-02-22 15:55:33,891][WARN ][index.translog ] [Man-Ape]
[managedobject][2] failed to flush shard on translog threshold
org.elasticsearch.index.shard.IllegalIndexShardStateException:
[managedobject][2] CurrentState[CREATED] write operation only allowed
when started/recovering
at
org.elasticsearch.index.shard.service.InternalIndexShard.verifyStartedOrRecovering(InternalIndexShard.java:
678)
at
org.elasticsearch.index.shard.service.InternalIndexShard.flush(InternalIndexShard.java:
497)
at org.elasticsearch.index.translog.TranslogService
$TranslogBasedFlush$1.run(TranslogService.java:173)
at java.util.concurrent.ThreadPoolExecutor
$Worker.runTask(ThreadPoolExecutor.java:886)
at java.util.concurrent.ThreadPoolExecutor
$Worker.run(ThreadPoolExecutor.java:908)
at java.lang.Thread.run(Thread.java:662)

Are they related to the OOM?

On Feb 22, 3:52 pm, ajan <jan.af...@gmail.com (http://gmail.com)> wrote:

https://gist.github.com/1884109

On Feb 21, 9:10 pm, Shay Banon <kim...@gmail.com (http://gmail.com)> wrote:

Can you gist the OOM you got from the logs?

On Tuesday, February 21, 2012 at 3:02 PM, ajan wrote:

Hi!

I'm seeing OOM errors while using 0.19.0.RC2 with default startup
script with just about 4K docs indexed.

Is there anything wrong in what I'm doing?

Here is the _stats output.

{"ok":true,"_shards":{"total":10,"successful":9,"failed":0},"_all":
{"primaries":{"docs":{"count":37775,"deleted":33},"store":
{"size":"348.5mb","size_in_bytes":365472567},"indexing":{"index_total":
35257,"index_time":"1d","index_time_in_millis":
93317704,"index_current":0,"delete_total":
1,"delete_time":"0s","delete_time_in_millis":0,"delete_current":
0},"get":{"total":3,"time":"4ms","time_in_millis":4,"exists_total":
1,"exists_time":"1ms","exists_time_in_millis":1,"missing_total":
2,"missing_time":"3ms","missing_time_in_millis":3,"current":
0},"search":{"query_total":
12882,"query_time":"34.9s","query_time_in_millis":
34972,"query_current":0,"fetch_total":
3217,"fetch_time":"691ms","fetch_time_in_millis":691,"fetch_current":
0}},"total":{"docs":{"count":74962,"deleted":54},"store":
{"size":"691.8mb","size_in_bytes":725415500},"indexing":{"index_total":
37269,"index_time":"2d","index_time_in_millis":
175828485,"index_current":0,"delete_total":
1,"delete_time":"0s","delete_time_in_millis":0,"delete_current":
0},"get":{"total":3,"time":"4ms","time_in_millis":4,"exists_total":
1,"exists_time":"1ms","exists_time_in_millis":1,"missing_total":
2,"missing_time":"3ms","missing_time_in_millis":3,"current":
0},"search":{"query_total":
16103,"query_time":"42.8s","query_time_in_millis":
42868,"query_current":0,"fetch_total":
6433,"fetch_time":"1.6s","fetch_time_in_millis":1671,"fetch_current":
0}},"indices":{"managedobject":{"primaries":{"docs":{"count":
37775,"deleted":33},"store":{"size":"348.5mb","size_in_bytes":
365472567},"indexing":{"index_total":
35257,"index_time":"1d","index_time_in_millis":
93317704,"index_current":0,"delete_total":
1,"delete_time":"0s","delete_time_in_millis":0,"delete_current":
0},"get":{"total":3,"time":"4ms","time_in_millis":4,"exists_total":
1,"exists_time":"1ms","exists_time_in_millis":1,"missing_total":
2,"missing_time":"3ms","missing_time_in_millis":3,"current":
0},"search":{"query_total":
12882,"query_time":"34.9s","query_time_in_millis":
34972,"query_current":0,"fetch_total":
3217,"fetch_time":"691ms","fetch_time_in_millis":691,"fetch_current":
0}},"total":{"docs":{"count":74962,"deleted":54},"store":
{"size":"691.8mb","size_in_bytes":725415500},"indexing":{"index_total":
37269,"index_time":"2d","index_time_in_millis":
175828485,"index_current":0,"delete_total":
1,"delete_time":"0s","delete_time_in_millis":0,"delete_current":
0},"get":{"total":3,"time":"4ms","time_in_millis":4,"exists_total":
1,"exists_time":"1ms","exists_time_in_millis":1,"missing_total":

...

read more »


(ajan) #15

Yes, does it really take 4 seconds to bulk index ~800 docs? Shouldn't
this be much faster?

Do I miss something that it is taking a longer time to index ~800
docs.

On Mar 1, 5:33 pm, Shay Banon kim...@gmail.com wrote:

You are asking me if its expected? :).

On Thursday, March 1, 2012 at 6:45 AM, ajan wrote:

Great, thanks, the issue is resolved.

Now I have about ~200K documents and the cluster is stable and all my
queries are returned really quick. That too with the default startup
options. I didn't have to allocate more memory. This is great.

I tried the bulk api and when I send a bulk request for about ~800
documents it takes about 4 seconds. Is that expected?

With best regards,
Afzal Jan

On Feb 27, 6:05 pm, Shay Banon <kim...@gmail.com (http://gmail.com)> wrote:

It wasn't really indexing 500 docs per second, it was basically a fire and forget indexing request. In your API, make sure you don't call refresh after each index request, and if you can, use the bulk API instead of indexing each doc separately.

On Monday, February 27, 2012 at 2:35 PM, ajan wrote:

Looks like that resolved it; however, it has reduced the number of docs
indexed per second to ~50 now after doing the "actionGet/Get". Previously
it used to index ~500 docs per second.

I already do threading and try to index docs concurrently. In these
tests I add the docs to the same index. Going forward I would have to
add docs to multiple indices concurrently. I need this concurrency for
our product that has many services who could use ES cluster to index
and search through docs of their and other services.

I will tweak my tests to see how much of concurrency is acceptable. If
you have information on this that would be good.

With best regards,
Afzal Jan

On Feb 27, 12:50 am, Shay Banon <kim...@gmail.com (http://gmail.com)> wrote:

So that will cause it, you are overloading ES with many concurrent index requests. If you already do threading, then wait for a response. Btw, you send a refresh right after sending the index request, you shouldn't do it, since its both inefficient, as well as meaningless since you don't wait for the index request to happen.

On Sunday, February 26, 2012 at 4:48 PM, ajan wrote:

No, I don't wait for response. The part of source is gist here;

https://gist.github.com/56a1587a8384fc5ca364

With best regards,
Afzal Jan

On Feb 26, 2:58 pm, Shay Banon <kim...@gmail.com (http://gmail.com)> wrote:

Do you wait for a response for each index request, or do you just fire them without "actionGet"/"get" the response? If so, you might end up overloading ES with many concurrent requests. You can configure ES to throttle such cases, but lets see if thats the case first.

On Thursday, February 23, 2012 at 11:34 AM, ajan wrote:

I tried fresh with 0.18.7 and got the same OOM

More errors at https://gist.github.com/75255df466d9c779ce30 (http://gist.github.com/7...)

With best regards,
Afzal Jan

On Feb 23, 11:04 am, ajan <jan.af...@gmail.com (http://gmail.com)> wrote:

Firstly, apologies, it's not 4K it is actually ~40K docs.

how do you index the data?
Using native Java API

Is it a single bulk request?
Single requests

Multiple concurrent ones?
Yes, multiple requests are made, ~50 requests per second.

Do you open a connection for each index request?
I have tests that reuse the connection and other tests that create a
new connection everytime. I have executed both these tests. Not sure
which one would have caused this error.

How big are the docs?
each doc is ~40kb

On Feb 22, 5:38 pm, Shay Banon <kim...@gmail.com (http://gmail.com)> wrote:

No, you can safely ignore those errors in the log (I will fix it so they won't be logged).

Regarding the OOM, there really shouldn't be a reason why you can't index 4k docs with the default settings. how do you index the data? Is it a single bulk request? Multiple concurrent ones? Do you open a connection for each index request? How big are the docs?

On Wednesday, February 22, 2012 at 12:55 PM, ajan wrote:

I see a lot of these errors too in the logs

[2012-02-22 15:55:33,891][WARN ][index.translog ] [Man-Ape]
[managedobject][2] failed to flush shard on translog threshold
org.elasticsearch.index.shard.IllegalIndexShardStateException:
[managedobject][2] CurrentState[CREATED] write operation only allowed
when started/recovering
at
org.elasticsearch.index.shard.service.InternalIndexShard.verifyStartedOrRecovering(InternalIndexShard.java:
678)
at
org.elasticsearch.index.shard.service.InternalIndexShard.flush(InternalIndexShard.java:
497)
at org.elasticsearch.index.translog.TranslogService
$TranslogBasedFlush$1.run(TranslogService.java:173)
at java.util.concurrent.ThreadPoolExecutor
$Worker.runTask(ThreadPoolExecutor.java:886)
at java.util.concurrent.ThreadPoolExecutor
$Worker.run(ThreadPoolExecutor.java:908)
at java.lang.Thread.run(Thread.java:662)

Are they related to the OOM?

On Feb 22, 3:52 pm, ajan <jan.af...@gmail.com (http://gmail.com)> wrote:

https://gist.github.com/1884109

On Feb 21, 9:10 pm, Shay Banon <kim...@gmail.com (http://gmail.com)> wrote:

Can you gist the OOM you got from the logs?

On Tuesday, February 21, 2012 at 3:02 PM, ajan wrote:

Hi!

I'm seeing OOM errors while using 0.19.0.RC2 with default startup
script with just about 4K docs indexed.

Is there anything wrong in what I'm doing?

Here is the _stats output.

{"ok":true,"_shards":{"total":10,"successful":9,"failed":0},"_all":
{"primaries":{"docs":{"count":37775,"deleted":33},"store":
{"size":"348.5mb","size_in_bytes":365472567},"indexing":{"index_total":
35257,"index_time":"1d","index_time_in_millis":
93317704,"index_current":0,"delete_total":
1,"delete_time":"0s","delete_time_in_millis":0,"delete_current":
0},"get":{"total":3,"time":"4ms","time_in_millis":4,"exists_total":
1,"exists_time":"1ms","exists_time_in_millis":1,"missing_total":
2,"missing_time":"3ms","missing_time_in_millis":3,"current":
0},"search":{"query_total":
12882,"query_time":"34.9s","query_time_in_millis":
34972,"query_current":0,"fetch_total":
3217,"fetch_time":"691ms","fetch_time_in_millis":691,"fetch_current":
0}},"total":{"docs":{"count":74962,"deleted":54},"store":
{"size":"691.8mb","size_in_bytes":725415500},"indexing":{"index_total":
37269,"index_time":"2d","index_time_in_millis":
175828485,"index_current":0,"delete_total":
1,"delete_time":"0s","delete_time_in_millis":0,"delete_current":
0},"get":{"total":3,"time":"4ms","time_in_millis":4,"exists_total":
1,"exists_time":"1ms","exists_time_in_millis":1,"missing_total":
2,"missing_time":"3ms","missing_time_in_millis":3,"current":
0},"search":{"query_total":

...

read more »


(Shay Banon) #16

Bulk will make things faster, but how much faster really depends on the doc data you index. But it looks like you got from 50 to 200 docs per second with bulk, which is 4x, no?

On Monday, March 5, 2012 at 9:56 AM, ajan wrote:

Yes, does it really take 4 seconds to bulk index ~800 docs? Shouldn't
this be much faster?

Do I miss something that it is taking a longer time to index ~800
docs.

On Mar 1, 5:33 pm, Shay Banon <kim...@gmail.com (http://gmail.com)> wrote:

You are asking me if its expected? :).

On Thursday, March 1, 2012 at 6:45 AM, ajan wrote:

Great, thanks, the issue is resolved.

Now I have about ~200K documents and the cluster is stable and all my
queries are returned really quick. That too with the default startup
options. I didn't have to allocate more memory. This is great.

I tried the bulk api and when I send a bulk request for about ~800
documents it takes about 4 seconds. Is that expected?

With best regards,
Afzal Jan

On Feb 27, 6:05 pm, Shay Banon <kim...@gmail.com (http://gmail.com)> wrote:

It wasn't really indexing 500 docs per second, it was basically a fire and forget indexing request. In your API, make sure you don't call refresh after each index request, and if you can, use the bulk API instead of indexing each doc separately.

On Monday, February 27, 2012 at 2:35 PM, ajan wrote:

Looks like that resolved it; however, it has reduced the number of docs
indexed per second to ~50 now after doing the "actionGet/Get". Previously
it used to index ~500 docs per second.

I already do threading and try to index docs concurrently. In these
tests I add the docs to the same index. Going forward I would have to
add docs to multiple indices concurrently. I need this concurrency for
our product that has many services who could use ES cluster to index
and search through docs of their and other services.

I will tweak my tests to see how much of concurrency is acceptable. If
you have information on this that would be good.

With best regards,
Afzal Jan

On Feb 27, 12:50 am, Shay Banon <kim...@gmail.com (http://gmail.com)> wrote:

So that will cause it, you are overloading ES with many concurrent index requests. If you already do threading, then wait for a response. Btw, you send a refresh right after sending the index request, you shouldn't do it, since its both inefficient, as well as meaningless since you don't wait for the index request to happen.

On Sunday, February 26, 2012 at 4:48 PM, ajan wrote:

No, I don't wait for response. The part of source is gist here;

https://gist.github.com/56a1587a8384fc5ca364

With best regards,
Afzal Jan

On Feb 26, 2:58 pm, Shay Banon <kim...@gmail.com (http://gmail.com)> wrote:

Do you wait for a response for each index request, or do you just fire them without "actionGet"/"get" the response? If so, you might end up overloading ES with many concurrent requests. You can configure ES to throttle such cases, but lets see if thats the case first.

On Thursday, February 23, 2012 at 11:34 AM, ajan wrote:

I tried fresh with 0.18.7 and got the same OOM

More errors at https://gist.github.com/75255df466d9c779ce30 (http://gist.github.com/7...)

With best regards,
Afzal Jan

On Feb 23, 11:04 am, ajan <jan.af...@gmail.com (http://gmail.com)> wrote:

Firstly, apologies, it's not 4K it is actually ~40K docs.

how do you index the data?
Using native Java API

Is it a single bulk request?
Single requests

Multiple concurrent ones?
Yes, multiple requests are made, ~50 requests per second.

Do you open a connection for each index request?
I have tests that reuse the connection and other tests that create a
new connection everytime. I have executed both these tests. Not sure
which one would have caused this error.

How big are the docs?
each doc is ~40kb

On Feb 22, 5:38 pm, Shay Banon <kim...@gmail.com (http://gmail.com)> wrote:

No, you can safely ignore those errors in the log (I will fix it so they won't be logged).

Regarding the OOM, there really shouldn't be a reason why you can't index 4k docs with the default settings. how do you index the data? Is it a single bulk request? Multiple concurrent ones? Do you open a connection for each index request? How big are the docs?

On Wednesday, February 22, 2012 at 12:55 PM, ajan wrote:

I see a lot of these errors too in the logs

[2012-02-22 15:55:33,891][WARN ][index.translog ] [Man-Ape]
[managedobject][2] failed to flush shard on translog threshold
org.elasticsearch.index.shard.IllegalIndexShardStateException:
[managedobject][2] CurrentState[CREATED] write operation only allowed
when started/recovering
at
org.elasticsearch.index.shard.service.InternalIndexShard.verifyStartedOrRec overing(InternalIndexShard.java:
678)
at
org.elasticsearch.index.shard.service.InternalIndexShard.flush(InternalInde xShard.java:
497)
at org.elasticsearch.index.translog.TranslogService
$TranslogBasedFlush$1.run(TranslogService.java:173)
at java.util.concurrent.ThreadPoolExecutor
$Worker.runTask(ThreadPoolExecutor.java:886)
at java.util.concurrent.ThreadPoolExecutor
$Worker.run(ThreadPoolExecutor.java:908)
at java.lang.Thread.run(Thread.java:662)

Are they related to the OOM?

On Feb 22, 3:52 pm, ajan <jan.af...@gmail.com (http://gmail.com)> wrote:

https://gist.github.com/1884109

On Feb 21, 9:10 pm, Shay Banon <kim...@gmail.com (http://gmail.com)> wrote:

Can you gist the OOM you got from the logs?

On Tuesday, February 21, 2012 at 3:02 PM, ajan wrote:

Hi!

I'm seeing OOM errors while using 0.19.0.RC2 with default startup
script with just about 4K docs indexed.

Is there anything wrong in what I'm doing?

Here is the _stats output.

{"ok":true,"_shards":{"total":10,"successful":9,"failed":0},"_all":
{"primaries":{"docs":{"count":37775,"deleted":33},"store":
{"size":"348.5mb","size_in_bytes":365472567},"indexing":{"index_total":
35257,"index_time":"1d","index_time_in_millis":
93317704,"index_current":0,"delete_total":
1,"delete_time":"0s","delete_time_in_millis":0,"delete_current":
0},"get":{"total":3,"time":"4ms","time_in_millis":4,"exists_total":
1,"exists_time":"1ms","exists_time_in_millis":1,"missing_total":
2,"missing_time":"3ms","missing_time_in_millis":3,"current":
0},"search":{"query_total":
12882,"query_time":"34.9s","query_time_in_millis":
34972,"query_current":0,"fetch_total":
3217,"fetch_time":"691ms","fetch_time_in_millis":691,"fetch_current":
0}},"total":{"docs":{"count":74962,"deleted":54},"store":
{"size":"691.8mb","size_in_bytes":725415500},"indexing":{"index_total":
37269,"index_time":"2d","index_time_in_millis":
175828485,"index_current":0,"delete_total":
1,"delete_time":"0s","delete_time_in_millis":0,"delete_current":
0},"get":{"total":3,"time":"4ms","time_in_millis":4,"exists_total":
1,"exists_time":"1ms","exists_time_in_millis":1,"missing_total":
2,"missing_time":"3ms","missing_time_in_millis":3,"current":
0},"search":{"query_total":

...

read more »


(system) #17