Issue with shard replicas being different in size

Yesterday I did a full reindex of an index with ~8 million documents.
I had 3 nodes on EC2 large instances. There were 8 clients indexing
documents at the rate of ~1100 documents per minute per client.

Today I did some match_all count queries to verify the index size, and
I noticed that every other result is different: sometimes 5m, sometimes
8m. I looked at the index status and saw that two shards have very
differently sized replicas: The primaries have <120k documents, and
the replicas have >1.3m documents.

I tried stopping nodes to see if I could get the correct replicas to
become the primaries, but it seems they may be stored in the gateway
as the much smaller size rather than the correct size. It appears the
behavior is that if a replica goes down, it is restored from the
primary, but if a primary goes down, it is restored from the gateway.

Here is the output from the index status command. Notice that shard 4
has a big difference between the size of the replica and the primary.
Shard 3 originally had the same issue, but the correct replica was
lost when the node with that replica was accidentally restarted
(oops!)

{
"ok" : true,
"_shards" : {
"total" : 12,
"successful" : 12,
"failed" : 0
},
"indices" : {
"stats" : {
"aliases" : [ ],
"settings" : {
"index.number_of_shards" : "6",
"index.number_of_replicas" : "1"
},
"store_size" : "20.3g",
"store_size_in_bytes" : 21814342869,
"estimated_flushable_memory_size" : "96.5m",
"estimated_flushable_memory_size_in_bytes" : 101234949,
"translog_operations" : 102062,
"docs" : {
"num_docs" : 5738311,
"max_doc" : 5744803,
"deleted_docs" : 6492
},
"shards" : {
"0" : [ {
"routing" : {
"state" : "STARTED",
"primary" : false,
"node" : "9a54c6aa-469d-46ff-a574-dc24c969a0b0",
"relocating_node" : null,
"shard" : 0,
"index" : "stats"
},
"state" : "STARTED",
"store_size" : "2.6g",
"store_size_in_bytes" : 2842186930,
"estimated_flushable_memory_size" : "298.8k",
"estimated_flushable_memory_size_in_bytes" : 306021,
"translog_id" : 0,
"translog_operations" : 306,
"docs" : {
"num_docs" : 1352920,
"max_doc" : 1352920,
"deleted_docs" : 0
}
}, {
"routing" : {
"state" : "RELOCATING",
"primary" : true,
"node" : "ccc84aea-aabb-40e3-837c-af1bcb1b764a",
"relocating_node" : "411f522e-bfbc-4e0b-8a70-6451d1bef1da",
"shard" : 0,
"index" : "stats"
},
"state" : "STARTED",
"store_size" : "2.6g",
"store_size_in_bytes" : 2842196644,
"estimated_flushable_memory_size" : "298.8k",
"estimated_flushable_memory_size_in_bytes" : 306021,
"translog_id" : 1,
"translog_operations" : 306,
"docs" : {
"num_docs" : 1352920,
"max_doc" : 1352920,
"deleted_docs" : 0
}
} ],
"1" : [ {
"routing" : {
"state" : "STARTED",
"primary" : false,
"node" : "9a54c6aa-469d-46ff-a574-dc24c969a0b0",
"relocating_node" : null,
"shard" : 1,
"index" : "stats"
},
"state" : "STARTED",
"store_size" : "1.9g",
"store_size_in_bytes" : 2109353140,
"estimated_flushable_memory_size" : "304.8k",
"estimated_flushable_memory_size_in_bytes" : 312123,
"translog_id" : 0,
"translog_operations" : 323,
"docs" : {
"num_docs" : 1413888,
"max_doc" : 1418829,
"deleted_docs" : 4941
}
}, {
"routing" : {
"state" : "RELOCATING",
"primary" : true,
"node" : "ccc84aea-aabb-40e3-837c-af1bcb1b764a",
"relocating_node" : "411f522e-bfbc-4e0b-8a70-6451d1bef1da",
"shard" : 1,
"index" : "stats"
},
"state" : "STARTED",
"store_size" : "1.9g",
"store_size_in_bytes" : 2109369148,
"estimated_flushable_memory_size" : "304.8k",
"estimated_flushable_memory_size_in_bytes" : 312123,
"translog_id" : 1,
"translog_operations" : 323,
"docs" : {
"num_docs" : 1413888,
"max_doc" : 1418829,
"deleted_docs" : 4941
}
} ],
"2" : [ {
"routing" : {
"state" : "RELOCATING",
"primary" : false,
"node" : "9a54c6aa-469d-46ff-a574-dc24c969a0b0",
"relocating_node" : "411f522e-bfbc-4e0b-8a70-6451d1bef1da",
"shard" : 2,
"index" : "stats"
},
"state" : "STARTED",
"store_size" : "1.9g",
"store_size_in_bytes" : 2050764870,
"estimated_flushable_memory_size" : "877.4k",
"estimated_flushable_memory_size_in_bytes" : 898541,
"translog_id" : 0,
"translog_operations" : 897,
"docs" : {
"num_docs" : 1412645,
"max_doc" : 1413300,
"deleted_docs" : 655
}
}, {
"routing" : {
"state" : "STARTED",
"primary" : true,
"node" : "ccc84aea-aabb-40e3-837c-af1bcb1b764a",
"relocating_node" : null,
"shard" : 2,
"index" : "stats"
},
"state" : "STARTED",
"store_size" : "1.9g",
"store_size_in_bytes" : 2050751585,
"estimated_flushable_memory_size" : "876.7k",
"estimated_flushable_memory_size_in_bytes" : 897826,
"translog_id" : 0,
"translog_operations" : 896,
"docs" : {
"num_docs" : 1412644,
"max_doc" : 1413299,
"deleted_docs" : 655
}
} ],
"3" : [ {
"routing" : {
"state" : "RELOCATING",
"primary" : true,
"node" : "9a54c6aa-469d-46ff-a574-dc24c969a0b0",
"relocating_node" : "411f522e-bfbc-4e0b-8a70-6451d1bef1da",
"shard" : 3,
"index" : "stats"
},
"state" : "STARTED",
"store_size" : "195m",
"store_size_in_bytes" : 204574035,
"estimated_flushable_memory_size" : "3.7m",
"estimated_flushable_memory_size_in_bytes" : 3949835,
"translog_id" : 0,
"translog_operations" : 3842,
"docs" : {
"num_docs" : 103921,
"max_doc" : 103921,
"deleted_docs" : 0
}
}, {
"routing" : {
"state" : "STARTED",
"primary" : false,
"node" : "ccc84aea-aabb-40e3-837c-af1bcb1b764a",
"relocating_node" : null,
"shard" : 3,
"index" : "stats"
},
"state" : "STARTED",
"store_size" : "195m",
"store_size_in_bytes" : 204572396,
"estimated_flushable_memory_size" : "3.7m",
"estimated_flushable_memory_size_in_bytes" : 3949835,
"translog_id" : 0,
"translog_operations" : 3842,
"docs" : {
"num_docs" : 103921,
"max_doc" : 103921,
"deleted_docs" : 0
}
} ],
"4" : [ {
"routing" : {
"state" : "STARTED",
"primary" : false,
"node" : "9a54c6aa-469d-46ff-a574-dc24c969a0b0",
"relocating_node" : null,
"shard" : 4,
"index" : "stats"
},
"state" : "STARTED",
"store_size" : "1.9g",
"store_size_in_bytes" : 2106604638,
"estimated_flushable_memory_size" : "40.9m",
"estimated_flushable_memory_size_in_bytes" : 42929689,
"translog_id" : 262,
"translog_operations" : 43410,
"docs" : {
"num_docs" : 1415174,
"max_doc" : 1419993,
"deleted_docs" : 4819
}
}, {
"routing" : {
"state" : "STARTED",
"primary" : true,
"node" : "ccc84aea-aabb-40e3-837c-af1bcb1b764a",
"relocating_node" : null,
"shard" : 4,
"index" : "stats"
},
"state" : "STARTED",
"store_size" : "104.9m",
"store_size_in_bytes" : 110022541,
"estimated_flushable_memory_size" : "3.2m",
"estimated_flushable_memory_size_in_bytes" : 3392079,
"translog_id" : 0,
"translog_operations" : 3311,
"docs" : {
"num_docs" : 38319,
"max_doc" : 38319,
"deleted_docs" : 0
}
} ],
"5" : [ {
"routing" : {
"state" : "STARTED",
"primary" : true,
"node" : "9a54c6aa-469d-46ff-a574-dc24c969a0b0",
"relocating_node" : null,
"shard" : 5,
"index" : "stats"
},
"state" : "STARTED",
"store_size" : "2.8g",
"store_size_in_bytes" : 3074996380,
"estimated_flushable_memory_size" : "41.9m",
"estimated_flushable_memory_size_in_bytes" : 43980856,
"translog_id" : 276,
"translog_operations" : 44606,
"docs" : {
"num_docs" : 1416619,
"max_doc" : 1417515,
"deleted_docs" : 896
}
}, {
"routing" : {
"state" : "STARTED",
"primary" : false,
"node" : "ccc84aea-aabb-40e3-837c-af1bcb1b764a",
"relocating_node" : null,
"shard" : 5,
"index" : "stats"
},
"state" : "STARTED",
"store_size" : "1.9g",
"store_size_in_bytes" : 2108950562,
"estimated_flushable_memory_size" : "0",
"estimated_flushable_memory_size_in_bytes" : 0,
"translog_id" : 1,
"translog_operations" : 0,
"docs" : {
"num_docs" : 1416619,
"max_doc" : 1417515,
"deleted_docs" : 896
}
} ]
}
}
}

Hi,

The problem where the primary, when shut down, gets allocated and
restored from the gateway (instead of having one of the replicas become the
primary) is fixed in 0.9. The difference between the two (replica and
primary) is strange. I have fixed in 0.9 some cases where this might happen
during relocation of shards; did that happen to you? Also, are you using the
s3 gateway?

-shay.banon

On Sat, Jul 24, 2010 at 12:32 AM, Grant Rodgers grantr@gmail.com wrote:

Yesterday I did a full reindex of an index with ~8 million documents.
I had 3 nodes on EC2 large instances. There were 8 clients indexing
documents at the rate of ~1100 documents per minute per client.

Today I did some match_all count queries to verify the index size, and
I notice that every other result is different: sometimes 5m, sometimes
8m. I look at the index status and I see that two shards have very
differently sized replicas: The primaries have <120k documents, and
the replicas have >1.3m documents.

I tried stopping nodes to see if I could get the correct replicas to
become the primaries, but it seems they may be stored in the gateway
as the much smaller size rather than the correct size. It appears the
behavior is that if a replica goes down, it is restored from the
primary, but if a primary goes down, it is restored from the gateway.

Here is the output from the index status command. Notice that shard 4
has a big difference between the size of the replica and the primary.
Shard 3 originally had the same issue, but the correct replica was
lost when the node with that replica was accidentally restarted
(oops!)

{
"ok" : true,
"_shards" : {
"total" : 12,
"successful" : 12,
"failed" : 0
},
"indices" : {
"stats" : {
"aliases" : [ ],
"settings" : {
"index.number_of_shards" : "6",
"index.number_of_replicas" : "1"
},
"store_size" : "20.3g",
"store_size_in_bytes" : 21814342869,
"estimated_flushable_memory_size" : "96.5m",
"estimated_flushable_memory_size_in_bytes" : 101234949,
"translog_operations" : 102062,
"docs" : {
"num_docs" : 5738311,
"max_doc" : 5744803,
"deleted_docs" : 6492
},
"shards" : {
"0" : [ {
"routing" : {
"state" : "STARTED",
"primary" : false,
"node" : "9a54c6aa-469d-46ff-a574-dc24c969a0b0",
"relocating_node" : null,
"shard" : 0,
"index" : "stats"
},
"state" : "STARTED",
"store_size" : "2.6g",
"store_size_in_bytes" : 2842186930,
"estimated_flushable_memory_size" : "298.8k",
"estimated_flushable_memory_size_in_bytes" : 306021,
"translog_id" : 0,
"translog_operations" : 306,
"docs" : {
"num_docs" : 1352920,
"max_doc" : 1352920,
"deleted_docs" : 0
}
}, {
"routing" : {
"state" : "RELOCATING",
"primary" : true,
"node" : "ccc84aea-aabb-40e3-837c-af1bcb1b764a",
"relocating_node" : "411f522e-bfbc-4e0b-8a70-6451d1bef1da",
"shard" : 0,
"index" : "stats"
},
"state" : "STARTED",
"store_size" : "2.6g",
"store_size_in_bytes" : 2842196644,
"estimated_flushable_memory_size" : "298.8k",
"estimated_flushable_memory_size_in_bytes" : 306021,
"translog_id" : 1,
"translog_operations" : 306,
"docs" : {
"num_docs" : 1352920,
"max_doc" : 1352920,
"deleted_docs" : 0
}
} ],
"1" : [ {
"routing" : {
"state" : "STARTED",
"primary" : false,
"node" : "9a54c6aa-469d-46ff-a574-dc24c969a0b0",
"relocating_node" : null,
"shard" : 1,
"index" : "stats"
},
"state" : "STARTED",
"store_size" : "1.9g",
"store_size_in_bytes" : 2109353140,
"estimated_flushable_memory_size" : "304.8k",
"estimated_flushable_memory_size_in_bytes" : 312123,
"translog_id" : 0,
"translog_operations" : 323,
"docs" : {
"num_docs" : 1413888,
"max_doc" : 1418829,
"deleted_docs" : 4941
}
}, {
"routing" : {
"state" : "RELOCATING",
"primary" : true,
"node" : "ccc84aea-aabb-40e3-837c-af1bcb1b764a",
"relocating_node" : "411f522e-bfbc-4e0b-8a70-6451d1bef1da",
"shard" : 1,
"index" : "stats"
},
"state" : "STARTED",
"store_size" : "1.9g",
"store_size_in_bytes" : 2109369148,
"estimated_flushable_memory_size" : "304.8k",
"estimated_flushable_memory_size_in_bytes" : 312123,
"translog_id" : 1,
"translog_operations" : 323,
"docs" : {
"num_docs" : 1413888,
"max_doc" : 1418829,
"deleted_docs" : 4941
}
} ],
"2" : [ {
"routing" : {
"state" : "RELOCATING",
"primary" : false,
"node" : "9a54c6aa-469d-46ff-a574-dc24c969a0b0",
"relocating_node" : "411f522e-bfbc-4e0b-8a70-6451d1bef1da",
"shard" : 2,
"index" : "stats"
},
"state" : "STARTED",
"store_size" : "1.9g",
"store_size_in_bytes" : 2050764870,
"estimated_flushable_memory_size" : "877.4k",
"estimated_flushable_memory_size_in_bytes" : 898541,
"translog_id" : 0,
"translog_operations" : 897,
"docs" : {
"num_docs" : 1412645,
"max_doc" : 1413300,
"deleted_docs" : 655
}
}, {
"routing" : {
"state" : "STARTED",
"primary" : true,
"node" : "ccc84aea-aabb-40e3-837c-af1bcb1b764a",
"relocating_node" : null,
"shard" : 2,
"index" : "stats"
},
"state" : "STARTED",
"store_size" : "1.9g",
"store_size_in_bytes" : 2050751585,
"estimated_flushable_memory_size" : "876.7k",
"estimated_flushable_memory_size_in_bytes" : 897826,
"translog_id" : 0,
"translog_operations" : 896,
"docs" : {
"num_docs" : 1412644,
"max_doc" : 1413299,
"deleted_docs" : 655
}
} ],
"3" : [ {
"routing" : {
"state" : "RELOCATING",
"primary" : true,
"node" : "9a54c6aa-469d-46ff-a574-dc24c969a0b0",
"relocating_node" : "411f522e-bfbc-4e0b-8a70-6451d1bef1da",
"shard" : 3,
"index" : "stats"
},
"state" : "STARTED",
"store_size" : "195m",
"store_size_in_bytes" : 204574035,
"estimated_flushable_memory_size" : "3.7m",
"estimated_flushable_memory_size_in_bytes" : 3949835,
"translog_id" : 0,
"translog_operations" : 3842,
"docs" : {
"num_docs" : 103921,
"max_doc" : 103921,
"deleted_docs" : 0
}
}, {
"routing" : {
"state" : "STARTED",
"primary" : false,
"node" : "ccc84aea-aabb-40e3-837c-af1bcb1b764a",
"relocating_node" : null,
"shard" : 3,
"index" : "stats"
},
"state" : "STARTED",
"store_size" : "195m",
"store_size_in_bytes" : 204572396,
"estimated_flushable_memory_size" : "3.7m",
"estimated_flushable_memory_size_in_bytes" : 3949835,
"translog_id" : 0,
"translog_operations" : 3842,
"docs" : {
"num_docs" : 103921,
"max_doc" : 103921,
"deleted_docs" : 0
}
} ],
"4" : [ {
"routing" : {
"state" : "STARTED",
"primary" : false,
"node" : "9a54c6aa-469d-46ff-a574-dc24c969a0b0",
"relocating_node" : null,
"shard" : 4,
"index" : "stats"
},
"state" : "STARTED",
"store_size" : "1.9g",
"store_size_in_bytes" : 2106604638,
"estimated_flushable_memory_size" : "40.9m",
"estimated_flushable_memory_size_in_bytes" : 42929689,
"translog_id" : 262,
"translog_operations" : 43410,
"docs" : {
"num_docs" : 1415174,
"max_doc" : 1419993,
"deleted_docs" : 4819
}
}, {
"routing" : {
"state" : "STARTED",
"primary" : true,
"node" : "ccc84aea-aabb-40e3-837c-af1bcb1b764a",
"relocating_node" : null,
"shard" : 4,
"index" : "stats"
},
"state" : "STARTED",
"store_size" : "104.9m",
"store_size_in_bytes" : 110022541,
"estimated_flushable_memory_size" : "3.2m",
"estimated_flushable_memory_size_in_bytes" : 3392079,
"translog_id" : 0,
"translog_operations" : 3311,
"docs" : {
"num_docs" : 38319,
"max_doc" : 38319,
"deleted_docs" : 0
}
} ],
"5" : [ {
"routing" : {
"state" : "STARTED",
"primary" : true,
"node" : "9a54c6aa-469d-46ff-a574-dc24c969a0b0",
"relocating_node" : null,
"shard" : 5,
"index" : "stats"
},
"state" : "STARTED",
"store_size" : "2.8g",
"store_size_in_bytes" : 3074996380,
"estimated_flushable_memory_size" : "41.9m",
"estimated_flushable_memory_size_in_bytes" : 43980856,
"translog_id" : 276,
"translog_operations" : 44606,
"docs" : {
"num_docs" : 1416619,
"max_doc" : 1417515,
"deleted_docs" : 896
}
}, {
"routing" : {
"state" : "STARTED",
"primary" : false,
"node" : "ccc84aea-aabb-40e3-837c-af1bcb1b764a",
"relocating_node" : null,
"shard" : 5,
"index" : "stats"
},
"state" : "STARTED",
"store_size" : "1.9g",
"store_size_in_bytes" : 2108950562,
"estimated_flushable_memory_size" : "0",
"estimated_flushable_memory_size_in_bytes" : 0,
"translog_id" : 1,
"translog_operations" : 0,
"docs" : {
"num_docs" : 1416619,
"max_doc" : 1417515,
"deleted_docs" : 896
}
} ]
}
}
}

Yes, actually, after the index we also replaced two of the nodes. The
procedure we used was to stop one node, wait for the shards to
stabilize on the other two, then start the new one, and wait for the
shards to stabilize again. We did this twice, so there was a lot of
relocating going on. Unfortunately I didn't test the numbers before
the node swaps, but we are doing a full reindex this weekend so we
will see if the issue comes up again without them. We are using 0.8.0
and the s3 gateway.

We're looking forward to 0.9 quite keenly. The improved shard
management mentioned here, plus work directory reuse, new cloud
gateway, and new facets are all things we can use! Any idea of a
timetable for the release?

Thanks,
Grant

On Jul 24, 2:56 am, Shay Banon shay.ba...@elasticsearch.com wrote:

Hi,

The problem where the primary, when shut down, gets allocated and
restored from the gateway (instead of having one of the replicas become the
primary) is fixed in 0.9. The difference between the two (replica and
primary) is strange. I have fixed in 0.9 some cases where this might happen
during relocation of shards; did that happen to you? Also, are you using the
s3 gateway?

-shay.banon

On Sat, Jul 24, 2010 at 12:32 AM, Grant Rodgers gra...@gmail.com wrote:

Yesterday I did a full reindex of an index with ~8 million documents.
I had 3 nodes on EC2 large instances. There were 8 clients indexing
documents at the rate of ~1100 documents per minute per client.

Today I did some match_all count queries to verify the index size, and
I notice that every other result is different: sometimes 5m, sometimes
8m. I look at the index status and I see that two shards have very
differently sized replicas: The primaries have <120k documents, and
the replicas have >1.3m documents.

I tried stopping nodes to see if I could get the correct replicas to
become the primaries, but it seems they may be stored in the gateway
as the much smaller size rather than the correct size. It appears the
behavior is that if a replica goes down, it is restored from the
primary, but if a primary goes down, it is restored from the gateway.

Here is the output from the index status command. Notice that shard 4
has a big difference between the size of the replica and the primary.
Shard 3 originally had the same issue, but the correct replica was
lost when the node with that replica was accidentally restarted
(oops!)

{
"ok" : true,
"_shards" : {
"total" : 12,
"successful" : 12,
"failed" : 0
},
"indices" : {
"stats" : {
"aliases" : [ ],
"settings" : {
"index.number_of_shards" : "6",
"index.number_of_replicas" : "1"
},
"store_size" : "20.3g",
"store_size_in_bytes" : 21814342869,
"estimated_flushable_memory_size" : "96.5m",
"estimated_flushable_memory_size_in_bytes" : 101234949,
"translog_operations" : 102062,
"docs" : {
"num_docs" : 5738311,
"max_doc" : 5744803,
"deleted_docs" : 6492
},
"shards" : {
"0" : [ {
"routing" : {
"state" : "STARTED",
"primary" : false,
"node" : "9a54c6aa-469d-46ff-a574-dc24c969a0b0",
"relocating_node" : null,
"shard" : 0,
"index" : "stats"
},
"state" : "STARTED",
"store_size" : "2.6g",
"store_size_in_bytes" : 2842186930,
"estimated_flushable_memory_size" : "298.8k",
"estimated_flushable_memory_size_in_bytes" : 306021,
"translog_id" : 0,
"translog_operations" : 306,
"docs" : {
"num_docs" : 1352920,
"max_doc" : 1352920,
"deleted_docs" : 0
}
}, {
"routing" : {
"state" : "RELOCATING",
"primary" : true,
"node" : "ccc84aea-aabb-40e3-837c-af1bcb1b764a",
"relocating_node" : "411f522e-bfbc-4e0b-8a70-6451d1bef1da",
"shard" : 0,
"index" : "stats"
},
"state" : "STARTED",
"store_size" : "2.6g",
"store_size_in_bytes" : 2842196644,
"estimated_flushable_memory_size" : "298.8k",
"estimated_flushable_memory_size_in_bytes" : 306021,
"translog_id" : 1,
"translog_operations" : 306,
"docs" : {
"num_docs" : 1352920,
"max_doc" : 1352920,
"deleted_docs" : 0
}
} ],
"1" : [ {
"routing" : {
"state" : "STARTED",
"primary" : false,
"node" : "9a54c6aa-469d-46ff-a574-dc24c969a0b0",
"relocating_node" : null,
"shard" : 1,
"index" : "stats"
},
"state" : "STARTED",
"store_size" : "1.9g",
"store_size_in_bytes" : 2109353140,
"estimated_flushable_memory_size" : "304.8k",
"estimated_flushable_memory_size_in_bytes" : 312123,
"translog_id" : 0,
"translog_operations" : 323,
"docs" : {
"num_docs" : 1413888,
"max_doc" : 1418829,
"deleted_docs" : 4941
}
}, {
"routing" : {
"state" : "RELOCATING",
"primary" : true,
"node" : "ccc84aea-aabb-40e3-837c-af1bcb1b764a",
"relocating_node" : "411f522e-bfbc-4e0b-8a70-6451d1bef1da",
"shard" : 1,
"index" : "stats"
},
"state" : "STARTED",
"store_size" : "1.9g",
"store_size_in_bytes" : 2109369148,
"estimated_flushable_memory_size" : "304.8k",
"estimated_flushable_memory_size_in_bytes" : 312123,
"translog_id" : 1,
"translog_operations" : 323,
"docs" : {
"num_docs" : 1413888,
"max_doc" : 1418829,
"deleted_docs" : 4941
}
} ],
"2" : [ {
"routing" : {
"state" : "RELOCATING",
"primary" : false,
"node" : "9a54c6aa-469d-46ff-a574-dc24c969a0b0",
"relocating_node" : "411f522e-bfbc-4e0b-8a70-6451d1bef1da",
"shard" : 2,
"index" : "stats"
},
"state" : "STARTED",
"store_size" : "1.9g",
"store_size_in_bytes" : 2050764870,
"estimated_flushable_memory_size" : "877.4k",
"estimated_flushable_memory_size_in_bytes" : 898541,
"translog_id" : 0,
"translog_operations" : 897,
"docs" : {
"num_docs" : 1412645,
"max_doc" : 1413300,
"deleted_docs" : 655
}
}, {
"routing" : {
"state" : "STARTED",
"primary" : true,
"node" : "ccc84aea-aabb-40e3-837c-af1bcb1b764a",
"relocating_node" : null,
"shard" : 2,
"index" : "stats"
},
"state" : "STARTED",
"store_size" : "1.9g",
"store_size_in_bytes" : 2050751585,
"estimated_flushable_memory_size" : "876.7k",
"estimated_flushable_memory_size_in_bytes" : 897826,
"translog_id" : 0,
"translog_operations" : 896,
"docs" : {
"num_docs" : 1412644,
"max_doc" : 1413299,
"deleted_docs" : 655
}
} ],
"3" : [ {
"routing" : {
"state" : "RELOCATING",
"primary" : true,
"node" : "9a54c6aa-469d-46ff-a574-dc24c969a0b0",
"relocating_node" : "411f522e-bfbc-4e0b-8a70-6451d1bef1da",
"shard" : 3,
"index" : "stats"
},
"state" : "STARTED",
"store_size" : "195m",
"store_size_in_bytes" : 204574035,
"estimated_flushable_memory_size" : "3.7m",
"estimated_flushable_memory_size_in_bytes" : 3949835,
"translog_id" : 0,
"translog_operations" : 3842,
"docs" : {
"num_docs" : 103921,
"max_doc" : 103921,
"deleted_docs" : 0
}
}, {
"routing" : {
"state" : "STARTED",
"primary" : false,
"node" : "ccc84aea-aabb-40e3-837c-af1bcb1b764a",
"relocating_node" : null,
"shard" : 3,
"index" : "stats"
},
"state" : "STARTED",
"store_size" : "195m",
"store_size_in_bytes" : 204572396,
"estimated_flushable_memory_size" : "3.7m",
"estimated_flushable_memory_size_in_bytes" : 3949835,
"translog_id" : 0,
"translog_operations" : 3842,
"docs" : {
"num_docs" : 103921,
"max_doc" : 103921,
"deleted_docs" : 0
}
} ],
"4" : [ {
"routing" : {
"state" : "STARTED",
"primary" : false,
"node" : "9a54c6aa-469d-46ff-a574-dc24c969a0b0",
"relocating_node" : null,
"shard" : 4,
"index" : "stats"
},
"state" : "STARTED",
"store_size" : "1.9g",
"store_size_in_bytes" : 2106604638,
"estimated_flushable_memory_size" : "40.9m",
"estimated_flushable_memory_size_in_bytes" : 42929689,
"translog_id" : 262,
"translog_operations" : 43410,
"docs" : {
"num_docs" : 1415174,
"max_doc" : 1419993,
"deleted_docs" : 4819
}
}, {
"routing" : {
"state" : "STARTED",
"primary" : true,
"node" : "ccc84aea-aabb-40e3-837c-af1bcb1b764a",
"relocating_node" : null,
"shard" : 4,
"index" : "stats"
},
"state" : "STARTED",
"store_size" : "104.9m",
"store_size_in_bytes" : 110022541,
"estimated_flushable_memory_size" : "3.2m",
"estimated_flushable_memory_size_in_bytes" : 3392079,
"translog_id" : 0,
"translog_operations" : 3311,
"docs" : {
"num_docs" : 38319,
"max_doc" : 38319,
"deleted_docs" : 0
}
} ],
"5" : [ {
"routing" : {
"state" : "STARTED",
"primary" : true,
"node" : "9a54c6aa-469d-46ff-a574-dc24c969a0b0",
"relocating_node" : null,
"shard" : 5,
"index" : "stats"

...

read more »

Hi,

Can you hold off on the reindexing? I'm almost done with the new AWS-specific
cloud plugin; once it's done, I will ping this mailing list with docs on how to
use it. For now, I decided to go with the official AWS SDK to simplify things,
though in the future I might implement a proper non-blocking implementation myself.

-shay.banon

On Sat, Jul 24, 2010 at 10:32 PM, Grant Rodgers grantr@gmail.com wrote:

Yes, actually, after the index we also replaced two of the nodes. The
procedure we used was to stop one node, wait for the shards to
stabilize on the other two, then start the new one, and wait for the
shards to stabilize again. We did this twice, so there was a lot of
relocating going on. Unfortunately I didn't test the numbers before
the node swaps, but we are doing a full reindex this weekend so we
will see if the issue comes up again without them. We are using 0.8.0
and the s3 gateway.

We're looking forward to 0.9 quite keenly. The improved shard
management mentioned here, plus work directory reuse, new cloud
gateway, and new facets are all things we can use! Any idea of a
timetable for the release?

Thanks,
Grant

On Jul 24, 2:56 am, Shay Banon shay.ba...@elasticsearch.com wrote:

Hi,

The problem where the primary, when shut down, gets allocated and
restored from the gateway (instead of having one of the replicas become the
primary) is fixed in 0.9. The difference between the two (replica and
primary) is strange. I have fixed in 0.9 some cases where this might happen
during relocation of shards; did that happen to you? Also, are you using the
s3 gateway?

-shay.banon

On Sat, Jul 24, 2010 at 12:32 AM, Grant Rodgers gra...@gmail.com
wrote:

Yesterday I did a full reindex of an index with ~8 million documents.
I had 3 nodes on EC2 large instances. There were 8 clients indexing
documents at the rate of ~1100 documents per minute per client.

Today I did some match_all count queries to verify the index size, and
I notice that every other result is different: sometimes 5m, sometimes
8m. I look at the index status and I see that two shards have very
differently sized replicas: The primaries have <120k documents, and
the replicas have >1.3m documents.

I tried stopping nodes to see if I could get the correct replicas to
become the primaries, but it seems they may be stored in the gateway
as the much smaller size rather than the correct size. It appears the
behavior is that if a replica goes down, it is restored from the
primary, but if a primary goes down, it is restored from the gateway.

Here is the output from the index status command. Notice that shard 4
has a big difference between the size of the replica and the primary.
Shard 3 originally had the same issue, but the correct replica was
lost when the node with that replica was accidentally restarted
(oops!)

{
"ok" : true,
"_shards" : {
"total" : 12,
"successful" : 12,
"failed" : 0
},
"indices" : {
"stats" : {
"aliases" : [ ],
"settings" : {
"index.number_of_shards" : "6",
"index.number_of_replicas" : "1"
},
"store_size" : "20.3g",
"store_size_in_bytes" : 21814342869,
"estimated_flushable_memory_size" : "96.5m",
"estimated_flushable_memory_size_in_bytes" : 101234949,
"translog_operations" : 102062,
"docs" : {
"num_docs" : 5738311,
"max_doc" : 5744803,
"deleted_docs" : 6492
},
"shards" : {
"0" : [ {
"routing" : {
"state" : "STARTED",
"primary" : false,
"node" : "9a54c6aa-469d-46ff-a574-dc24c969a0b0",
"relocating_node" : null,
"shard" : 0,
"index" : "stats"
},
"state" : "STARTED",
"store_size" : "2.6g",
"store_size_in_bytes" : 2842186930,
"estimated_flushable_memory_size" : "298.8k",
"estimated_flushable_memory_size_in_bytes" : 306021,
"translog_id" : 0,
"translog_operations" : 306,
"docs" : {
"num_docs" : 1352920,
"max_doc" : 1352920,
"deleted_docs" : 0
}
}, {
"routing" : {
"state" : "RELOCATING",
"primary" : true,
"node" : "ccc84aea-aabb-40e3-837c-af1bcb1b764a",
"relocating_node" : "411f522e-bfbc-4e0b-8a70-6451d1bef1da",
"shard" : 0,
"index" : "stats"
},
"state" : "STARTED",
"store_size" : "2.6g",
"store_size_in_bytes" : 2842196644,
"estimated_flushable_memory_size" : "298.8k",
"estimated_flushable_memory_size_in_bytes" : 306021,
"translog_id" : 1,
"translog_operations" : 306,
"docs" : {
"num_docs" : 1352920,
"max_doc" : 1352920,
"deleted_docs" : 0
}
} ],
"1" : [ {
"routing" : {
"state" : "STARTED",
"primary" : false,
"node" : "9a54c6aa-469d-46ff-a574-dc24c969a0b0",
"relocating_node" : null,
"shard" : 1,
"index" : "stats"
},
"state" : "STARTED",
"store_size" : "1.9g",
"store_size_in_bytes" : 2109353140,
"estimated_flushable_memory_size" : "304.8k",
"estimated_flushable_memory_size_in_bytes" : 312123,
"translog_id" : 0,
"translog_operations" : 323,
"docs" : {
"num_docs" : 1413888,
"max_doc" : 1418829,
"deleted_docs" : 4941
}
}, {
"routing" : {
"state" : "RELOCATING",
"primary" : true,
"node" : "ccc84aea-aabb-40e3-837c-af1bcb1b764a",
"relocating_node" : "411f522e-bfbc-4e0b-8a70-6451d1bef1da",
"shard" : 1,
"index" : "stats"
},
"state" : "STARTED",
"store_size" : "1.9g",
"store_size_in_bytes" : 2109369148,
"estimated_flushable_memory_size" : "304.8k",
"estimated_flushable_memory_size_in_bytes" : 312123,
"translog_id" : 1,
"translog_operations" : 323,
"docs" : {
"num_docs" : 1413888,
"max_doc" : 1418829,
"deleted_docs" : 4941
}
} ],
"2" : [ {
"routing" : {
"state" : "RELOCATING",
"primary" : false,
"node" : "9a54c6aa-469d-46ff-a574-dc24c969a0b0",
"relocating_node" : "411f522e-bfbc-4e0b-8a70-6451d1bef1da",
"shard" : 2,
"index" : "stats"
},
"state" : "STARTED",
"store_size" : "1.9g",
"store_size_in_bytes" : 2050764870,
"estimated_flushable_memory_size" : "877.4k",
"estimated_flushable_memory_size_in_bytes" : 898541,
"translog_id" : 0,
"translog_operations" : 897,
"docs" : {
"num_docs" : 1412645,
"max_doc" : 1413300,
"deleted_docs" : 655
}
}, {
"routing" : {
"state" : "STARTED",
"primary" : true,
"node" : "ccc84aea-aabb-40e3-837c-af1bcb1b764a",
"relocating_node" : null,
"shard" : 2,
"index" : "stats"
},
"state" : "STARTED",
"store_size" : "1.9g",
"store_size_in_bytes" : 2050751585,
"estimated_flushable_memory_size" : "876.7k",
"estimated_flushable_memory_size_in_bytes" : 897826,
"translog_id" : 0,
"translog_operations" : 896,
"docs" : {
"num_docs" : 1412644,
"max_doc" : 1413299,
"deleted_docs" : 655
}
} ],
"3" : [ {
"routing" : {
"state" : "RELOCATING",
"primary" : true,
"node" : "9a54c6aa-469d-46ff-a574-dc24c969a0b0",
"relocating_node" : "411f522e-bfbc-4e0b-8a70-6451d1bef1da",
"shard" : 3,
"index" : "stats"
},
"state" : "STARTED",
"store_size" : "195m",
"store_size_in_bytes" : 204574035,
"estimated_flushable_memory_size" : "3.7m",
"estimated_flushable_memory_size_in_bytes" : 3949835,
"translog_id" : 0,
"translog_operations" : 3842,
"docs" : {
"num_docs" : 103921,
"max_doc" : 103921,
"deleted_docs" : 0
}
}, {
"routing" : {
"state" : "STARTED",
"primary" : false,
"node" : "ccc84aea-aabb-40e3-837c-af1bcb1b764a",
"relocating_node" : null,
"shard" : 3,
"index" : "stats"
},
"state" : "STARTED",
"store_size" : "195m",
"store_size_in_bytes" : 204572396,
"estimated_flushable_memory_size" : "3.7m",
"estimated_flushable_memory_size_in_bytes" : 3949835,
"translog_id" : 0,
"translog_operations" : 3842,
"docs" : {
"num_docs" : 103921,
"max_doc" : 103921,
"deleted_docs" : 0
}
} ],
"4" : [ {
"routing" : {
"state" : "STARTED",
"primary" : false,
"node" : "9a54c6aa-469d-46ff-a574-dc24c969a0b0",
"relocating_node" : null,
"shard" : 4,
"index" : "stats"
},
"state" : "STARTED",
"store_size" : "1.9g",
"store_size_in_bytes" : 2106604638,
"estimated_flushable_memory_size" : "40.9m",
"estimated_flushable_memory_size_in_bytes" : 42929689,
"translog_id" : 262,
"translog_operations" : 43410,
"docs" : {
"num_docs" : 1415174,
"max_doc" : 1419993,
"deleted_docs" : 4819
}
}, {
"routing" : {
"state" : "STARTED",
"primary" : true,
"node" : "ccc84aea-aabb-40e3-837c-af1bcb1b764a",
"relocating_node" : null,
"shard" : 4,
"index" : "stats"
},
"state" : "STARTED",
"store_size" : "104.9m",
"store_size_in_bytes" : 110022541,
"estimated_flushable_memory_size" : "3.2m",
"estimated_flushable_memory_size_in_bytes" : 3392079,
"translog_id" : 0,
"translog_operations" : 3311,
"docs" : {
"num_docs" : 38319,
"max_doc" : 38319,
"deleted_docs" : 0
}
} ],
"5" : [ {
"routing" : {
"state" : "STARTED",
"primary" : true,
"node" : "9a54c6aa-469d-46ff-a574-dc24c969a0b0",
"relocating_node" : null,
"shard" : 5,
"index" : "stats"

...

read more »

We will reindex again when 0.9 is released; this index is not
user-facing so we can afford some downtime.

On Jul 24, 12:47 pm, Shay Banon shay.ba...@elasticsearch.com wrote:

Hi,

Can you hold off on the reindexing? I'm almost done with the new AWS-specific
cloud plugin; once it's done, I will ping this mailing list with docs on how to
use it. For now, I decided to go with the official AWS SDK to simplify things,
though in the future I might implement a proper non-blocking implementation myself.

-shay.banon

On Sat, Jul 24, 2010 at 10:32 PM, Grant Rodgers gra...@gmail.com wrote:

Yes, actually, after the index we also replaced two of the nodes. The
procedure we used was to stop one node, wait for the shards to
stabilize on the other two, then start the new one, and wait for the
shards to stabilize again. We did this twice, so there was a lot of
relocating going on. Unfortunately I didn't test the numbers before
the node swaps, but we are doing a full reindex this weekend so we
will see if the issue comes up again without them. We are using 0.8.0
and the s3 gateway.

We're looking forward to 0.9 quite keenly. The improved shard
management mentioned here, plus work directory reuse, new cloud
gateway, and new facets are all things we can use! Any idea of a
timetable for the release?

Thanks,
Grant

On Jul 24, 2:56 am, Shay Banon shay.ba...@elasticsearch.com wrote:

Hi,

The problem of the primary, when shut down, getting allocated and
restored from the gateway (instead of having one of the replicas become
the primary) is fixed in 0.9. The difference between the two (replica
and primary) is strange. I have fixed in 0.9 some cases where this might
happen during relocation of shards, did that happen to you? Also, are you
using the s3 gateway?

-shay.banon

On Sat, Jul 24, 2010 at 12:32 AM, Grant Rodgers gra...@gmail.com wrote:

Yesterday I did a full reindex of an index with ~8 million documents.
I had 3 nodes on EC2 large instances. There were 8 clients indexing
documents at the rate of ~1100 documents per minute per client.

Today I did some match_all count queries to verify the index size, and
I notice that every other result is different: sometimes 5m, sometimes
8m. I look at the index status and I see that two shards have very
differently sized replicas: The primaries have <120k documents, and
the replicas have >1.3m documents.

I tried stopping nodes to see if I could get the correct replicas to
become the primaries, but it seems they may be stored in the gateway
as the much smaller size rather than the correct size. It appears the
behavior is that if a replica goes down, it is restored from the
primary, but if a primary goes down, it is restored from the gateway.

Here is the output from the index status command. Notice that shard 4
has a big difference between the size of the replica and the primary.
Shard 3 originally had the same issue, but the correct replica was
lost when the node with that replica was accidentally restarted
(oops!)

{
"ok" : true,
"_shards" : {
"total" : 12,
"successful" : 12,
"failed" : 0
},
"indices" : {
"stats" : {
"aliases" : [ ],
"settings" : {
"index.number_of_shards" : "6",
"index.number_of_replicas" : "1"
},
"store_size" : "20.3g",
"store_size_in_bytes" : 21814342869,
"estimated_flushable_memory_size" : "96.5m",
"estimated_flushable_memory_size_in_bytes" : 101234949,
"translog_operations" : 102062,
"docs" : {
"num_docs" : 5738311,
"max_doc" : 5744803,
"deleted_docs" : 6492
},
"shards" : {
"0" : [ {
"routing" : {
"state" : "STARTED",
"primary" : false,
"node" : "9a54c6aa-469d-46ff-a574-dc24c969a0b0",
"relocating_node" : null,
"shard" : 0,
"index" : "stats"
},
"state" : "STARTED",
"store_size" : "2.6g",
"store_size_in_bytes" : 2842186930,
"estimated_flushable_memory_size" : "298.8k",
"estimated_flushable_memory_size_in_bytes" : 306021,
"translog_id" : 0,
"translog_operations" : 306,
"docs" : {
"num_docs" : 1352920,
"max_doc" : 1352920,
"deleted_docs" : 0
}
}, {
"routing" : {
"state" : "RELOCATING",
"primary" : true,
"node" : "ccc84aea-aabb-40e3-837c-af1bcb1b764a",
"relocating_node" : "411f522e-bfbc-4e0b-8a70-6451d1bef1da",
"shard" : 0,
"index" : "stats"
},
"state" : "STARTED",
"store_size" : "2.6g",
"store_size_in_bytes" : 2842196644,
"estimated_flushable_memory_size" : "298.8k",
"estimated_flushable_memory_size_in_bytes" : 306021,
"translog_id" : 1,
"translog_operations" : 306,
"docs" : {
"num_docs" : 1352920,
"max_doc" : 1352920,
"deleted_docs" : 0
}
} ],
"1" : [ {
"routing" : {
"state" : "STARTED",
"primary" : false,
"node" : "9a54c6aa-469d-46ff-a574-dc24c969a0b0",
"relocating_node" : null,
"shard" : 1,
"index" : "stats"
},
"state" : "STARTED",
"store_size" : "1.9g",
"store_size_in_bytes" : 2109353140,
"estimated_flushable_memory_size" : "304.8k",
"estimated_flushable_memory_size_in_bytes" : 312123,
"translog_id" : 0,
"translog_operations" : 323,
"docs" : {
"num_docs" : 1413888,
"max_doc" : 1418829,
"deleted_docs" : 4941
}
}, {
"routing" : {
"state" : "RELOCATING",
"primary" : true,
"node" : "ccc84aea-aabb-40e3-837c-af1bcb1b764a",
"relocating_node" : "411f522e-bfbc-4e0b-8a70-6451d1bef1da",
"shard" : 1,
"index" : "stats"
},
"state" : "STARTED",
"store_size" : "1.9g",
"store_size_in_bytes" : 2109369148,
"estimated_flushable_memory_size" : "304.8k",
"estimated_flushable_memory_size_in_bytes" : 312123,
"translog_id" : 1,
"translog_operations" : 323,
"docs" : {
"num_docs" : 1413888,
"max_doc" : 1418829,
"deleted_docs" : 4941
}
} ],
"2" : [ {
"routing" : {
"state" : "RELOCATING",
"primary" : false,
"node" : "9a54c6aa-469d-46ff-a574-dc24c969a0b0",
"relocating_node" : "411f522e-bfbc-4e0b-8a70-6451d1bef1da",
"shard" : 2,
"index" : "stats"
},
"state" : "STARTED",
"store_size" : "1.9g",
"store_size_in_bytes" : 2050764870,
"estimated_flushable_memory_size" : "877.4k",
"estimated_flushable_memory_size_in_bytes" : 898541,
"translog_id" : 0,
"translog_operations" : 897,
"docs" : {
"num_docs" : 1412645,
"max_doc" : 1413300,
"deleted_docs" : 655
}
}, {
"routing" : {
"state" : "STARTED",
"primary" : true,
"node" : "ccc84aea-aabb-40e3-837c-af1bcb1b764a",
"relocating_node" : null,
"shard" : 2,
"index" : "stats"
},
"state" : "STARTED",
"store_size" : "1.9g",
"store_size_in_bytes" : 2050751585,
"estimated_flushable_memory_size" : "876.7k",
"estimated_flushable_memory_size_in_bytes" : 897826,
"translog_id" : 0,
"translog_operations" : 896,
"docs" : {
"num_docs" : 1412644,
"max_doc" : 1413299,
"deleted_docs" : 655
}
} ],
"3" : [ {
"routing" : {
"state" : "RELOCATING",
"primary" : true,
"node" : "9a54c6aa-469d-46ff-a574-dc24c969a0b0",
"relocating_node" : "411f522e-bfbc-4e0b-8a70-6451d1bef1da",
"shard" : 3,
"index" : "stats"
},
"state" : "STARTED",
"store_size" : "195m",
"store_size_in_bytes" : 204574035,
"estimated_flushable_memory_size" : "3.7m",
"estimated_flushable_memory_size_in_bytes" : 3949835,
"translog_id" : 0,
"translog_operations" : 3842,
"docs" : {
"num_docs" : 103921,
"max_doc" : 103921,
"deleted_docs" : 0
}
}, {
"routing" : {
"state" : "STARTED",
"primary" : false,
"node" : "ccc84aea-aabb-40e3-837c-af1bcb1b764a",
"relocating_node" : null,
"shard" : 3,
"index" : "stats"
},
"state" : "STARTED",
"store_size" :

...

read more »