Hi,
We're trying to index a few million responses to many (10000+) forms.
We're creating a separate type for each form since they can have
different fields. So each form is associated with a type named
form-<form_id>.
In bulk indexing the existing data, we are creating the new types and
their mappings on the fly, interspersed with bulk indexing the
documents. After running through about 200k responses the server
stopped responding and appeared to be running out of memory. The
cluster node stats are pasted below.
This two node cluster (8 GB machines) has indices for other
applications (of similar size) already running successfully on it, and
we can bulk index them without a problem. So I assume the large
number of types is causing the issue.
So some questions:
- I'm planning to try creating all of the types first and then bulk
  index. Is intermixing type creation and adding docs expected to run
  into performance problems?
- It seems from the cluster stats that there is a surprising amount
  of data going in one direction. Could this be the performance problem?
- We're planning to do term facet searches (for word cloud
  generation) for each of the above types. I think I read that the
  entire index gets loaded when a term facet is done. If I do a term
  facet on a particular type, will only the portion of the index for
  that type be loaded, or will it still be the whole thing? If the whole
  thing, is there any way I can move the data into separate indices without
  having 10k indices show up when I go to look at the size of my
  indices? There is no need for them to be in the same index, I just
  don't want the 10k tables making it harder for me to examine the
  indices for other applications.
Thanks for the help
-Greg
The index in question:
"index" : {
"primary_size" : "55.8mb",
"primary_size_in_bytes" : 58586978,
"size" : "111.7mb",
"size_in_bytes" : 117177113
},
"translog" : {
"operations" : 0
},
"docs" : {
"num_docs" : 214308,
"max_doc" : 214314,
"deleted_docs" : 6
},
"merges" : {
"current" : 0,
"current_docs" : 0,
"current_size" : "0b",
"current_size_in_bytes" : 0,
"total" : 0,
"total_time" : "0s",
"total_time_in_millis" : 0,
"total_docs" : 0,
"total_size" : "0b",
"total_size_in_bytes" : 0
},
"refresh" : {
"total" : 6426,
"total_time" : "4.8m",
"total_time_in_millis" : 289292
},
"flush" : {
"total" : 2702,
"total_time" : "1.6m",
"total_time_in_millis" : 97459
},
/_cluster/node/stats
{
"cluster_name" : "mgs2",
"nodes" : {
"x25SzznQQZ2uPGQvgGjs4Q" : {
"name" : "Nut",
"indices" : {
"store" : {
"size" : "1.6gb",
"size_in_bytes" : 1806371221
},
"docs" : {
"count" : 557654,
"deleted" : 114387
},
"indexing" : {
"index_total" : 18577042,
"index_time" : "40m",
"index_time_in_millis" : 2403162,
"index_current" : 0,
"delete_total" : 0,
"delete_time" : "0s",
"delete_time_in_millis" : 0,
"delete_current" : 0
},
"get" : {
"total" : 86,
"time" : "60ms",
"time_in_millis" : 60,
"exists_total" : 24,
"exists_time" : "40ms",
"exists_time_in_millis" : 40,
"missing_total" : 62,
"missing_time" : "20ms",
"missing_time_in_millis" : 20,
"current" : 0
},
"search" : {
"query_total" : 20121,
"query_time" : "4.8m",
"query_time_in_millis" : 288251,
"query_current" : 0,
"fetch_total" : 15337,
"fetch_time" : "57.4s",
"fetch_time_in_millis" : 57470,
"fetch_current" : 0
},
"cache" : {
"field_evictions" : 0,
"field_size" : "4.4mb",
"field_size_in_bytes" : 4675324,
"filter_count" : 3,
"filter_evictions" : 0,
"filter_size" : "92.9kb",
"filter_size_in_bytes" : 95144
},
"merges" : {
"current" : 0,
"current_docs" : 0,
"current_size" : "0b",
"current_size_in_bytes" : 0,
"total" : 56,
"total_time" : "15.4s",
"total_time_in_millis" : 15492,
"total_docs" : 1887347,
"total_size" : "495.7mb",
"total_size_in_bytes" : 519823123
},
"refresh" : {
"total" : 6748,
"total_time" : "4.8m",
"total_time_in_millis" : 290519
},
"flush" : {
"total" : 2972,
"total_time" : "1.6m",
"total_time_in_millis" : 101684
}
},
"os" : {
"timestamp" : 1324472103660,
"uptime" : "-1 seconds",
"uptime_in_millis" : -1000,
"load_average" : [ ]
},
"process" : {
"timestamp" : 1324472103660,
"open_file_descriptors" : 1257
},
"jvm" : {
"timestamp" : 1324472103660,
"uptime" : "15 hours, 58 minutes, 35 seconds and 124 milliseconds",
"uptime_in_millis" : 57515124,
"mem" : {
"heap_used" : "2.3gb",
"heap_used_in_bytes" : 2524820736,
"heap_committed" : "4.9gb",
"heap_committed_in_bytes" : 5340397568,
"non_heap_used" : "46mb",
"non_heap_used_in_bytes" : 48276152,
"non_heap_committed" : "69.8mb",
"non_heap_committed_in_bytes" : 73248768
},
"threads" : {
"count" : 72,
"peak_count" : 86
},
"gc" : {
"collection_count" : 16609,
"collection_time" : "4 minutes, 25 seconds and 568 milliseconds",
"collection_time_in_millis" : 265568,
"collectors" : {
"ParNew" : {
"collection_count" : 16600,
"collection_time" : "4 minutes, 24 seconds and 807 milliseconds",
"collection_time_in_millis" : 264807
},
"ConcurrentMarkSweep" : {
"collection_count" : 9,
"collection_time" : "761 milliseconds",
"collection_time_in_millis" : 761
}
}
}
},
"network" : {
},
"transport" : {
"server_open" : 14,
"rx_count" : 243057,
"rx_size" : "452.2mb",
"rx_size_in_bytes" : 474228725,
"tx_count" : 321262,
"tx_size" : "4.1gb",
"tx_size_in_bytes" : 4459909980
},
"http" : {
"current_open" : 8,
"total_opened" : 7208
}
},
"fxJdAyuKSZeLRQ0nrBybGg" : {
"name" : "Bantam",
"indices" : {
"store" : {
"size" : "1.7gb",
"size_in_bytes" : 1889182815
},
"docs" : {
"count" : 557472,
"deleted" : 114386
},
"indexing" : {
"index_total" : 18576863,
"index_time" : "1.1h",
"index_time_in_millis" : 4074696,
"index_current" : 5,
"delete_total" : 0,
"delete_time" : "0s",
"delete_time_in_millis" : 0,
"delete_current" : 0
},
"get" : {
"total" : 91,
"time" : "34ms",
"time_in_millis" : 34,
"exists_total" : 43,
"exists_time" : "16ms",
"exists_time_in_millis" : 16,
"missing_total" : 48,
"missing_time" : "18ms",
"missing_time_in_millis" : 18,
"current" : 0
},
"search" : {
"query_total" : 20060,
"query_time" : "11.4m",
"query_time_in_millis" : 686600,
"query_current" : 5,
"fetch_total" : 15146,
"fetch_time" : "2.4m",
"fetch_time_in_millis" : 148005,
"fetch_current" : 1
},
"cache" : {
"field_evictions" : 0,
"field_size" : "4.4mb",
"field_size_in_bytes" : 4674888,
"filter_count" : 2,
"filter_evictions" : 0,
"filter_size" : "92.5kb",
"filter_size_in_bytes" : 94760
},
"merges" : {
"current" : 5,
"current_docs" : 214061,
"current_size" : "55.8mb",
"current_size_in_bytes" : 58566804,
"total" : 55,
"total_time" : "5.6m",
"total_time_in_millis" : 341870,
"total_docs" : 2010928,
"total_size" : "527.5mb",
"total_size_in_bytes" : 553224978
},
"refresh" : {
"total" : 7365,
"total_time" : "7.4m",
"total_time_in_millis" : 447627
},
"flush" : {
"total" : 2977,
"total_time" : "13.4m",
"total_time_in_millis" : 804327
}
},
"os" : {
"timestamp" : 1324472120587,
"uptime" : "-1 seconds",
"uptime_in_millis" : -1000,
"load_average" : [ ]
},
"process" : {
"timestamp" : 1324472120587,
"open_file_descriptors" : 1438
},
"jvm" : {
"timestamp" : 1324472120589,
"uptime" : "15 hours, 58 minutes, 25 seconds and 155 milliseconds",
"uptime_in_millis" : 57505155,
"mem" : {
"heap_used" : "4.9gb",
"heap_used_in_bytes" : 5317828824,
"heap_committed" : "4.9gb",
"heap_committed_in_bytes" : 5340397568,
"non_heap_used" : "44.4mb",
"non_heap_used_in_bytes" : 46654016,
"non_heap_committed" : "68.6mb",
"non_heap_committed_in_bytes" : 71991296
},
"threads" : {
"count" : 94,
"peak_count" : 96
},
"gc" : {
"collection_count" : 15361,
"collection_time" : "7 minutes, 38 seconds and 876 milliseconds",
"collection_time_in_millis" : 458876,
"collectors" : {
"ParNew" : {
"collection_count" : 15245,
"collection_time" : "6 minutes, 6 seconds and 39 milliseconds",
"collection_time_in_millis" : 366039
},
"ConcurrentMarkSweep" : {
"collection_count" : 116,
"collection_time" : "1 minute, 32 seconds and 837 milliseconds",
"collection_time_in_millis" : 92837
}
}
}
},
"network" : {
},
"transport" : {
"server_open" : 14,
"rx_count" : 242790,
"rx_size" : "4.1gb",
"rx_size_in_bytes" : 4459575446,
"tx_count" : 250201,
"tx_size" : "452mb",
"tx_size_in_bytes" : 474044111
},
"http" : {
"current_open" : 3,
"total_opened" : 7074
}
}
}
}