Hi guys,
Indexing a single document on Elasticsearch currently takes about 4000
milliseconds, which I think is too slow, especially for the amount of
resources we have set up. We would like to index at a rate of 500 tps.
Each document weighs between 20K and 30K.
How many index requests is it advisable to send at once (assuming we can
afford to send multiple HTTP index requests to the server concurrently)? I
understand bulk indexing is the preferred approach; for a 30K document, how
many documents can be batched in one bulk request? How many concurrent HTTP
bulk requests (supposing I am using a multi-threaded HTTP client to make
requests) is it advisable to make?
I would appreciate suggestions on how to index these documents as fast as
possible. We have two nodes set up; the config below is for one of the
two:
Shards: 5
Replica: 1
"nodes" : {
"T5l5mvIdQsW3je7WmSPOcg" : {
"name" : "SEARCH-01",
"version" : "0.90.7",
"attributes" : {
"rack_id" : "prod",
"max_local_storage_nodes" : "1"
},
"settings" : {
"node.rack_id" : "prod",
"action.disable_delete_all_indices" : "true",
"cloud.node.auto_attributes" : "true",
"indices.ttl.interval" : "90d",
"node.max_local_storage_nodes" : "1",
"bootstrap.mlockall" : "true",
"index.mapper.dynamic" : "true",
"cluster.routing.allocation.awareness.attributes" : "rack_id",
"discovery.zen.minimum_master_nodes" : "3",
"gateway.expected_nodes" : "1",
"discovery.zen.ping.unicast.hosts" :
"172.25.15.170,172.25.15.172,172.46.1.170,172.46.1.172",
"discovery.zen.ping.multicast.enabled" : "false",
"action.auto_create_index" : "true"
},
"os" : {
"refresh_interval" : 1000,
"available_processors" : 8,
"cpu" : {
"vendor" : "Intel",
"model" : "Xeon",
"mhz" : 2600,
"total_cores" : 8,
"total_sockets" : 2,
"cores_per_socket" : 4,
"cache_size" : "20kb",
"cache_size_in_bytes" : 20480
},
"mem" : {
"total" : "17.5gb",
"total_in_bytes" : 18836545536
},
"swap" : {
"total" : "5.8gb",
"total_in_bytes" : 6274670592
}
},
"process" : {
"refresh_interval" : 1000,
"id" : 3459,
"max_file_descriptors" : 64000
},
"jvm" : {
"pid" : 3459,
"version" : "1.7.0_45",
"vm_name" : "OpenJDK 64-Bit Server VM",
"vm_version" : "24.45-b08",
"vm_vendor" : "Oracle Corporation",
"start_time" : 1386953353018,
"mem" : {
"heap_init" : "10.5gb",
"heap_init_in_bytes" : 11301552128,
"heap_max" : "10.4gb",
"heap_max_in_bytes" : 11231821824,
"non_heap_init" : "23.1mb",
"non_heap_init_in_bytes" : 24313856,
"non_heap_max" : "214mb",
"non_heap_max_in_bytes" : 224395264,
"direct_max" : "10.4gb",
"direct_max_in_bytes" : 11231821824
}
},
"thread_pool" : {
"generic" : {
"type" : "cached",
"keep_alive" : "30s"
},
"index" : {
"type" : "fixed",
"min" : 8,
"max" : 8,
"queue_size" : "200"
},
"get" : {
"type" : "fixed",
"min" : 8,
"max" : 8,
"queue_size" : "1k"
},
"snapshot" : {
"type" : "scaling",
"min" : 1,
"max" : 4,
"keep_alive" : "5m"
},
"merge" : {
"type" : "scaling",
"min" : 1,
"max" : 4,
"keep_alive" : "5m"
},
"suggest" : {
"type" : "fixed",
"min" : 8,
"max" : 8,
"queue_size" : "1k"
},
"bulk" : {
"type" : "fixed",
"min" : 8,
"max" : 8,
"queue_size" : "50"
},
"optimize" : {
"type" : "fixed",
"min" : 1,
"max" : 1
},
"warmer" : {
"type" : "scaling",
"min" : 1,
"max" : 4,
"keep_alive" : "5m"
},
"flush" : {
"type" : "scaling",
"min" : 1,
"max" : 4,
"keep_alive" : "5m"
},
"search" : {
"type" : "fixed",
"min" : 24,
"max" : 24,
"queue_size" : "1k"
},
"percolate" : {
"type" : "fixed",
"min" : 8,
"max" : 8,
"queue_size" : "1k"
},
"management" : {
"type" : "scaling",
"min" : 1,
"max" : 5,
"keep_alive" : "5m"
},
"refresh" : {
"type" : "scaling",
"min" : 1,
"max" : 4,
"keep_alive" : "5m"
}
},
"network" : {
"refresh_interval" : 5000
},
"http" : {
"max_content_length" : "100mb",
"max_content_length_in_bytes" : 104857600
},
"plugins" : [ ]
}
}
The mapping is dynamically created because we create a new type each day; it
looks like this:
{
"consumers-20131216": {
"properties": {
"requestData": {
"type": "string"
},
"requestTimestamp": {
"type": "date",
"format": "dateOptionalTime"
},
"responseData": {
"type": "string"
},
"responseTimestamp": {
"type": "date",
"format": "dateOptionalTime"
},
"sequenceId": {
"type": "long"
},
"service": {
"type": "string"
},
"systemResponseCode": {
"type": "string"
},
"systemResponseMessage": {
"type": "string"
},
"transactionComponentTypeId": {
"type": "long"
},
"transactionLogId": {
"type": "long"
},
"user": {
"type": "string"
}
}
}
}
Regards.
--
You received this message because you are subscribed to the Google Groups "elasticsearch" group.
To unsubscribe from this group and stop receiving emails from it, send an email to elasticsearch+unsubscribe@googlegroups.com.
To view this discussion on the web visit https://groups.google.com/d/msgid/elasticsearch/4af4e271-048a-442a-bdc6-2f529824df06%40googlegroups.com.
For more options, visit https://groups.google.com/groups/opt_out.