Hi, all:
I want to use Elasticsearch to store and search web logs in real time, and I use the Python API to bulk-insert documents into Elasticsearch.
I'm trying to bulk-insert batches of 1000 documents into Elasticsearch using a predefined mapping, yet each bulk insert takes roughly 1 second. Any ideas on how to improve bulk performance?
Elasticsearch configuration
network.host: 0.0.0.0
indices.memory.index_buffer_size: "1g"
bootstrap.memory_lock: true
JVM
-Xms8g
-Xmx8g
Index mapping
PUT protocol
{
"settings": {
"index": {
"number_of_shards": 5,
"number_of_replicas": 0,
"translog.flush_threshold_size": "1gb",
"translog.durability": "async"
}
},
"mappings": {
"http": {
"properties": {
"timestamp": {
"type": "date",
"index": true
},
"asset_keys": {
"type": "keyword",
"index": true
},
"skey": {
"type": "keyword",
"index": true
},
"dkey": {
"type": "keyword",
"index": true
},
"uid": {
"type": "keyword",
"index": true
},
"src_ip": {
"type": "keyword",
"index": true,
"copy_to": "protocol_all"
},
"src_port": {
"type": "keyword",
"index": true,
"copy_to": "protocol_all"
},
"dst_ip": {
"type": "keyword",
"index": true,
"copy_to": "protocol_all"
},
"dst_port": {
"type": "keyword",
"index": true,
"copy_to": "protocol_all"
},
"method": {
"type": "keyword",
"index": true,
"copy_to": "protocol_all"
},
"URI": {
"type": "keyword",
"index": false,
"copy_to": "protocol_all"
},
"url": {
"type": "keyword",
"index": false,
"copy_to": "protocol_all"
},
"protocol": {
"type": "keyword",
"index": false,
"copy_to": "protocol_all"
},
"host": {
"type": "keyword",
"index": false,
"copy_to": "protocol_all"
},
"User-Agent": {
"type": "keyword",
"index": true,
"copy_to": "protocol_all"
},
"Cookie": {
"type": "keyword",
"index": false,
"copy_to": "protocol_all"
},
"username": {
"type": "keyword",
"index": false,
"copy_to": "protocol_all"
},
"password": {
"type": "keyword",
"index": false
},
"status_code": {
"type": "integer",
"index": false
},
"request": {
"type": "object",
"enabled": false
},
"response": {
"type": "object",
"enabled": false
},
"sensor_ip": {
"type": "keyword",
"index": true,
"copy_to": "protocol_all"
},
"location": {
"type": "geo_point",
"index": false
},
"city": {
"type": "keyword",
"index": false
},
"country": {
"type": "keyword",
"index": false
},
"raw_data": {
"type": "object",
"enabled": false
},
"protocol_all": {
"type": "keyword",
"index": true
},
"es_type": {
"type": "keyword",
"index": false
}
}
}
}
}
The data inserted is almost 1000 * 10k per bulk request.