Hi,
Is there an aggregated and detailed source of information on optimising NVMe SSD configuration for indexing-heavy use cases with Elasticsearch?
Thx
D
Hi,
Is there an aggregated and detailed source of information on optimising NVMe SSD configuration for indexing-heavy use cases with Elasticsearch?
Thx
D
How did you determine that the storage is the bottleneck? Have you followed these guidelines?
I haven't @Christian_Dahlqvist. I've taken ownership of a set of clusters which haven't been well configured or managed. I'm evaluating all levels of the stack, hence the question.
Looking into this some more, I'm seeing hot_threads output which seems to point to I/O issues:
::: {generic-node}{uMrBwihLR6K1Af91obRTAg}{pCCp2d4rQmKOS9Teyy4X_w}{generic-node}{generic-node:9300}{aws_availability_zone=us-east-1a, ml.machine_memory=64384024576, ml.max_open_jobs=20, xpack.installed=true}
Hot threads at 2020-08-13T07:44:01.157Z, interval=500ms, busiestThreads=3, ignoreIdleThreads=true:
89.5% (447.5ms out of 500ms) cpu usage by thread 'elasticsearch[generic-node][write][T#3]'
7/10 snapshots sharing following 52 elements
app//org.apache.lucene.codecs.lucene80.IndexedDISI$Method$1.advanceExactWithinBlock(IndexedDISI.java:507)
app//org.apache.lucene.codecs.lucene80.IndexedDISI.advanceExact(IndexedDISI.java:399)
app//org.apache.lucene.codecs.lucene80.Lucene80NormsProducer$SparseNormsIterator.advanceExact(Lucene80NormsProducer.java:186)
app//org.apache.lucene.codecs.lucene50.Lucene50PostingsWriter.startDoc(Lucene50PostingsWriter.java:264)
app//org.apache.lucene.codecs.PushPostingsWriterBase.writeTerm(PushPostingsWriterBase.java:148)
app//org.apache.lucene.codecs.blocktree.BlockTreeTermsWriter$TermsWriter.write(BlockTreeTermsWriter.java:865)
app//org.apache.lucene.codecs.blocktree.BlockTreeTermsWriter.write(BlockTreeTermsWriter.java:344)
app//org.apache.lucene.codecs.perfield.PerFieldPostingsFormat$FieldsWriter.write(PerFieldPostingsFormat.java:142)
app//org.apache.lucene.index.FreqProxTermsWriter.flush(FreqProxTermsWriter.java:97)
app//org.apache.lucene.index.DefaultIndexingChain.flush(DefaultIndexingChain.java:176)
These are AWS i3.2xlarge instances. Each node is general purpose. Indexing rates are around 40k/sec (replicated) but can fall to around 5k/sec, which is when the snapshot above was taken.
How many indices and shards are you indexing into? What are your index settings? Is your data immutable? Which version are you using?
Use Case: logging
Version: 7.2.0
Number of Nodes: 13
Number of Active Indices: 1
Number of Shards: 39 (primary)
Index settings:
{
"settings": {
"index": {
"mapping": {
"total_fields": {
"limit": "10000"
},
"ignore_malformed": "true"
},
"refresh_interval": "30s",
"indexing": {
"slowlog": {
"level": "debug",
"threshold": {
"index": {
"warn": "10s",
"trace": "500ms",
"debug": "2s",
"info": "5s"
}
},
"source": "500"
}
},
"translog": {
"flush_threshold_size": "1024MB"
},
"provided_name": "aws-logstash-log-2020.08.13",
"query": {
"default_field": "message"
},
"creation_date": "1597276800761",
"number_of_replicas": "1",
"uuid": "O0vK_wU8SwyaBMnKiruzCA",
"version": {
"created": "7020099"
},
"codec": "best_compression",
"search": {
"slowlog": {
"level": "info",
"threshold": {
"fetch": {
"warn": "1s",
"trace": "200ms",
"debug": "500ms",
"info": "800ms"
},
"query": {
"warn": "10s",
"trace": "500ms",
"debug": "2s",
"info": "5s"
}
}
}
},
"number_of_shards": "39"
}
},
"defaults": {
"index": {
"max_inner_result_window": "100",
"unassigned": {
"node_left": {
"delayed_timeout": "1m"
}
},
"max_terms_count": "65536",
"lifecycle": {
"name": "",
"rollover_alias": "",
"indexing_complete": "false"
},
"routing_partition_size": "1",
"force_memory_term_dictionary": "false",
"max_docvalue_fields_search": "100",
"merge": {
"scheduler": {
"max_thread_count": "4",
"auto_throttle": "true",
"max_merge_count": "9"
},
"policy": {
"reclaim_deletes_weight": "2.0",
"floor_segment": "2mb",
"max_merge_at_once_explicit": "30",
"max_merge_at_once": "10",
"max_merged_segment": "5gb",
"expunge_deletes_allowed": "10.0",
"segments_per_tier": "10.0",
"deletes_pct_allowed": "33.0"
}
},
"max_refresh_listeners": "1000",
"max_regex_length": "1000",
"load_fixed_bitset_filters_eagerly": "true",
"number_of_routing_shards": "1",
"write": {
"wait_for_active_shards": "1"
},
"verified_before_close": "false",
"mapping": {
"coerce": "false",
"nested_fields": {
"limit": "50"
},
"nested_objects": {
"limit": "10000"
},
"depth": {
"limit": "20"
},
"field_name_length": {
"limit": "9223372036854775807"
}
},
"source_only": "false",
"soft_deletes": {
"enabled": "false",
"retention": {
"operations": "0"
},
"retention_lease": {
"period": "12h"
}
},
"max_script_fields": "32",
"query": {
"parse": {
"allow_unmapped_fields": "true"
}
},
"format": "0",
"frozen": "false",
"sort": {
"missing": [],
"mode": [],
"field": [],
"order": []
},
"priority": "1",
"max_rescore_window": "10000",
"max_adjacency_matrix_filters": "100",
"analyze": {
"max_token_count": "10000"
},
"gc_deletes": "60s",
"optimize_auto_generated_id": "true",
"max_ngram_diff": "1",
"translog": {
"generation_threshold_size": "64mb",
"sync_interval": "5s",
"retention": {
"size": "512mb",
"age": "12h"
},
"durability": "REQUEST"
},
"auto_expand_replicas": "false",
"mapper": {
"dynamic": "true"
},
"requests": {
"cache": {
"enable": "true"
}
},
"data_path": "",
"highlight": {
"max_analyzed_offset": "1000000"
},
"routing": {
"rebalance": {
"enable": "all"
},
"allocation": {
"enable": "all",
"total_shards_per_node": "-1"
}
},
"search": {
"idle": {
"after": "30s"
},
"throttled": "false"
},
"fielddata": {
"cache": "node"
},
"default_pipeline": "_none",
"max_slices_per_scroll": "1024",
"shard": {
"check_on_startup": "false"
},
"xpack": {
"watcher": {
"template": {
"version": ""
}
},
"version": "",
"ccr": {
"following_index": "false"
}
},
"percolator": {
"map_unmapped_fields_as_text": "false"
},
"allocation": {
"max_retries": "5"
},
"indexing": {
"slowlog": {
"reformat": "true"
}
},
"compound_format": "0.1",
"blocks": {
"metadata": "false",
"read": "false",
"read_only_allow_delete": "false",
"read_only": "false",
"write": "false"
},
"max_result_window": "10000",
"store": {
"stats_refresh_interval": "10s",
"type": "",
"fs": {
"fs_lock": "native"
},
"preload": []
},
"queries": {
"cache": {
"enabled": "true"
}
},
"warmer": {
"enabled": "true"
},
"max_shingle_diff": "3",
"query_string": {
"lenient": "false"
}
}
}
}
This topic was automatically closed 28 days after the last reply. New replies are no longer allowed.
© 2020. All Rights Reserved - Elasticsearch
Apache, Apache Lucene, Apache Hadoop, Hadoop, HDFS and the yellow elephant logo are trademarks of the Apache Software Foundation in the United States and/or other countries.