Thank you for the information regarding the thread.
To provide additional context for the question, consider a simple query with size set to 100:
{
"query": {"bool": {"must": [{"match_all": {}}], "must_not": [], "should": []}},
"from": 0,
"size": 100,
"sort": [],
"aggs": {},
"_source": [
"field1",
"field2",
"field3",
"field4",
"field5",
"field6",
"field7",
"field8",
"field9",
"field10",
"field11",
"field12",
"field13",
"field14",
"field15",
"field16"
]
}
Elasticsearch reports that this query takes more than a second to finish, approximately 1200–1300 ms. The profile information for this query is:
{
"shards": [
{
"id": "[shard_id][sample_index][0]",
"searches": [
{
"query": [
{
"type": "ConstantScoreQuery",
"description": "ConstantScore(FieldExistsQuery [field=_primary_term])",
"time_in_nanos": 194619,
"breakdown": {
"set_min_competitive_score_count": 0,
"match_count": 0,
"shallow_advance_count": 0,
"set_min_competitive_score": 0,
"next_doc": 85247,
"match": 0,
"next_doc_count": 558,
"score_count": 558,
"compute_max_score_count": 0,
"compute_max_score": 0,
"advance": 9136,
"advance_count": 10,
"score": 23042,
"build_scorer_count": 20,
"create_weight": 4883,
"shallow_advance": 0,
"create_weight_count": 1,
"build_scorer": 72311
},
"children": [
{
"type": "FieldExistsQuery",
"description": "FieldExistsQuery [field=_primary_term]",
"time_in_nanos": 94503,
"breakdown": {
"set_min_competitive_score_count": 0,
"match_count": 0,
"shallow_advance_count": 0,
"set_min_competitive_score": 0,
"next_doc": 38538,
"match": 0,
"next_doc_count": 558,
"score_count": 0,
"compute_max_score_count": 0,
"compute_max_score": 0,
"advance": 8169,
"advance_count": 10,
"score": 0,
"build_scorer_count": 20,
"create_weight": 1730,
"shallow_advance": 0,
"create_weight_count": 1,
"build_scorer": 46066
}
}
]
}
],
"rewrite_time": 74439,
"collector": [
{
"name": "MultiCollector",
"reason": "search_multi",
"time_in_nanos": 297006,
"children": [
{
"name": "SimpleTopScoreDocCollector",
"reason": "search_top_hits",
"time_in_nanos": 93227
},
{
"name": "BucketCollectorWrapper: [BucketCollectorWrapper[bucketCollector=org.elasticsearch.search.aggregations.BucketCollector$1@ID]]",
"reason": "aggregation",
"time_in_nanos": 39868
}
]
}
]
}
],
"aggregations": [],
"fetch": {
"type": "fetch",
"description": "",
"time_in_nanos": 1468543241,
"breakdown": {
"load_stored_fields": 335731591,
"load_source": 597153,
"load_stored_fields_count": 100,
"next_reader_count": 4,
"load_source_count": 100,
"next_reader": 475473
},
"debug": {
"stored_fields": [
"_id",
"_routing",
"_source"
]
},
"children": [
{
"type": "FetchSourcePhase",
"description": "",
"time_in_nanos": 1128076983,
"breakdown": {
"process_count": 100,
"process": 1128072393,
"next_reader": 4590,
"next_reader_count": 4
},
"debug": {
"fast_path": 0
}
},
{
"type": "StoredFieldsPhase",
"description": "",
"time_in_nanos": 2392441,
"breakdown": {
"process_count": 100,
"process": 2380853,
"next_reader": 11588,
"next_reader_count": 4
}
}
]
}
}
]
}
However, if we remove source filtering:
{
"query": {"bool": {"must": [{"match_all": {}}], "must_not": [], "should": []}},
"from": 0,
"size": 100,
"sort": [],
"aggs": {}
}
The query takes only around 280-300 ms. The profile information for the query is:
{
"shards": [
{
"id": "[shard_id][sample_index][0]",
"searches": [
{
"query": [
{
"type": "ConstantScoreQuery",
"description": "ConstantScore(FieldExistsQuery [field=_primary_term])",
"time_in_nanos": 339290,
"breakdown": {
"set_min_competitive_score_count": 0,
"match_count": 0,
"shallow_advance_count": 0,
"set_min_competitive_score": 0,
"next_doc": 161142,
"match": 0,
"next_doc_count": 558,
"score_count": 558,
"compute_max_score_count": 0,
"compute_max_score": 0,
"advance": 10104,
"advance_count": 10,
"score": 34467,
"build_scorer_count": 20,
"create_weight": 4705,
"shallow_advance": 0,
"create_weight_count": 1,
"build_scorer": 128872
},
"children": [
{
"type": "FieldExistsQuery",
"description": "FieldExistsQuery [field=_primary_term]",
"time_in_nanos": 164376,
"breakdown": {
"set_min_competitive_score_count": 0,
"match_count": 0,
"shallow_advance_count": 0,
"set_min_competitive_score": 0,
"next_doc": 92178,
"match": 0,
"next_doc_count": 558,
"score_count": 0,
"compute_max_score_count": 0,
"compute_max_score": 0,
"advance": 8662,
"advance_count": 10,
"score": 0,
"build_scorer_count": 20,
"create_weight": 1497,
"shallow_advance": 0,
"create_weight_count": 1,
"build_scorer": 62039
}
}
]
}
],
"rewrite_time": 63997,
"collector": [
{
"name": "MultiCollector",
"reason": "search_multi",
"time_in_nanos": 461468,
"children": [
{
"name": "SimpleTopScoreDocCollector",
"reason": "search_top_hits",
"time_in_nanos": 144301
},
{
"name": "BucketCollectorWrapper: [BucketCollectorWrapper[bucketCollector=org.elasticsearch.search.aggregations.BucketCollector$1@ID]]",
"reason": "aggregation",
"time_in_nanos": 79751
}
]
}
]
}
],
"aggregations": [],
"fetch": {
"type": "fetch",
"description": "",
"time_in_nanos": 284016497,
"breakdown": {
"load_stored_fields": 282148220,
"load_source": 150140,
"load_stored_fields_count": 100,
"next_reader_count": 4,
"load_source_count": 100,
"next_reader": 435831
},
"debug": {
"stored_fields": [
"_id",
"_routing",
"_source"
]
},
"children": [
{
"type": "FetchSourcePhase",
"description": "",
"time_in_nanos": 232649,
"breakdown": {
"process_count": 100,
"process": 228559,
"next_reader": 4090,
"next_reader_count": 4
},
"debug": {
"fast_path": 100
}
},
{
"type": "StoredFieldsPhase",
"description": "",
"time_in_nanos": 617691,
"breakdown": {
"process_count": 100,
"process": 607069,
"next_reader": 10622,
"next_reader_count": 4
}
}
]
}
}
]
}
I have been using Elasticsearch 8.6.0.
The number of documents in the index is 560 and each document is around 550 kilobytes. It has 1 shard and 1 replica.
The JVM heap size of the cluster is 4 GiB and the memory of the cluster is 8 GiB with 4 allocated processors, in a Linux environment (Ubuntu 20.04.5 LTS).
The data mapping consists of mostly text fields and a few vector fields. There is one nested data type with various text subfields and a few numeric types (long and float).
If some more information is needed, please let me know.