Thanks for the suggestions.
I first captured hot_threads. Here is the response on ES9 while running the query:
::: {myhostname}{_AX4jIRPS1uknpFQ6MoXQA}{LbwweeGFQOWcrjrTI9JVAA}{myhostname}{127.0.0.1}{127.0.0.1:9300}{cdfhilmrstw}{9.2.1}{8000099-9039001}{ml.machine_memory=38654705664, transform.config_version=10.0.0, xpack.installed=true, ml.config_version=12.0.0, ml.max_jvm_size=4294967296, ml.allocated_processors_double=14.0, ml.allocated_processors=14}
Hot threads at 2025-11-12T15:29:55.537Z, interval=500ms, busiestThreads=3, ignoreIdleThreads=true:
101.5% [cpu=101.5%, other=0.0%] (507.3ms out of 500ms) cpu usage by thread 'elasticsearch[myhostname][search][T#9]'
10/10 snapshots sharing following 39 elements
app/org.elasticsearch.server@9.2.1/org.elasticsearch.search.aggregations.metrics.TopHitsAggregator$1.setScorer(TopHitsAggregator.java:125)
app/org.elasticsearch.server@9.2.1/org.elasticsearch.search.aggregations.LeafBucketCollectorBase.setScorer(LeafBucketCollectorBase.java:42)
app/org.elasticsearch.server@9.2.1/org.elasticsearch.search.aggregations.LeafBucketCollectorBase.setScorer(LeafBucketCollectorBase.java:42)
app/org.elasticsearch.server@9.2.1/org.elasticsearch.search.query.QueryPhaseCollector$CompositeLeafCollector.setScorer(QueryPhaseCollector.java:278)
app/org.apache.lucene.core@10.3.1/org.apache.lucene.search.FilterLeafCollector.setScorer(FilterLeafCollector.java:37)
app/org.apache.lucene.core@10.3.1/org.apache.lucene.search.ScoreCachingWrappingScorer$ScoreCachingWrappingLeafCollector.setScorer(ScoreCachingWrappingScorer.java:60)
app/org.apache.lucene.core@10.3.1/org.apache.lucene.search.BooleanScorerSupplier$1$1.setScorer(BooleanScorerSupplier.java:262)
app/org.apache.lucene.core@10.3.1/org.apache.lucene.search.Weight$DefaultBulkScorer.score(Weight.java:254)
app/org.apache.lucene.core@10.3.1/org.apache.lucene.search.BooleanScorerSupplier$1.score(BooleanScorerSupplier.java:270)
app/org.apache.lucene.core@10.3.1/org.apache.lucene.search.ReqExclBulkScorer.score(ReqExclBulkScorer.java:69)
app/org.elasticsearch.server@9.2.1/org.elasticsearch.search.internal.CancellableBulkScorer.score(CancellableBulkScorer.java:46)
app/org.elasticsearch.server@9.2.1/org.elasticsearch.search.internal.ContextIndexSearcher.searchLeaf(ContextIndexSearcher.java:465)
app/org.apache.lucene.core@10.3.1/org.apache.lucene.search.IndexSearcher.search(IndexSearcher.java:809)
app/org.elasticsearch.server@9.2.1/org.elasticsearch.search.internal.ContextIndexSearcher.search(ContextIndexSearcher.java:389)
app/org.elasticsearch.server@9.2.1/org.elasticsearch.search.internal.ContextIndexSearcher.lambda$search$3(ContextIndexSearcher.java:367)
app/org.elasticsearch.server@9.2.1/org.elasticsearch.search.internal.ContextIndexSearcher$$Lambda/0x000007f80143fcf0.call(Unknown Source)
java.base@25.0.1/java.util.concurrent.FutureTask.run(FutureTask.java:328)
app/org.apache.lucene.core@10.3.1/org.apache.lucene.search.TaskExecutor$Task.run(TaskExecutor.java:173)
app/org.apache.lucene.core@10.3.1/org.apache.lucene.search.TaskExecutor.invokeAll(TaskExecutor.java:111)
app/org.elasticsearch.server@9.2.1/org.elasticsearch.search.internal.ContextIndexSearcher.search(ContextIndexSearcher.java:371)
app/org.elasticsearch.server@9.2.1/org.elasticsearch.search.internal.ContextIndexSearcher.search(ContextIndexSearcher.java:338)
app/org.elasticsearch.server@9.2.1/org.elasticsearch.search.query.QueryPhase.addCollectorsAndSearch(QueryPhase.java:212)
app/org.elasticsearch.server@9.2.1/org.elasticsearch.search.query.QueryPhase.executeQuery(QueryPhase.java:143)
app/org.elasticsearch.server@9.2.1/org.elasticsearch.search.query.QueryPhase.execute(QueryPhase.java:70)
app/org.elasticsearch.server@9.2.1/org.elasticsearch.search.SearchService.loadOrExecuteQueryPhase(SearchService.java:700)
app/org.elasticsearch.server@9.2.1/org.elasticsearch.search.SearchService.executeQueryPhase(SearchService.java:906)
app/org.elasticsearch.server@9.2.1/org.elasticsearch.search.SearchService.lambda$executeQueryPhase$7(SearchService.java:739)
app/org.elasticsearch.server@9.2.1/org.elasticsearch.search.SearchService$$Lambda/0x000007f801425550.get(Unknown Source)
app/org.elasticsearch.server@9.2.1/org.elasticsearch.action.ActionRunnable$3.accept(ActionRunnable.java:79)
app/org.elasticsearch.server@9.2.1/org.elasticsearch.action.ActionRunnable$3.accept(ActionRunnable.java:76)
app/org.elasticsearch.server@9.2.1/org.elasticsearch.action.ActionRunnable$4.doRun(ActionRunnable.java:101)
app/org.elasticsearch.server@9.2.1/org.elasticsearch.common.util.concurrent.AbstractRunnable.run(AbstractRunnable.java:27)
app/org.elasticsearch.server@9.2.1/org.elasticsearch.common.util.concurrent.TimedRunnable.doRun(TimedRunnable.java:35)
app/org.elasticsearch.server@9.2.1/org.elasticsearch.common.util.concurrent.ThreadContext$ContextPreservingAbstractRunnable.doRun(ThreadContext.java:1076)
app/org.elasticsearch.server@9.2.1/org.elasticsearch.common.util.concurrent.AbstractRunnable.run(AbstractRunnable.java:27)
java.base@25.0.1/java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1090)
java.base@25.0.1/java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:614)
java.base@25.0.1/java.lang.Thread.runWith(Thread.java:1487)
java.base@25.0.1/java.lang.Thread.run(Thread.java:1474)
So the TopHitsAggregator seems to run hot. And yes, if I remove the top_hits sub-aggregation, the response is very fast on ES9. But I need that sub-aggregation.
And now it's getting really weird: I added "profile":true to my query:
{
"profile": true,
"query": {
"bool": {
"must_not": [
{
"term": {
"files.storage": "INTERNAL_MANAGED"
}
}
]
}
},
"size": 1,
"aggs": {
"creatorsAgg": {
"aggs": {
"by_cone_id": {
"terms": {
"field": "metadata.creators.person.identifier.id"
},
"aggs": {
"creator_info": {
"top_hits": {
"_source": {
"includes": [
"metadata.creators.person.givenName",
"metadata.creators.person.familyName",
"metadata.creators.person.identifier.id"
]
},
"size": 1
}
}
}
}
},
"nested": {
"path": "metadata.creators"
}
}
}
}
And suddenly it works like a charm on ES9. Response times are:
1268ms, 220ms, 220ms, 252ms, 233ms
If I reset "profile" to false, response times are as bad as ever:
21764ms, 22145ms, 23670ms...
What is going on here?
Anyway, here's the response of the query with "profile":true (I removed the hits and aggregations sections due to their length):
{
"took": 236,
"timed_out": false,
"_shards": {
"total": 1,
"successful": 1,
"skipped": 0,
"failed": 0
},
"profile": {
"shards": [
{
"id": "[_AX4jIRPS1uknpFQ6MoXQA][items][0]",
"node_id": "_AX4jIRPS1uknpFQ6MoXQA",
"shard_id": 0,
"index": "items",
"cluster": "(local)",
"searches": [
{
"query": [
{
"type": "BooleanQuery",
"description": "-files.storage:INTERNAL_MANAGED #FieldExistsQuery [field=_primary_term]",
"time_in_nanos": 70940168,
"breakdown": {
"set_min_competitive_score_count": 0,
"match_count": 745994,
"shallow_advance_count": 0,
"set_min_competitive_score": 0,
"next_doc": 38829435,
"match": 17639098,
"next_doc_count": 746016,
"score_count": 564169,
"compute_max_score_count": 0,
"compute_max_score": 0,
"advance": 0,
"advance_count": 0,
"count_weight_count": 0,
"score": 10372802,
"build_scorer_count": 44,
"create_weight": 274041,
"shallow_advance": 0,
"count_weight": 0,
"create_weight_count": 1,
"build_scorer": 3824792
},
"children": [
{
"type": "TermQuery",
"description": "files.storage:INTERNAL_MANAGED",
"time_in_nanos": 6605494,
"breakdown": {
"set_min_competitive_score_count": 0,
"match_count": 0,
"shallow_advance_count": 0,
"set_min_competitive_score": 0,
"next_doc": 0,
"match": 0,
"next_doc_count": 0,
"score_count": 0,
"compute_max_score_count": 0,
"compute_max_score": 0,
"advance": 6354449,
"advance_count": 309858,
"count_weight_count": 0,
"score": 0,
"build_scorer_count": 44,
"create_weight": 4583,
"shallow_advance": 0,
"count_weight": 0,
"create_weight_count": 1,
"build_scorer": 246462
}
},
{
"type": "FieldExistsQuery",
"description": "FieldExistsQuery [field=_primary_term]",
"time_in_nanos": 15006760,
"breakdown": {
"set_min_competitive_score_count": 0,
"match_count": 0,
"shallow_advance_count": 0,
"set_min_competitive_score": 0,
"next_doc": 14890760,
"match": 0,
"next_doc_count": 746016,
"score_count": 0,
"compute_max_score_count": 0,
"compute_max_score": 0,
"advance": 0,
"advance_count": 0,
"count_weight_count": 0,
"score": 0,
"build_scorer_count": 66,
"create_weight": 2292,
"shallow_advance": 0,
"count_weight": 0,
"create_weight_count": 1,
"build_scorer": 113708
}
}
]
}
],
"rewrite_time": 269209,
"collector": [
{
"name": "QueryPhaseCollector",
"reason": "search_query_phase",
"time_in_nanos": 178226458,
"children": [
{
"name": "TopScoreDocCollector",
"reason": "search_top_hits",
"time_in_nanos": 9813723
},
{
"name": "AggregatorCollector: [creatorsAgg]",
"reason": "aggregation",
"time_in_nanos": 135370832
}
]
}
]
}
],
"aggregations": [
{
"type": "NestedAggregator",
"description": "creatorsAgg",
"time_in_nanos": 136924698,
"breakdown": {
"reduce": 0,
"build_aggregation_count": 1,
"post_collection": 2750,
"reduce_count": 0,
"initialize_count": 1,
"collect_count": 564169,
"post_collection_count": 1,
"build_leaf_collector": 1705623,
"build_aggregation": 12730667,
"build_leaf_collector_count": 22,
"initialize": 49416,
"collect": 122436242
},
"debug": {
"built_buckets": 1
},
"children": [
{
"type": "GlobalOrdinalsStringTermsAggregator",
"description": "by_cone_id",
"time_in_nanos": 147614111,
"breakdown": {
"reduce": 0,
"build_aggregation_count": 1,
"post_collection": 500,
"reduce_count": 0,
"initialize_count": 1,
"collect_count": 2990900,
"post_collection_count": 1,
"build_leaf_collector": 1131956,
"build_aggregation": 12688875,
"build_leaf_collector_count": 22,
"initialize": 20792,
"collect": 133771988
},
"debug": {
"segments_with_multi_valued_ords": 22,
"collection_strategy": "dense",
"segments_with_single_valued_ords": 0,
"total_buckets": 72956,
"built_buckets": 1,
"result_strategy": "terms",
"has_filter": false
},
"children": [
{
"type": "TopHitsAggregator",
"description": "creator_info",
"time_in_nanos": 63222736,
"breakdown": {
"reduce": 0,
"build_aggregation_count": 1,
"post_collection": 209,
"reduce_count": 0,
"initialize_count": 1,
"collect_count": 820466,
"post_collection_count": 1,
"build_leaf_collector": 330331,
"build_aggregation": 11600416,
"build_leaf_collector_count": 22,
"initialize": 2459,
"collect": 51289321
},
"debug": {
"fetch_profile": [
{
"breakdown": {
"load_stored_fields": 122958,
"load_source": 0,
"load_stored_fields_count": 2,
"next_reader_count": 1,
"load_source_count": 0,
"next_reader": 211250
},
"time": 1369500
},
{
"breakdown": {
"load_stored_fields": 53125,
"load_source": 0,
"load_stored_fields_count": 2,
"next_reader_count": 1,
"load_source_count": 0,
"next_reader": 93584
},
"time": 418084
},
{
"breakdown": {
"load_stored_fields": 58583,
"load_source": 0,
"load_stored_fields_count": 2,
"next_reader_count": 1,
"load_source_count": 0,
"next_reader": 81125
},
"time": 345917
},
{
"breakdown": {
"load_stored_fields": 51667,
"load_source": 0,
"load_stored_fields_count": 2,
"next_reader_count": 1,
"load_source_count": 0,
"next_reader": 83625
},
"time": 319459
},
{
"breakdown": {
"load_stored_fields": 50084,
"load_source": 0,
"load_stored_fields_count": 2,
"next_reader_count": 1,
"load_source_count": 0,
"next_reader": 77792
},
"time": 359417
},
{
"breakdown": {
"load_stored_fields": 69750,
"load_source": 0,
"load_stored_fields_count": 2,
"next_reader_count": 1,
"load_source_count": 0,
"next_reader": 81125
},
"time": 346750
},
{
"breakdown": {
"load_stored_fields": 42375,
"load_source": 0,
"load_stored_fields_count": 2,
"next_reader_count": 1,
"load_source_count": 0,
"next_reader": 80917
},
"time": 317000
},
{
"breakdown": {
"load_stored_fields": 49833,
"load_source": 0,
"load_stored_fields_count": 2,
"next_reader_count": 1,
"load_source_count": 0,
"next_reader": 79375
},
"time": 327875
},
{
"breakdown": {
"load_stored_fields": 38042,
"load_source": 0,
"load_stored_fields_count": 2,
"next_reader_count": 1,
"load_source_count": 0,
"next_reader": 79417
},
"time": 311375
},
{
"breakdown": {
"load_stored_fields": 38542,
"load_source": 0,
"load_stored_fields_count": 2,
"next_reader_count": 1,
"load_source_count": 0,
"next_reader": 84167
},
"time": 564250
},
{
"breakdown": {
"load_stored_fields": 43792,
"load_source": 0,
"load_stored_fields_count": 2,
"next_reader_count": 1,
"load_source_count": 0,
"next_reader": 79667
},
"time": 333625
},
{
"breakdown": {
"load_stored_fields": 63958,
"load_source": 0,
"load_stored_fields_count": 2,
"next_reader_count": 1,
"load_source_count": 0,
"next_reader": 79625
},
"time": 346292
},
{
"breakdown": {
"load_stored_fields": 47958,
"load_source": 0,
"load_stored_fields_count": 2,
"next_reader_count": 1,
"load_source_count": 0,
"next_reader": 83250
},
"time": 310250
},
{
"breakdown": {
"load_stored_fields": 53292,
"load_source": 0,
"load_stored_fields_count": 2,
"next_reader_count": 1,
"load_source_count": 0,
"next_reader": 77875
},
"time": 337250
},
{
"breakdown": {
"load_stored_fields": 50999,
"load_source": 0,
"load_stored_fields_count": 2,
"next_reader_count": 1,
"load_source_count": 0,
"next_reader": 76250
},
"time": 313708
},
{
"breakdown": {
"load_stored_fields": 38084,
"load_source": 0,
"load_stored_fields_count": 2,
"next_reader_count": 1,
"load_source_count": 0,
"next_reader": 79541
},
"time": 306625
},
{
"breakdown": {
"load_stored_fields": 46500,
"load_source": 0,
"load_stored_fields_count": 2,
"next_reader_count": 1,
"load_source_count": 0,
"next_reader": 76333
},
"time": 289458
},
{
"breakdown": {
"load_stored_fields": 53292,
"load_source": 0,
"load_stored_fields_count": 2,
"next_reader_count": 1,
"load_source_count": 0,
"next_reader": 75750
},
"time": 310625
},
{
"breakdown": {
"load_stored_fields": 36750,
"load_source": 0,
"load_stored_fields_count": 2,
"next_reader_count": 1,
"load_source_count": 0,
"next_reader": 77458
},
"time": 293958
},
{
"breakdown": {
"load_stored_fields": 29875,
"load_source": 0,
"load_stored_fields_count": 2,
"next_reader_count": 1,
"load_source_count": 0,
"next_reader": 76917
},
"time": 275583
},
{
"breakdown": {
"load_stored_fields": 45000,
"load_source": 0,
"load_stored_fields_count": 2,
"next_reader_count": 1,
"load_source_count": 0,
"next_reader": 79000
},
"time": 304334
},
{
"breakdown": {
"load_stored_fields": 55251,
"load_source": 0,
"load_stored_fields_count": 2,
"next_reader_count": 1,
"load_source_count": 0,
"next_reader": 97291
},
"time": 326917
},
{
"breakdown": {
"load_stored_fields": 40958,
"load_source": 0,
"load_stored_fields_count": 2,
"next_reader_count": 1,
"load_source_count": 0,
"next_reader": 78500
},
"time": 302417
},
{
"breakdown": {
"load_stored_fields": 43417,
"load_source": 0,
"load_stored_fields_count": 2,
"next_reader_count": 1,
"load_source_count": 0,
"next_reader": 75000
},
"time": 283834
},
{
"breakdown": {
"load_stored_fields": 37333,
"load_source": 0,
"load_stored_fields_count": 2,
"next_reader_count": 1,
"load_source_count": 0,
"next_reader": 79500
},
"time": 287084
}
],
"built_buckets": 25
}
}
]
}
]
}
],
"fetch": {
"type": "fetch",
"description": "",
"time_in_nanos": 288000,
"breakdown": {
"load_stored_fields": 57875,
"load_source": 1625,
"load_stored_fields_count": 1,
"next_reader_count": 1,
"load_source_count": 1,
"next_reader": 110875
},
"debug": {
"stored_fields": [
"_id",
"_routing",
"_source"
]
},
"children": [
{
"type": "FetchFieldsPhase",
"description": "",
"time_in_nanos": 15458,
"breakdown": {
"process_count": 1,
"process": 9458,
"next_reader": 6000,
"next_reader_count": 1
}
},
{
"type": "FetchSourcePhase",
"description": "",
"time_in_nanos": 3249,
"breakdown": {
"process_count": 1,
"process": 3166,
"next_reader": 83,
"next_reader_count": 1
},
"debug": {
"fast_path": 1
}
},
{
"type": "StoredFieldsPhase",
"description": "",
"time_in_nanos": 417,
"breakdown": {
"process_count": 1,
"process": 292,
"next_reader": 125,
"next_reader_count": 1
}
}
]
}
}
]
}
}