Hi everyone,
We recently had a big spike in S3 transfers and, subsequently, costs, which led me to troubleshoot the configuration of our Elastic Stack. I’ve found the cluster setting indices.recovery.use_snapshots, which defaults to true, and I set it to false. I also set the snapshot repository setting use_for_peer_recovery: false.
According to the documentation (8.13, latest), this should lead to peer recoveries copying shard data from one node to the other node, instead of using the repository for peer recovery (“When indices.recovery.use_snapshots is false Elasticsearch will construct this new copy by transferring the index data from the current primary.”). It also states that “If none of the registered repositories have this setting defined, index files will be recovered from the source node”.
Looking at the actual recoveries performed after these changes reveals that they are still downloading shard data from the snapshot repository instead of copying it from the source node. (The change was made on 2025-10-21 around 14:00; the examples below are from 2025-10-22.)
These peer recoveries were all done to rebalance the cluster.
Am I missing something crucial here, or is this setting not working?
More information regarding our setup:
I know that this is an old version of ES, but I have not found any information on GitHub or elsewhere that would suggest the behavior of these settings has changed.
The ILM policy creates searchable snapshots after rollover in the hot phase; the cold phase is configured with searchable snapshots for 30-60 days after rollover, and deletion happens between 90 days and 1 year.
//GET /_cluster/settings
{
"persistent": {
"action": {
"auto_create_index": ".ent-search-*-logs-*,-.ent-search-*,+*"
},
"cluster": {
"routing": {
"allocation": {
"node_concurrent_incoming_recoveries": "4",
"disk": {
"watermark": {
"low": {
"max_headroom": "185GB"
},
"flood_stage": {
"max_headroom": "70GB"
},
"high": {
"max_headroom": "120GB"
}
}
},
"node_initial_primaries_recoveries": "6",
"balance": {
"threshold": "50"
},
"node_concurrent_outgoing_recoveries": "4",
"cluster_concurrent_rebalance": "4",
"node_concurrent_recoveries": "4"
}
},
"max_shards_per_node": "13500"
},
"indices": {
"recovery": {
"use_snapshots": "false"
}
},
"search": {
"max_async_search_response_size": "50mb"
}
}
}
//GET _snapshot/searchable_snapshot
{
"searchable_snapshot": {
"type": "s3",
"uuid": "<redacted>",
"settings": {
"bucket": "<redacted>",
"region": "eu-central-1",
"use_for_peer_recovery": "false"
}
}
}
//GET _recovery?active_only=false&detailed=false&human=true
"restored-.ds-logs-network_traffic.icmp-otc2_audit_prod-2024.11.08-000008": {
"shards": [
{
"id": 0,
"type": "PEER",
"stage": "DONE",
"primary": true,
"start_time": "2025-10-22T06:18:53.516Z",
"start_time_in_millis": 1761113933516,
"stop_time": "2025-10-22T06:19:03.282Z",
"stop_time_in_millis": 1761113943282,
"total_time": "9.7s",
"total_time_in_millis": 9765,
"source": {
"id": "IyJ55ugtTQafWBo1gx0-pg",
"host": "172.29.12.153",
"transport_address": "172.29.12.153:9300",
"ip": "172.29.12.153",
"name": "elk12-data"
},
"target": {
"id": "aobm-H99R42lWExkxhFzpA",
"host": "172.29.12.148",
"transport_address": "172.29.12.148:9300",
"ip": "172.29.12.148",
"name": "elk7-data"
},
"index": {
"size": {
"total": "359.1mb",
"total_in_bytes": 376577524,
"reused": "824b",
"reused_in_bytes": 824,
"recovered": "359.1mb",
"recovered_in_bytes": 376576700,
"recovered_from_snapshot": "359.1mb",
"recovered_from_snapshot_in_bytes": 376576292,
"percent": "100.0%"
},
"files": {
"total": 5,
"reused": 2,
"recovered": 3,
"percent": "100.0%"
},
"total_time": "9.7s",
"total_time_in_millis": 9765,
"source_throttle_time": "0s",
"source_throttle_time_in_millis": 0,
"target_throttle_time": "-1",
"target_throttle_time_in_millis": 0
},
"translog": {
"recovered": 0,
"total": 0,
"percent": "100.0%",
"total_on_start": 0,
"total_time": "0s",
"total_time_in_millis": 0
},
"verify_index": {
"check_index_time": "0s",
"check_index_time_in_millis": 0,
"total_time": "0s",
"total_time_in_millis": 0
}
}
]
},
"restored-corp_ad_winevent-008127": {
"shards": [
{
"id": 0,
"type": "PEER",
"stage": "DONE",
"primary": true,
"start_time": "2025-10-22T04:54:05.017Z",
"start_time_in_millis": 1761108845017,
"stop_time": "2025-10-22T05:48:11.759Z",
"stop_time_in_millis": 1761112091759,
"total_time": "54.1m",
"total_time_in_millis": 3246741,
"source": {
"id": "aobm-H99R42lWExkxhFzpA",
"host": "172.29.12.148",
"transport_address": "172.29.12.148:9300",
"ip": "172.29.12.148",
"name": "elk7-data"
},
"target": {
"id": "2IfXZEt5T_-3Cdf5at9YDQ",
"host": "172.29.12.150",
"transport_address": "172.29.12.150:9300",
"ip": "172.29.12.150",
"name": "elk9-data"
},
"index": {
"size": {
"total": "61gb",
"total_in_bytes": 65583714433,
"reused": "1kb",
"reused_in_bytes": 1109,
"recovered": "61gb",
"recovered_in_bytes": 65583713324,
"recovered_from_snapshot": "61gb",
"recovered_from_snapshot_in_bytes": 65583712912,
"percent": "100.0%"
},
"files": {
"total": 22,
"reused": 2,
"recovered": 20,
"percent": "100.0%"
},
"total_time": "54.1m",
"total_time_in_millis": 3246741,
"source_throttle_time": "0s",
"source_throttle_time_in_millis": 0,
"target_throttle_time": "-1",
"target_throttle_time_in_millis": 0
},
"translog": {
"recovered": 0,
"total": 0,
"percent": "100.0%",
"total_on_start": 0,
"total_time": "0s",
"total_time_in_millis": 0
},
"verify_index": {
"check_index_time": "0s",
"check_index_time_in_millis": 0,
"total_time": "0s",
"total_time_in_millis": 0
}
}
]
}