Performance hit when multiple filebeats are sending to same ES

I'm now running 3 ES nodes in the cluster, with 1 filebeat instance. The highest indexing rate I've seen is 28.7K/s, and the average is above 25K/s. The packet drop is about 32%.

I ran this a few times, and the non-zero threads are

node-3 management           1 0 0
node-3 write                3 0 0
node-4 management           1 0 0
node-4 search_coordination  1 0 0
node-4 refresh              1 0 0
node-4 write                4 0 0
node-5 management           1 0 0
node-5 refresh              1 0 0
node-5 search_coordination  1 0 0
node-5 write                5 0 0

Only the _write and _management threads are non-zero all the time; the others are sometimes all zeros. Also, the _write values range between 1 0 0 and 5 0 0 (as seen thus far), and the highest value alternates among the 3 nodes, i.e. no one node is consistently the highest.

I can't copy the entire output here. Are there any specific values you're looking for? I've copied the ones I thought were relevant here.

"indices": {
  "count": 149,
  "shards": {
    "total": 865,
	"primaries": 821,
	"replication": 0.05359,
	"index": {
	  "shards": {
	    "min": 1,
		"max": 8,
		"avg": 5.805
	  },
	  "primaries": {
	    "min": 1,
		"max": 8,
		"avg": 5.510
	  },
	  "replication": {
	    "min": 0,
		"max": 1,
		"avg": 0.295
	  }
	}
  },
  "docs": {
	"count": 5614635389,
	"deleted": 2961229
  },
  "store": {
	"size_in_bytes": 4458135794391,
	"total_data_set_size_in_bytes": 4458135794391,
	"reserved_in_bytes": 0
  },
  "fielddata": {
	"memory_size_in_bytes": 76208,
	"evictions": 0
  },
  "query_cache": {
	"memory_size_in_bytes": 39251123,
	"total_count": 36180242,
	"hit_count": 7489377,
	"miss_count": 28690865,
	"cache_size": 59627,
	"cache_count": 338184,
	"evictions": 278557
  },
  "completion": {
	"size_in_bytes": 0
  },
  "segments": {
	"count": 19647,
	"memory_in_bytes": 0,
	"terms_memory_in_bytes": 0,
	"stored_fields_memory_in_bytes": 0,
	"term_vectors_memory_in_bytes": 0,
	"norms_memory_in_bytes": 0,
	"points_memory_in_bytes": 0,
	"doc_values_memory_in_bytes": 0,
	"index_writer_memory_in_bytes": 27677624,
	"version_map_memory_in_bytes": 0,
	"fixed_bit_set_memory_in_bytes": 14957976,
	"max_unsafe_auto_id_timestamp": 1687136221116,
	"file_sizes": {}
  },
  ...,
  "nodes": {
	"os": {
	  "available_processors": 288,
	  "allocated_processors": 288,
	  ...,
	  "mem": {
	    "total_in_bytes": 2432527048704,
		"adjusted_total_in_bytes": 2432527048704,
		"free_in_bytes": 24682225664,
		"used_in_bytes": 2407844823040,
		"free_percent": 1,
		"used_percent": 99
	  }
	},
	"process": {
	  "cpu": {
		"percent": 9
	  },
	  "open_file_descriptors": {
		"min": 3141,
		"max": 3401,
		"avg": 3265
	  }
	},
	"jvm": {
	  ...,
	  "mem": {
		"heap_used_in_bytes": 33092436752,
		"heap_max_in_bytes": 96636764160
	  },
	  "threads": 1367
	},
	"fs": {
	  "total_in_bytes": 19993490964480,
	  "free_in_bytes": 15529135980544,
	  "available_in_bytes": 14521446711296
	},
	"ingest": {
	  "number_of_pipelines": 3,
	  "processor_stats": {
	    "conditional": {
		  "count": 22525863239,
		  "failed": 0,
		  "current": 2,
		  "time_in_millis": 201656155
		},
		"geoip": {
		  "count": 15017242898,
		  "failed": 0,
		  "current": 0,
		  "time_in_millis": 64156818
		},
		...,
		"rename": {
		  "count": 30034485796,
		  "failed": 0,
		  "current": 0,
		  "time_in_millis": 23136001
		},
		...,
		"set": {
		  "count": 7508621451,
		  "failed": 0,
		  "current": 0,
		  "time_in_millis": 31491921
		}
	  }
	},
	"indexing_pressure": {
	  "memory": {
		"current": {
		  "combined_coordinating_and_primary_in_bytes": 0,
		  "coordinating_in_bytes": 0,
		  "primary_in_bytes": 0,
		  "replica_in_bytes": 0,
		  "all_in_bytes": 0
		},
		"total": {
		  "combined_coordinating_and_primary_in_bytes": 0,
		  "coordinating_in_bytes": 0,
		  "primary_in_bytes": 0,
		  "replica_in_bytes": 0,
		  "all_in_bytes": 0,
		  "coordinating_rejections": 0,
		  "primary_rejections": 0,
		  "replica_rejections": 0
		},
		"limit_in_bytes": 0
	  }
	}
  }
}