Machine Learning - Trained Model: I tried the following (it looks like the magic number is 18):
a. Number of allocations: 9 (Before: 18)
b. Threads per allocation: 2 (Before: 1)
c. It went well for a short while and then hung:
Task Information:
{
"completed": false,
"task": {
"node": "jUvcJZ0PR0uytvtvHL8o7g",
"id": 12215573,
"type": "transport",
"action": "indices:data/write/reindex",
"status": {
"total": 9394509,
"updated": 0,
"created": 13952,
"deleted": 0,
"batches": 787,
"version_conflicts": 5698,
"noops": 0,
"retries": {
"bulk": 0,
"search": 0
},
"throttled_millis": 0,
"requests_per_second": -1,
"throttled_until_millis": 0
},
"description": "reindex from [products_en_ca] to [products_en_ca_bdo2]",
"start_time_in_millis": 1687524933896,
"running_time_in_nanos": 1393859398089,
"cancellable": true,
"cancelled": false,
"headers": {
"trace.id": "94785796642efca566b9f213fc353ddb"
}
}
}
Trained Model Stats:
{
"count": 1,
"trained_model_stats": [
{
"model_id": "hugging_face_model",
"model_size_stats": {
"model_size_bytes": 669494243,
"required_native_memory_bytes": 1590646726
},
"pipeline_count": 1,
"ingest": {
"total": {
"count": 45906,
"time_in_millis": 2545095017,
"current": 1,
"failed": 0
},
"pipelines": {
"product-search-vector-bdo": {
"count": 45906,
"time_in_millis": 2545095017,
"current": 1,
"failed": 0,
"processors": [
{
"set": {
"type": "set",
"stats": {
"count": 45907,
"time_in_millis": 390,
"current": 0,
"failed": 0
}
}
},
{
"inference": {
"type": "inference",
"stats": {
"count": 45906,
"time_in_millis": 2545094486,
"current": 1,
"failed": 328
}
}
}
]
}
}
},
"inference_stats": {
"failure_count": 0,
"inference_count": 19674,
"cache_miss_count": 0,
"missing_all_fields_count": 0,
"timestamp": 1687526806543
},
"deployment_stats": {
"deployment_id": "hugging_face_model",
"model_id": "hugging_face_model",
"threads_per_allocation": 2,
"number_of_allocations": 9,
"queue_capacity": 1024,
"state": "started",
"allocation_status": {
"allocation_count": 9,
"target_allocation_count": 9,
"state": "fully_allocated"
},
"cache_size": "638.4mb",
"priority": "normal",
"start_time": 1687524865801,
"inference_count": 19674,
"peak_throughput_per_minute": 1929,
"nodes": [
{
"node": {
"ggo3sgrGS6KeJ86C7BPhqw": {
"name": "instance-0000000015",
"ephemeral_id": "VPylTl2kQlmU8AttV_jzDA",
"transport_address": "172.18.128.46:19193",
"external_id": "instance-0000000015",
"attributes": {
"ml.allocated_processors": "18",
"availability_zone": "us-east-2a",
"server_name": "instance-0000000015.d0a9eb97ab874c5abfbc8aa70c9b6009",
"logical_availability_zone": "zone-0",
"ml.max_jvm_size": "8589934592",
"region": "us-east-2",
"ml.machine_memory": "34359738368",
"ml.allocated_processors_double": "18.0",
"xpack.installed": "true",
"instance_configuration": "aws.es.ml.c5d"
},
"roles": [
"ml",
"remote_cluster_client"
],
"version": "8.8.1"
}
},
"routing_state": {
"routing_state": "started"
},
"inference_count": 19674,
"average_inference_time_ms": 223.29195893056826,
"average_inference_time_ms_excluding_cache_hits": 215.96183706943685,
"inference_cache_hit_count": 1384,
"last_access": 1687525808124,
"number_of_pending_requests": 1,
"start_time": 1687524866710,
"threads_per_allocation": 2,
"number_of_allocations": 9,
"peak_throughput_per_minute": 1929,
"throughput_last_minute": 0,
"inference_cache_hit_count_last_minute": 0
}
]
}
}
]
}