I'm using Elasticsearch 8.2.3, running locally in Docker with a free trial license.
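For context, the container is started along these lines (a minimal sketch only; the port mapping matches the localhost:9203 URL below, but the other flags are a generic single-node setup rather than my exact command, and the trial license was enabled separately):

# illustrative single-node run; the real flags/memory limits in my setup may differ
docker run -d --name es-ml \
  -p 9203:9200 \
  -e "discovery.type=single-node" \
  docker.elastic.co/elasticsearch/elasticsearch:8.2.3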
I imported this model with eland:
eland_import_hub_model --url http://localhost:9203 --hub-model-id sentence-transformers/distilbert-base-nli-stsb-mean-tokens --task-type text_embedding --start
The import finishes with "Model successfully imported".
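For what it's worth, the model is visible when I look it up by the id Elasticsearch assigned (note the double underscore); the Kibana Dev Tools request below is just the sanity check I mean:

GET _ml/trained_models/sentence-transformers__distilbert-base-nli-stsb-mean-tokens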
When I call
GET _ml/trained_models/_stats
I get this response:
{
  "count" : 2,
  "trained_model_stats" : [
    ...
    {
      "model_id" : "sentence-transformers__distilbert-base-nli-stsb-mean-tokens",
      "model_size_stats" : {
        "model_size_bytes" : 265707040,
        "required_native_memory_bytes" : 783072320
      },
      "pipeline_count" : 0,
      "deployment_stats" : {
        "model_id" : "sentence-transformers__distilbert-base-nli-stsb-mean-tokens",
        "inference_threads" : 1,
        "model_threads" : 1,
        "queue_capacity" : 1024,
        "state" : "starting",
        "allocation_status" : {
          "allocation_count" : 0,
          "target_allocation_count" : 1,
          "state" : "starting"
        },
        "start_time" : 1656522931226,
        "peak_throughput_per_minute" : 0,
        "nodes" : [
          {
            "node" : {
              "1IIIL5CQS9W6SwlB_OWnfg" : {
                "name" : "3607b60332b4",
                "ephemeral_id" : "-f74YlcRTg6-Exu-J0f4vQ",
                "transport_address" : "172.23.0.2:9300",
                "attributes" : {
                  "ml.max_jvm_size" : "1073741824",
                  "xpack.installed" : "true",
                  "ml.machine_memory" : "8346603520"
                },
                "roles" : [
                  "data",
                  "data_cold",
                  "data_content",
                  "data_frozen",
                  "data_hot",
                  "data_warm",
                  "ingest",
                  "master",
                  "ml",
                  "remote_cluster_client",
                  "transform"
                ]
              }
            },
            "routing_state" : {
              "routing_state" : "failed",
              "reason" : "inference process crashed due to reason [[sentence-transformers__distilbert-base-nli-stsb-mean-tokens] pytorch_inference/4323 process stopped unexpectedly: ]"
            },
            "peak_throughput_per_minute" : 0,
            "throughput_last_minute" : 0
          }
        ]
      }
    }
  ]
}
Any advice on what might cause "inference process crashed due to reason [[sentence-transformers__distilbert-base-nli-stsb-mean-tokens] pytorch_inference/4323 process stopped unexpectedly: ]"?
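Is there anything I should check or change before retrying the deployment, e.g. stopping and starting it again along these lines (just a sketch; the model id is the one from the stats output above)?

POST _ml/trained_models/sentence-transformers__distilbert-base-nli-stsb-mean-tokens/deployment/_stop
POST _ml/trained_models/sentence-transformers__distilbert-base-nli-stsb-mean-tokens/deployment/_start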
I also found these errors in the logs of the Elasticsearch container:
{"@timestamp":"2022-06-29T17:15:42.454Z", "log.level":"ERROR", "message":"[sentence-transformers__distilbert-base-nli-stsb-mean-tokens] pytorch_inference/4323 process stopped unexpectedly: ", "ecs.version": "1.2.0","service.name":"ES_ECS","event.dataset":"elasticsearch.server","process.thread.name":"elasticsearch[3607b60332b4][ml_job_comms][T#1]","log.logger":"org.elasticsearch.xpack.ml.process.AbstractNativeProcess","elasticsearch.cluster.uuid":"dxO-jitLRySmpPHifUz7ig","elasticsearch.node.id":"1IIIL5CQS9W6SwlB_OWnfg","elasticsearch.node.name":"3607b60332b4","elasticsearch.cluster.name":"docker-cluster"}
{"@timestamp":"2022-06-29T17:15:42.472Z", "log.level":"ERROR", "message":"[sentence-transformers__distilbert-base-nli-stsb-mean-tokens] inference process crashed due to reason [[sentence-transformers__distilbert-base-nli-stsb-mean-tokens] pytorch_inference/4323 process stopped unexpectedly: ]", "ecs.version": "1.2.0","service.name":"ES_ECS","event.dataset":"elasticsearch.server","process.thread.name":"elasticsearch[3607b60332b4][ml_job_comms][T#1]","log.logger":"org.elasticsearch.xpack.ml.inference.deployment.DeploymentManager","elasticsearch.cluster.uuid":"dxO-jitLRySmpPHifUz7ig","elasticsearch.node.id":"1IIIL5CQS9W6SwlB_OWnfg","elasticsearch.node.name":"3607b60332b4","elasticsearch.cluster.name":"docker-cluster"}
{"@timestamp":"2022-06-29T17:15:42.532Z", "log.level":"ERROR", "message":"[controller/304] [CDetachedProcessSpawner.cc@188] Child process with PID 4323 was terminated by signal 9", "ecs.version": "1.2.0","service.name":"ES_ECS","event.dataset":"elasticsearch.server","process.thread.name":"ml-cpp-log-tail-thread","log.logger":"org.elasticsearch.xpack.ml.process.logging.CppLogMessageHandler","elasticsearch.cluster.uuid":"dxO-jitLRySmpPHifUz7ig","elasticsearch.node.id":"1IIIL5CQS9W6SwlB_OWnfg","elasticsearch.node.name":"3607b60332b4","elasticsearch.cluster.name":"docker-cluster"}