I am trying out this guide on semantic search:
My first attempt was a success (hooray!). I did it on a VM provider called linode.com, using a VM with 16GB memory, 6 CPUs, Ubuntu 24.04, and Elasticsearch 9.1.
Then I repeated the experiment locally on VirtualBox, which failed. I installed a guest OS of Ubuntu 24.04 with 12GB memory and 8 CPU cores, running Elasticsearch 9.1. Things failed at this step:
POST _reindex?wait_for_completion=false
{
"source": {
"index": "test-data",
"size": 10
},
"dest": {
"index": "semantic-embeddings"
}
}
I took a look at the task and it shows these errors:
GET _tasks/KP41JOh2QJebxixKnGlEhQ:16355
{
"completed": true,
"task": {
"node": "KP41JOh2QJebxixKnGlEhQ",
"id": 16355,
"type": "transport",
"action": "indices:data/write/reindex",
"status": {
"total": 182469,
"updated": 0,
"created": 0,
"deleted": 0,
"batches": 1,
"version_conflicts": 10,
"noops": 0,
"retries": {
"bulk": 0,
"search": 0
},
"throttled_millis": 0,
"requests_per_second": -1,
"throttled_until_millis": 0
},
"description": "reindex from [test-data] to [semantic-embeddings]",
"start_time_in_millis": 1754365306642,
"running_time_in_nanos": 157589444755,
"cancellable": true,
"cancelled": false,
"headers": {
"trace.id": "b9bc6b5958fac8cbcfacb807ba464182"
}
},
"response": {
"took": 157589,
"timed_out": false,
"total": 182469,
"updated": 0,
"created": 0,
"deleted": 0,
"batches": 1,
"version_conflicts": 10,
"noops": 0,
"retries": {
"bulk": 0,
"search": 0
},
"throttled": "0s",
"throttled_millis": 0,
"requests_per_second": -1,
"throttled_until": "0s",
"throttled_until_millis": 0,
"failures": [
{
"index": "semantic-embeddings",
"id": "vh1LeJgBl0uqXQhGf0tw",
"cause": {
"type": "inference_exception",
"reason": "Exception when running inference id [.elser-2-elasticsearch] on field [content]",
"caused_by": {
"type": "status_exception",
"reason": "Error in inference process: [inference canceled as process is stopping]"
}
},
"status": 409
},
{
"index": "semantic-embeddings",
"id": "vx1LeJgBl0uqXQhGf0tw",
"cause": {
"type": "inference_exception",
"reason": "Exception when running inference id [.elser-2-elasticsearch] on field [content]",
"caused_by": {
"type": "status_exception",
"reason": "Error in inference process: [inference canceled as process is stopping]"
}
},
"status": 409
},
{
"index": "semantic-embeddings",
"id": "wB1LeJgBl0uqXQhGf0tw",
"cause": {
"type": "inference_exception",
"reason": "Exception when running inference id [.elser-2-elasticsearch] on field [content]",
"caused_by": {
"type": "status_exception",
"reason": "Error in inference process: [inference canceled as process is stopping]"
}
},
"status": 409
},
{
"index": "semantic-embeddings",
"id": "wR1LeJgBl0uqXQhGf0tw",
"cause": {
"type": "inference_exception",
"reason": "Exception when running inference id [.elser-2-elasticsearch] on field [content]",
"caused_by": {
"type": "status_exception",
"reason": "Error in inference process: [inference canceled as process is stopping]"
}
},
"status": 409
},
{
"index": "semantic-embeddings",
"id": "wh1LeJgBl0uqXQhGf0tw",
"cause": {
"type": "inference_exception",
"reason": "Exception when running inference id [.elser-2-elasticsearch] on field [content]",
"caused_by": {
"type": "status_exception",
"reason": "Error in inference process: [inference canceled as process is stopping]"
}
},
"status": 409
},
{
"index": "semantic-embeddings",
"id": "wx1LeJgBl0uqXQhGf0tw",
"cause": {
"type": "inference_exception",
"reason": "Exception when running inference id [.elser-2-elasticsearch] on field [content]",
"caused_by": {
"type": "status_exception",
"reason": "Error in inference process: [inference canceled as process is stopping]"
}
},
"status": 409
},
{
"index": "semantic-embeddings",
"id": "xB1LeJgBl0uqXQhGf0tw",
"cause": {
"type": "inference_exception",
"reason": "Exception when running inference id [.elser-2-elasticsearch] on field [content]",
"caused_by": {
"type": "status_exception",
"reason": "Error in inference process: [inference canceled as process is stopping]"
}
},
"status": 409
},
{
"index": "semantic-embeddings",
"id": "xR1LeJgBl0uqXQhGf0tw",
"cause": {
"type": "inference_exception",
"reason": "Exception when running inference id [.elser-2-elasticsearch] on field [content]",
"caused_by": {
"type": "status_exception",
"reason": "Error in inference process: [inference canceled as process is stopping]"
}
},
"status": 409
},
{
"index": "semantic-embeddings",
"id": "xh1LeJgBl0uqXQhGf0tw",
"cause": {
"type": "inference_exception",
"reason": "Exception when running inference id [.elser-2-elasticsearch] on field [content]",
"caused_by": {
"type": "status_exception",
"reason": "Error in inference process: [inference canceled as process is stopping]"
}
},
"status": 409
},
{
"index": "semantic-embeddings",
"id": "xx1LeJgBl0uqXQhGf0tw",
"cause": {
"type": "inference_exception",
"reason": "Exception when running inference id [.elser-2-elasticsearch] on field [content]",
"caused_by": {
"type": "status_exception",
"reason": "Error in inference process: [inference canceled as process is stopping]"
}
},
"status": 409
}
]
}
}
I looked in /var/log/elasticsearch/es-demo.log
and it shows these errors:
[2025-08-05T03:44:24,141][ERROR][o.e.x.m.p.AbstractNativeProcess] [esdemo] [.elser-2-elasticsearch] pytorch_inference/16853 process stopped unexpectedly: Fatal error: 'si_signo 4, si_code: 2, si_errno: 0, address: 0x729644ee0c9f, library: /usr/share/elasticsearch/modules/x-pack-ml/platform/linux-x86_64/bin/../lib/libtorch_cpu.so, base: 0x72963da00000, normalized address: 0x74e0c9f', version: 9.1.0 (build c56874a35d5f4c)
[2025-08-05T03:44:24,142][ERROR][o.e.x.m.i.d.DeploymentManager] [esdemo] [.elser-2-elasticsearch] inference process crashed due to reason [[.elser-2-elasticsearch] pytorch_inference/16853 process stopped unexpectedly: Fatal error: 'si_signo 4, si_code: 2, si_errno: 0, address: 0x729644ee0c9f, library: /usr/share/elasticsearch/modules/x-pack-ml/platform/linux-x86_64/bin/../lib/libtorch_cpu.so, base: 0x72963da00000, normalized address: 0x74e0c9f', version: 9.1.0 (build c56874a35d5f4c)
]
[2025-08-05T03:44:24,144][INFO ][o.e.x.m.i.d.DeploymentManager] [esdemo] Inference process [.elser-2-elasticsearch] failed due to [[.elser-2-elasticsearch] pytorch_inference/16853 process stopped unexpectedly: Fatal error: 'si_signo 4, si_code: 2, si_errno: 0, address: 0x729644ee0c9f, library: /usr/share/elasticsearch/modules/x-pack-ml/platform/linux-x86_64/bin/../lib/libtorch_cpu.so, base: 0x72963da00000, normalized address: 0x74e0c9f', version: 9.1.0 (build c56874a35d5f4c)
]. This is the [1] failure in 24 hours, and the process will be restarted.
[2025-08-05T03:44:24,142][ERROR][o.e.x.m.i.p.p.PyTorchResultProcessor] [esdemo] [.elser-2-elasticsearch] Error processing results
org.elasticsearch.xcontent.XContentEOFException: [3:1] Unexpected end of file
at org.elasticsearch.xcontent.provider.json.JsonXContentParser.nextToken(JsonXContentParser.java:64) ~[?:?]
at org.elasticsearch.xpack.ml.process.ProcessResultsParser$ResultIterator.hasNext(ProcessResultsParser.java:70) ~[?:?]
at org.elasticsearch.xpack.ml.inference.pytorch.process.PyTorchResultProcessor.process(PyTorchResultProcessor.java:105) ~[?:?]
at org.elasticsearch.xpack.ml.inference.deployment.DeploymentManager.lambda$startDeployment$2(DeploymentManager.java:180) ~[?:?]
at org.elasticsearch.common.util.concurrent.ThreadContext$ContextPreservingRunnable.run(ThreadContext.java:977) ~[elasticsearch-9.1.0.jar:?]
at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1095) ~[?:?]
at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:619) ~[?:?]
at java.lang.Thread.run(Thread.java:1447) ~[?:?]
Caused by: com.fasterxml.jackson.core.io.JsonEOFException: Unexpected end-of-input: expected close marker for Array (start marker at [Source: (FileInputStream); line: 2, column: 1])
at [Source: (FileInputStream); line: 3, column: 1]
at com.fasterxml.jackson.core.base.ParserMinimalBase._reportInvalidEOF(ParserMinimalBase.java:585) ~[?:?]
at com.fasterxml.jackson.core.base.ParserBase._handleEOF(ParserBase.java:535) ~[?:?]
at com.fasterxml.jackson.core.base.ParserBase._eofAsNextChar(ParserBase.java:552) ~[?:?]
at com.fasterxml.jackson.core.json.UTF8StreamJsonParser._skipWSOrEnd2(UTF8StreamJsonParser.java:3135) ~[?:?]
at com.fasterxml.jackson.core.json.UTF8StreamJsonParser._skipWSOrEnd(UTF8StreamJsonParser.java:3105) ~[?:?]
at com.fasterxml.jackson.core.json.UTF8StreamJsonParser.nextToken(UTF8StreamJsonParser.java:716) ~[?:?]
at org.elasticsearch.xcontent.provider.json.JsonXContentParser.nextToken(JsonXContentParser.java:61) ~[?:?]
... 7 more
I repeated the experiment on another VM hosted on a Proxmox machine, but it produced the same types of errors as the output above.
Why do things work on some machines but not others?