Offline deployment of pretrained models (original) (raw)

OpenSearch 2.12.0 :

Attempting to deploy the pretrained models on a server which has no internet access:
Model being deployed: https://artifacts.opensearch.org/models/ml-models/huggingface/sentence-transformers/all-distilroberta-v1/1.0.1/torch_script/sentence-transformers_all-distilroberta-v1-1.0.1-torch_script.zip

I followed the steps below, as described in the "Set up an ML language model" documentation:

PUT _cluster/settings
{
  "persistent": {
    "plugins": {
      "ml_commons": {
        "only_run_on_ml_node": "true",
        "model_access_control_enabled": "true",
        "native_memory_threshold": "99",
        "allow_registering_model_via_url": "true"
      }
    }
  }
}
POST /_plugins/_ml/model_groups/_register
{
  "name": "ml_model_group_sentence_transformers",
  "description": "A model group for sentence transformer",
  "access_mode": "public"
}
GET _plugins/_ml/model_groups/jFICf48BvNyDZcmaRMm1

{
  "name": "ml_model_group_sentence_transformers",
  "latest_version": 12,
  "description": "A model group for sentence transformer",
  "owner": {
    "name": "admin",
    "backend_roles": [
      "admin"
    ],
    "roles": [
      "own_index",
      "all_access"
    ],
    "custom_attribute_names": [],
    "user_requested_tenant": "admin_tenant"
  },
  "access": "public",
  "created_time": 1715822806196,
  "last_updated_time": 1716260811689
}
POST /_plugins/_ml/models/_register
{
  "name": "huggingface/sentence-transformers/all-distilroberta-v1",
  "version": "1.0.1",
  "model_group_id": "IE5fX48BvNyDZcmaO4Wy",
  "model_format": "TORCH_SCRIPT"
}

{
  "task_type": "REGISTER_MODEL",
  "function_name": "TEXT_EMBEDDING",
  "state": "FAILED",
  "worker_node": [
    "_dpa206sRXeuoA6LVIVvgA"
  ],
  "create_time": 1715292140960,
  "last_update_time": 1715292273277,
  "error": "Connection timed out",
  "is_async": true
}

So I had to configure an internal mirror for the ML model artifacts, after which the following request worked:

POST /_plugins/_ml/models/_register
{
  "name": "huggingface/sentence-transformers/all-MiniLM-L6-v2",
  "version": "1.0.1",
  "description": "This is a sentence-transformers model: It maps sentences & paragraphs to a 384 dimensional dense vector space and can be used for tasks like clustering or semantic search.",
  "model_task_type": "TEXT_EMBEDDING",
  "model_format": "TORCH_SCRIPT",
  "model_content_size_in_bytes": 91790008,
  "model_content_hash_value": "c15f0d2e62d872be5b5bc6c84d2e0f4921541e29fefbef51d59cc10a8ae30e0f",
  "model_config": {
    "model_type": "bert",
    "embedding_dimension": 384,
    "framework_type": "sentence_transformers",
    "all_config": """{"_name_or_path":"nreimers/MiniLM-L6-H384-uncased","architectures":["BertModel"],"attention_probs_dropout_prob":0.1,"gradient_checkpointing":false,"hidden_act":"gelu","hidden_dropout_prob":0.1,"hidden_size":384,"initializer_range":0.02,"intermediate_size":1536,"layer_norm_eps":1e-12,"max_position_embeddings":512,"model_type":"bert","num_attention_heads":12,"num_hidden_layers":6,"pad_token_id":0,"position_embedding_type":"absolute","transformers_version":"4.8.2","type_vocab_size":2,"use_cache":true,"vocab_size":30522}"""
  },
  "created_time": 1676328997102,
  "url": "https://some-internal-mirror.com/opensearch/models/ml-models/huggingface/sentence-transformers/all-MiniLM-L6-v2/1.0.1/torch_script/sentence-transformers_all-MiniLM-L6-v2-1.0.1-torch_script.zip",
  "model_group_id": "jFICf48BvNyDZcmaRMm1"
}
{
  "model_id": "kaxYmY8BPuGHzr9Two-b",
  "task_type": "REGISTER_MODEL",
  "function_name": "TEXT_EMBEDDING",
  "state": "COMPLETED",
  "worker_node": [
    "bXBS993MSreq8GU4eW8dhw"
  ],
  "create_time": 1716264682119,
  "last_update_time": 1716264695414,
  "is_async": true
}
POST /_plugins/_ml/models/kaxYmY8BPuGHzr9Two-b/_deploy

GET /_plugins/_ml/tasks/BltZmY8BvNyDZcmadPbu
export PYTORCH_LIBRARY_PATH=$HOME/.local/lib/python3.9/site-packages/torch/lib/
export PYTORCH_VERSION=1.13.1
export PYTORCH_FLAVOR=cpu

I also had to grant the Java Security Manager permissions for the above location.

org.opensearch.ml.common.exception.MLException: Failed to deploy model kaxYmY8BPuGHzr9Two-b
at org.opensearch.ml.engine.algorithms.DLModel.lambda$loadModel$1(DLModel.java:294) ~[?:?]
at java.base/java.security.AccessController.doPrivileged(AccessController.java:569) ~[?:?]
at org.opensearch.ml.engine.algorithms.DLModel.loadModel(DLModel.java:247) ~[?:?]
at org.opensearch.ml.engine.algorithms.DLModel.initModel(DLModel.java:139) ~[?:?]
at org.opensearch.ml.engine.MLEngine.deploy(MLEngine.java:125) ~[?:?]
at org.opensearch.ml.model.MLModelManager.lambda$deployModel$51(MLModelManager.java:1020) ~[?:?]
at org.opensearch.core.action.ActionListener$1.onResponse(ActionListener.java:82) [opensearch-core-2.12.0.jar:2.12.0]
at org.opensearch.ml.model.MLModelManager.lambda$retrieveModelChunks$72(MLModelManager.java:1553) [opensearch-ml-2.12.0.0.jar:2.12.0.0]
at org.opensearch.core.action.ActionListener$1.onResponse(ActionListener.java:82) [opensearch-core-2.12.0.jar:2.12.0]
at org.opensearch.action.support.ThreadedActionListener$1.doRun(ThreadedActionListener.java:78) [opensearch-2.12.0.jar:2.12.0]
at org.opensearch.common.util.concurrent.ThreadContext$ContextPreservingAbstractRunnable.doRun(ThreadContext.java:913) [opensearch-2.12.0.jar:2.12.0]
at org.opensearch.common.util.concurrent.AbstractRunnable.run(AbstractRunnable.java:52) [opensearch-2.12.0.jar:2.12.0]
at java.base/java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1136) [?:?]
at java.base/java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:635) [?:?]
at java.base/java.lang.Thread.run(Thread.java:840) [?:?]
Caused by: ai.djl.engine.EngineException: Cannot download jni files: https://publish.djl.ai/pytorch/1.13.1/jnilib/0.21.0/linux-x86_64/cpu/libdjl_torch.so

export ENGINE_CACHE_DIR=$HOME/.djl.ai/
export DJL_OFFLINE=true

Please let me know the procedure for allowing these libraries to be loaded in offline mode. Am I missing something here?