```shell
-$ HUGGING_FACE_HUB_TOKEN=...
+$ HF_TOKEN=...
$ WANDB_API_KEY=...
$ dstack apply -f examples/fine-tuning/axolotl/train.dstack.yml
```
@@ -116,7 +116,7 @@ If you'd like to play with the example using a dev environment, run
```shell
-$ HUGGING_FACE_HUB_TOKEN=...
+$ HF_TOKEN=...
$ WANDB_API_KEY=...
$ dstack apply -f examples/fine-tuning/axolotl/.dstack.yaml
```
diff --git a/examples/fine-tuning/axolotl/amd/build.flash-attention.yaml b/examples/fine-tuning/axolotl/amd/build.flash-attention.yaml
index 1468bf8dc..c60e993a3 100644
--- a/examples/fine-tuning/axolotl/amd/build.flash-attention.yaml
+++ b/examples/fine-tuning/axolotl/amd/build.flash-attention.yaml
@@ -6,7 +6,7 @@ image: runpod/pytorch:2.1.2-py3.10-rocm6.0.2-ubuntu22.04
# Required environment variables
env:
- - HUGGING_FACE_HUB_TOKEN
+ - HF_TOKEN
- GPU_ARCHS="gfx90a;gfx942"
- AWS_ACCESS_KEY_ID
- AWS_SECRET_ACCESS_KEY
diff --git a/examples/fine-tuning/axolotl/amd/build.xformers.yaml b/examples/fine-tuning/axolotl/amd/build.xformers.yaml
index a3733ec50..49cbc1e7a 100644
--- a/examples/fine-tuning/axolotl/amd/build.xformers.yaml
+++ b/examples/fine-tuning/axolotl/amd/build.xformers.yaml
@@ -6,7 +6,7 @@ image: runpod/pytorch:2.1.2-py3.10-rocm6.0.2-ubuntu22.04
# Required environment variables
env:
- - HUGGING_FACE_HUB_TOKEN
+ - HF_TOKEN
- GPU_ARCHS="gfx90a;gfx942"
- AWS_ACCESS_KEY_ID
- AWS_SECRET_ACCESS_KEY
diff --git a/examples/fine-tuning/axolotl/amd/train.dstack.yaml b/examples/fine-tuning/axolotl/amd/train.dstack.yaml
index 5de02b353..80921fdb4 100644
--- a/examples/fine-tuning/axolotl/amd/train.dstack.yaml
+++ b/examples/fine-tuning/axolotl/amd/train.dstack.yaml
@@ -6,7 +6,7 @@ image: runpod/pytorch:2.1.2-py3.10-rocm6.0.2-ubuntu22.04
# Required environment variables
env:
- - HUGGING_FACE_HUB_TOKEN
+ - HF_TOKEN
# Commands of the task
commands:
- export PATH=/opt/conda/envs/py_3.10/bin:$PATH
diff --git a/examples/fine-tuning/axolotl/train.dstack.yaml b/examples/fine-tuning/axolotl/train.dstack.yaml
index 3dd8b8ddb..38d543110 100644
--- a/examples/fine-tuning/axolotl/train.dstack.yaml
+++ b/examples/fine-tuning/axolotl/train.dstack.yaml
@@ -7,7 +7,7 @@ image: winglian/axolotl-cloud:main-20240429-py3.11-cu121-2.2.1
# Required environment variables
env:
- - HUGGING_FACE_HUB_TOKEN
+ - HF_TOKEN
- WANDB_API_KEY
# Commands of the task
commands:
diff --git a/examples/fine-tuning/optimum-tpu/llama31/.dstack.yml b/examples/fine-tuning/optimum-tpu/llama31/.dstack.yml
index 8dc522e0e..2577f2f9a 100644
--- a/examples/fine-tuning/optimum-tpu/llama31/.dstack.yml
+++ b/examples/fine-tuning/optimum-tpu/llama31/.dstack.yml
@@ -7,7 +7,7 @@ python: "3.11"
# Required environment variables
env:
- - HUGGING_FACE_HUB_TOKEN
+ - HF_TOKEN
# Refer to Note section in examples/gpus/tpu/README.md for more information about the optimum-tpu repository.
# Uncomment if you want the environment to be pre-installed
diff --git a/examples/fine-tuning/optimum-tpu/llama31/train.dstack.yml b/examples/fine-tuning/optimum-tpu/llama31/train.dstack.yml
index 04fdfb744..4a4234177 100644
--- a/examples/fine-tuning/optimum-tpu/llama31/train.dstack.yml
+++ b/examples/fine-tuning/optimum-tpu/llama31/train.dstack.yml
@@ -6,7 +6,7 @@ python: "3.11"
# Required environment variables
env:
- - HUGGING_FACE_HUB_TOKEN
+ - HF_TOKEN
# Commands of the task
commands:
diff --git a/examples/fine-tuning/qlora/train.dstack.yml b/examples/fine-tuning/qlora/train.dstack.yml
index a51bb1ff0..5f8785e4a 100644
--- a/examples/fine-tuning/qlora/train.dstack.yml
+++ b/examples/fine-tuning/qlora/train.dstack.yml
@@ -3,7 +3,7 @@ type: task
python: "3.11"
env:
- - HUGGING_FACE_HUB_TOKEN
+ - HF_TOKEN
- HF_HUB_ENABLE_HF_TRANSFER=1
commands:
diff --git a/examples/fine-tuning/trl/.dstack.yml b/examples/fine-tuning/trl/.dstack.yml
index 13685d624..b9720c326 100644
--- a/examples/fine-tuning/trl/.dstack.yml
+++ b/examples/fine-tuning/trl/.dstack.yml
@@ -7,7 +7,7 @@ python: "3.10"
# Required environment variables
env:
- - HUGGING_FACE_HUB_TOKEN
+ - HF_TOKEN
- ACCELERATE_LOG_LEVEL=info
- WANDB_API_KEY
# Uncomment if you want the environment to be pre-installed
diff --git a/examples/fine-tuning/trl/README.md b/examples/fine-tuning/trl/README.md
index 5cffec021..03b33ff4e 100644
--- a/examples/fine-tuning/trl/README.md
+++ b/examples/fine-tuning/trl/README.md
@@ -32,7 +32,7 @@ python: "3.10"
nvcc: true
env:
- - HUGGING_FACE_HUB_TOKEN
+ - HF_TOKEN
- WANDB_API_KEY
commands:
- pip install "transformers>=4.43.2"
@@ -108,7 +108,7 @@ python: "3.10"
nvcc: true
env:
- - HUGGING_FACE_HUB_TOKEN
+ - HF_TOKEN
- WANDB_API_KEY
commands:
- pip install "transformers>=4.43.2"
diff --git a/examples/fine-tuning/trl/amd/train.dstack.yaml b/examples/fine-tuning/trl/amd/train.dstack.yaml
index 69b8744c3..3a41dc3fe 100644
--- a/examples/fine-tuning/trl/amd/train.dstack.yaml
+++ b/examples/fine-tuning/trl/amd/train.dstack.yaml
@@ -7,7 +7,7 @@ image: runpod/pytorch:2.1.2-py3.10-rocm6.1-ubuntu22.04
# Required environment variables
env:
- - HUGGING_FACE_HUB_TOKEN
+ - HF_TOKEN
commands:
- export PATH=/opt/conda/envs/py_3.10/bin:$PATH
diff --git a/examples/fine-tuning/trl/train-distrib.dstack.yml b/examples/fine-tuning/trl/train-distrib.dstack.yml
index d8af736cf..18987f80e 100644
--- a/examples/fine-tuning/trl/train-distrib.dstack.yml
+++ b/examples/fine-tuning/trl/train-distrib.dstack.yml
@@ -9,7 +9,7 @@ python: "3.10"
# Required environment variables
env:
- - HUGGING_FACE_HUB_TOKEN
+ - HF_TOKEN
- ACCELERATE_LOG_LEVEL=info
- WANDB_API_KEY
# Commands of the task
diff --git a/examples/fine-tuning/trl/train.dstack.yml b/examples/fine-tuning/trl/train.dstack.yml
index c91783b7a..a0f3f674c 100644
--- a/examples/fine-tuning/trl/train.dstack.yml
+++ b/examples/fine-tuning/trl/train.dstack.yml
@@ -6,7 +6,7 @@ python: "3.10"
# Required environment variables
env:
- - HUGGING_FACE_HUB_TOKEN
+ - HF_TOKEN
- ACCELERATE_LOG_LEVEL=info
- WANDB_API_KEY
# Commands of the task
diff --git a/examples/llms/llama31/.dstack.yml b/examples/llms/llama31/.dstack.yml
index b9782c82a..e19289978 100644
--- a/examples/llms/llama31/.dstack.yml
+++ b/examples/llms/llama31/.dstack.yml
@@ -7,7 +7,7 @@ python: "3.10"
# Required environment variables
env:
- - HUGGING_FACE_HUB_TOKEN
+ - HF_TOKEN
ide: vscode
# Use either spot or on-demand instances
diff --git a/examples/llms/llama31/README.md b/examples/llms/llama31/README.md
index 605b51cfd..6230db264 100644
--- a/examples/llms/llama31/README.md
+++ b/examples/llms/llama31/README.md
@@ -34,7 +34,7 @@ Below is the configuration file for the task.
python: "3.10"
env:
- - HUGGING_FACE_HUB_TOKEN
+ - HF_TOKEN
- MODEL_ID=meta-llama/Meta-Llama-3.1-8B-Instruct
- MAX_MODEL_LEN=4096
commands:
@@ -67,7 +67,7 @@ Below is the configuration file for the task.
image: ghcr.io/huggingface/text-generation-inference:latest
env:
- - HUGGING_FACE_HUB_TOKEN
+ - HF_TOKEN
- MODEL_ID=meta-llama/Meta-Llama-3.1-8B-Instruct
- MAX_INPUT_LENGTH=4000
- MAX_TOTAL_TOKENS=4096
@@ -161,7 +161,7 @@ To run a configuration, use the [`dstack apply`](https://dstack.ai/docs/referenc
```shell
-$ HUGGING_FACE_HUB_TOKEN=...
+$ HF_TOKEN=...
$ dstack apply -f examples/llms/llama31/vllm/task.dstack.yml
@@ -226,7 +226,7 @@ python: "3.10"
nvcc: true
env:
- - HUGGING_FACE_HUB_TOKEN
+ - HF_TOKEN
- WANDB_API_KEY
commands:
- pip install "transformers>=4.43.2"
@@ -312,7 +312,7 @@ python: "3.10"
nvcc: true
env:
- - HUGGING_FACE_HUB_TOKEN
+ - HF_TOKEN
- WANDB_API_KEY
commands:
- pip install "transformers>=4.43.2"
diff --git a/examples/llms/llama31/tgi/.dstack.yml b/examples/llms/llama31/tgi/.dstack.yml
index c1ddb4ab0..e2ce95819 100644
--- a/examples/llms/llama31/tgi/.dstack.yml
+++ b/examples/llms/llama31/tgi/.dstack.yml
@@ -7,7 +7,7 @@ image: ghcr.io/huggingface/text-generation-inference:latest
# Required environment variables
env:
- - HUGGING_FACE_HUB_TOKEN
+ - HF_TOKEN
ide: vscode
# Use either spot or on-demand instances
diff --git a/examples/llms/llama31/tgi/task.dstack.yml b/examples/llms/llama31/tgi/task.dstack.yml
index 42ebf5c77..219b24b87 100644
--- a/examples/llms/llama31/tgi/task.dstack.yml
+++ b/examples/llms/llama31/tgi/task.dstack.yml
@@ -7,7 +7,7 @@ image: ghcr.io/huggingface/text-generation-inference:latest
# Required environment variables
env:
- - HUGGING_FACE_HUB_TOKEN
+ - HF_TOKEN
- MODEL_ID=meta-llama/Meta-Llama-3.1-8B-Instruct
- MAX_INPUT_LENGTH=4000
- MAX_TOTAL_TOKENS=4096
diff --git a/examples/llms/llama31/vllm/task.dstack.yml b/examples/llms/llama31/vllm/task.dstack.yml
index 67606f2a7..427bef351 100644
--- a/examples/llms/llama31/vllm/task.dstack.yml
+++ b/examples/llms/llama31/vllm/task.dstack.yml
@@ -6,7 +6,7 @@ python: "3.10"
# Required environment variables
env:
- - HUGGING_FACE_HUB_TOKEN
+ - HF_TOKEN
- MODEL_ID=meta-llama/Meta-Llama-3.1-8B-Instruct
- MAX_MODEL_LEN=4096
commands:
diff --git a/examples/llms/llama32/.dstack.yml b/examples/llms/llama32/.dstack.yml
index 6915cd3fc..84be302d3 100644
--- a/examples/llms/llama32/.dstack.yml
+++ b/examples/llms/llama32/.dstack.yml
@@ -7,7 +7,7 @@ python: "3.10"
# Required environment variables
env:
- - HUGGING_FACE_HUB_TOKEN
+ - HF_TOKEN
ide: vscode
# Use either spot or on-demand instances
diff --git a/examples/llms/llama32/README.md b/examples/llms/llama32/README.md
index 7d30ea1cc..f1e392f46 100644
--- a/examples/llms/llama32/README.md
+++ b/examples/llms/llama32/README.md
@@ -31,7 +31,7 @@ name: llama32-task-vllm
python: "3.10"
# Required environment variables
env:
- - HUGGING_FACE_HUB_TOKEN
+ - HF_TOKEN
- MODEL_ID=meta-llama/Llama-3.2-11B-Vision-Instruct
- MAX_MODEL_LEN=13488
- MAX_NUM_SEQS=40
@@ -85,7 +85,7 @@ To run a configuration, use the [`dstack apply`](https://dstack.ai/docs/referenc
```shell
-$ HUGGING_FACE_HUB_TOKEN=...
+$ HF_TOKEN=...
$ dstack apply -f examples/llms/llama32/vllm/task.dstack.yml
diff --git a/examples/llms/llama32/vllm/task.dstack.yml b/examples/llms/llama32/vllm/task.dstack.yml
index 0dffb1169..e537e0a43 100644
--- a/examples/llms/llama32/vllm/task.dstack.yml
+++ b/examples/llms/llama32/vllm/task.dstack.yml
@@ -6,7 +6,7 @@ python: "3.10"
# Required environment variables
env:
- - HUGGING_FACE_HUB_TOKEN
+ - HF_TOKEN
- MODEL_ID=meta-llama/Llama-3.2-11B-Vision-Instruct
- MAX_MODEL_LEN=13488
- MAX_NUM_SEQS=40
diff --git a/examples/llms/mixtral/tgi.dstack.yml b/examples/llms/mixtral/tgi.dstack.yml
index 31868de97..db90043a8 100644
--- a/examples/llms/mixtral/tgi.dstack.yml
+++ b/examples/llms/mixtral/tgi.dstack.yml
@@ -3,7 +3,7 @@ type: service
image: ghcr.io/huggingface/text-generation-inference:latest
env:
- - HUGGING_FACE_HUB_TOKEN
+ - HF_TOKEN
- MODEL_ID=mistralai/Mixtral-8x7B-Instruct-v0.1
commands:
- text-generation-launcher
diff --git a/examples/llms/mixtral/vllm.dstack.yml b/examples/llms/mixtral/vllm.dstack.yml
index 59d0376a3..31bc11908 100644
--- a/examples/llms/mixtral/vllm.dstack.yml
+++ b/examples/llms/mixtral/vllm.dstack.yml
@@ -3,7 +3,7 @@ type: service
python: "3.11"
env:
- - HUGGING_FACE_HUB_TOKEN
+ - HF_TOKEN
commands:
- pip install vllm
- python -m vllm.entrypoints.openai.api_server
diff --git a/src/dstack/_internal/server/services/gateways/options.py b/src/dstack/_internal/server/services/gateways/options.py
index c46ad8662..6c75acb2c 100644
--- a/src/dstack/_internal/server/services/gateways/options.py
+++ b/src/dstack/_internal/server/services/gateways/options.py
@@ -10,7 +10,7 @@
def complete_service_model(model_info: AnyModel, env: Dict[str, str]):
if model_info.type == "chat" and model_info.format == "tgi":
if model_info.chat_template is None or model_info.eos_token is None:
- hf_token = env.get("HUGGING_FACE_HUB_TOKEN", None)
+ hf_token = env.get("HF_TOKEN", env.get("HUGGING_FACE_HUB_TOKEN"))
tokenizer_config = get_tokenizer_config(model_info.name, hf_token=hf_token)
if model_info.chat_template is None:
model_info.chat_template = tokenizer_config[
@@ -35,9 +35,9 @@ def get_tokenizer_config(model_id: str, hf_token: Optional[str] = None) -> dict:
if resp.status_code == 403:
raise ServerClientError("Private HF models are not supported")
if resp.status_code == 401:
- message = "Failed to access gated model. Specify HUGGING_FACE_HUB_TOKEN env."
+ message = "Failed to access gated model. Specify HF_TOKEN env."
if hf_token is not None:
- message = "Failed to access gated model. Invalid HUGGING_FACE_HUB_TOKEN env."
+ message = "Failed to access gated model. Invalid HF_TOKEN env."
raise ServerClientError(message)
resp.raise_for_status()
except requests.RequestException as e: