diff --git a/.github/workflows/cpu-inference.yml b/.github/workflows/cpu-inference.yml
index 38dd9bd3efef..d91034270eec 100644
--- a/.github/workflows/cpu-inference.yml
+++ b/.github/workflows/cpu-inference.yml
@@ -97,5 +97,5 @@ jobs:
           unset TORCH_CUDA_ARCH_LIST # only jit compile for current arch
           cd tests
           # LOCAL_SIZE=2 enforce CPU to report 2 devices, this helps run the test on github default runner
-          LOCAL_SIZE=2 COLUMNS=240 TRANSFORMERS_CACHE=~/tmp/transformers_cache/ TORCH_EXTENSIONS_DIR=./torch-extensions pytest -m 'seq_inference' unit/
-          LOCAL_SIZE=2 COLUMNS=240 TRANSFORMERS_CACHE=~/tmp/transformers_cache/ TORCH_EXTENSIONS_DIR=./torch-extensions pytest -m 'inference_ops' -m 'inference' unit/
+          LOCAL_SIZE=2 COLUMNS=240 HF_HOME=~/tmp/hf_home/ TORCH_EXTENSIONS_DIR=./torch-extensions pytest -m 'seq_inference' unit/
+          LOCAL_SIZE=2 COLUMNS=240 HF_HOME=~/tmp/hf_home/ TORCH_EXTENSIONS_DIR=./torch-extensions pytest -m 'inference_ops' -m 'inference' unit/
diff --git a/.github/workflows/cpu-torch-latest.yml b/.github/workflows/cpu-torch-latest.yml
index 5727ff2e1cde..213421590ad6 100644
--- a/.github/workflows/cpu-torch-latest.yml
+++ b/.github/workflows/cpu-torch-latest.yml
@@ -50,5 +50,5 @@ jobs:
         run: |
           unset TORCH_CUDA_ARCH_LIST # only jit compile for current arch
           cd tests
-          TRANSFORMERS_CACHE=/tmp/transformers_cache/ pytest $PYTEST_OPTS -n 4 unit/ --torch_ver="2.3"
-          TRANSFORMERS_CACHE=/tmp/transformers_cache/ pytest $PYTEST_OPTS -m 'sequential' unit/ --torch_ver="2.3"
+          HF_HOME=/tmp/hf_home/ pytest $PYTEST_OPTS -n 4 unit/ --torch_ver="2.3"
+          HF_HOME=/tmp/hf_home/ pytest $PYTEST_OPTS -m 'sequential' unit/ --torch_ver="2.3"
diff --git a/.github/workflows/setup-venv/action.yml b/.github/workflows/setup-venv/action.yml
index ce2c458b9e57..9a88e0651860 100644
--- a/.github/workflows/setup-venv/action.yml
+++ b/.github/workflows/setup-venv/action.yml
@@ -22,7 +22,7 @@ runs:
     - id: set-env-vars
       run: |
         echo TEST_DATA_DIR=/blob/ >> $GITHUB_ENV
-        echo TRANSFORMERS_CACHE=/blob/transformers_cache/ >> $GITHUB_ENV
+        echo HF_HOME=/blob/hf_home/ >> $GITHUB_ENV
        echo TORCH_EXTENSIONS_DIR=./torch-extensions/ >> $GITHUB_ENV
        echo TORCH_CACHE=/blob/torch_cache/ >> $GITHUB_ENV
        echo HF_DATASETS_CACHE=/blob/datasets_cache/ >> $GITHUB_ENV
diff --git a/tests/unit/inference/test_checkpoint_sharding.py b/tests/unit/inference/test_checkpoint_sharding.py
index 564b3fab6bf4..5bae9a151a27 100644
--- a/tests/unit/inference/test_checkpoint_sharding.py
+++ b/tests/unit/inference/test_checkpoint_sharding.py
@@ -110,7 +110,7 @@ def write_checkpoints_json(model_name, class_tmpdir):
     cached_repo_dir = snapshot_download(
         model_name,
         local_files_only=is_offline_mode(),
-        cache_dir=os.getenv("TRANSFORMERS_CACHE", None),
+        cache_dir=os.getenv("HF_HOME", None),
         ignore_patterns=["*.safetensors", "*.msgpack", "*.h5"],
     )
     file_list = [str(entry) for entry in Path(cached_repo_dir).rglob("*.[bp][it][n]") if entry.is_file()]
diff --git a/tests/unit/inference/test_inference.py b/tests/unit/inference/test_inference.py
index 4e203a71db60..36003319856c 100644
--- a/tests/unit/inference/test_inference.py
+++ b/tests/unit/inference/test_inference.py
@@ -84,7 +84,7 @@ class ModelInfo:
 def _hf_model_list() -> List[ModelInfo]:
     """ Caches HF model list to avoid repeated API calls """
-    cache_dir = os.getenv("TRANSFORMERS_CACHE", "~/.cache/huggingface")
+    cache_dir = os.getenv("HF_HOME", "~/.cache/huggingface")
     cache_file_path = os.path.join(cache_dir, "DS_model_cache.pkl")
     cache_expiration_seconds = 60 * 60 * 24  # 1 day