From b0ae592b6822f4e64e8a19056d2675613d9c5f97 Mon Sep 17 00:00:00 2001 From: Logan Adams Date: Thu, 14 Dec 2023 09:18:29 -0800 Subject: [PATCH 1/3] Test moving to HF_HOME from TRANSFORMERS_CACHE --- .github/workflows/cpu-inference.yml | 4 ++-- .github/workflows/nv-torch-latest-cpu.yml | 4 ++-- .github/workflows/setup-venv/action.yml | 2 +- tests/unit/inference/test_checkpoint_sharding.py | 2 +- 4 files changed, 6 insertions(+), 6 deletions(-) diff --git a/.github/workflows/cpu-inference.yml b/.github/workflows/cpu-inference.yml index 521fe2b5bea4..fddf1f79be03 100644 --- a/.github/workflows/cpu-inference.yml +++ b/.github/workflows/cpu-inference.yml @@ -71,5 +71,5 @@ jobs: source oneCCL/build/_install/env/setvars.sh unset TORCH_CUDA_ARCH_LIST # only jit compile for current arch cd tests - TRANSFORMERS_CACHE=~/tmp/transformers_cache/ TORCH_EXTENSIONS_DIR=./torch-extensions pytest -m 'seq_inference' unit/ - TRANSFORMERS_CACHE=~/tmp/transformers_cache/ TORCH_EXTENSIONS_DIR=./torch-extensions pytest -m 'inference_ops' -m 'inference' unit/ + HF_HOME=~/tmp/transformers_cache/ TORCH_EXTENSIONS_DIR=./torch-extensions pytest -m 'seq_inference' unit/ + HF_HOME=~/tmp/transformers_cache/ TORCH_EXTENSIONS_DIR=./torch-extensions pytest -m 'inference_ops' -m 'inference' unit/ diff --git a/.github/workflows/nv-torch-latest-cpu.yml b/.github/workflows/nv-torch-latest-cpu.yml index 375b984134cb..3d79b4a15114 100644 --- a/.github/workflows/nv-torch-latest-cpu.yml +++ b/.github/workflows/nv-torch-latest-cpu.yml @@ -45,5 +45,5 @@ jobs: run: | unset TORCH_CUDA_ARCH_LIST # only jit compile for current arch cd tests - TRANSFORMERS_CACHE=/tmp/transformers_cache/ pytest $PYTEST_OPTS -n 4 unit/ --torch_ver="1.12" - TRANSFORMERS_CACHE=/tmp/transformers_cache/ pytest $PYTEST_OPTS -m 'sequential' unit/ --torch_ver="1.12" + HF_HOME=/tmp/transformers_cache/ pytest $PYTEST_OPTS -n 4 unit/ --torch_ver="1.12" + HF_HOME=/tmp/transformers_cache/ pytest $PYTEST_OPTS -m 'sequential' unit/ --torch_ver="1.12" diff --git a/.github/workflows/setup-venv/action.yml b/.github/workflows/setup-venv/action.yml index ce2c458b9e57..1fc96097e390 100644 --- a/.github/workflows/setup-venv/action.yml +++ b/.github/workflows/setup-venv/action.yml @@ -22,7 +22,7 @@ runs: - id: set-env-vars run: | echo TEST_DATA_DIR=/blob/ >> $GITHUB_ENV - echo TRANSFORMERS_CACHE=/blob/transformers_cache/ >> $GITHUB_ENV + echo HF_HOME=/blob/transformers_cache/ >> $GITHUB_ENV echo TORCH_EXTENSIONS_DIR=./torch-extensions/ >> $GITHUB_ENV echo TORCH_CACHE=/blob/torch_cache/ >> $GITHUB_ENV echo HF_DATASETS_CACHE=/blob/datasets_cache/ >> $GITHUB_ENV diff --git a/tests/unit/inference/test_checkpoint_sharding.py b/tests/unit/inference/test_checkpoint_sharding.py index 564b3fab6bf4..5bae9a151a27 100644 --- a/tests/unit/inference/test_checkpoint_sharding.py +++ b/tests/unit/inference/test_checkpoint_sharding.py @@ -110,7 +110,7 @@ def write_checkpoints_json(model_name, class_tmpdir): cached_repo_dir = snapshot_download( model_name, local_files_only=is_offline_mode(), - cache_dir=os.getenv("TRANSFORMERS_CACHE", None), + cache_dir=os.getenv("HF_HOME", None), ignore_patterns=["*.safetensors", "*.msgpack", "*.h5"], ) file_list = [str(entry) for entry in Path(cached_repo_dir).rglob("*.[bp][it][n]") if entry.is_file()] From 910aacf3a7ccea5d4a468954f213128fb1577e15 Mon Sep 17 00:00:00 2001 From: Logan Adams Date: Mon, 8 Apr 2024 15:08:51 -0700 Subject: [PATCH 2/3] Update remaining instances of TRANSFORMERS_CACHE to HF_HOME --- tests/unit/inference/test_inference.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/unit/inference/test_inference.py b/tests/unit/inference/test_inference.py index 4e203a71db60..36003319856c 100644 --- a/tests/unit/inference/test_inference.py +++ b/tests/unit/inference/test_inference.py @@ -84,7 +84,7 @@ class ModelInfo: def _hf_model_list() -> List[ModelInfo]: """ Caches HF model list to avoid repeated API calls """ - cache_dir = os.getenv("TRANSFORMERS_CACHE", "~/.cache/huggingface") + cache_dir = os.getenv("HF_HOME", "~/.cache/huggingface") cache_file_path = os.path.join(cache_dir, "DS_model_cache.pkl") cache_expiration_seconds = 60 * 60 * 24 # 1 day From 94e1646246dad883c23901bfa28d97a4b3539392 Mon Sep 17 00:00:00 2001 From: Logan Adams Date: Tue, 21 May 2024 16:36:02 -0700 Subject: [PATCH 3/3] Update path --- .github/workflows/cpu-inference.yml | 4 ++-- .github/workflows/cpu-torch-latest.yml | 4 ++-- .github/workflows/setup-venv/action.yml | 2 +- 3 files changed, 5 insertions(+), 5 deletions(-) diff --git a/.github/workflows/cpu-inference.yml b/.github/workflows/cpu-inference.yml index c95527daa384..d91034270eec 100644 --- a/.github/workflows/cpu-inference.yml +++ b/.github/workflows/cpu-inference.yml @@ -97,5 +97,5 @@ jobs: unset TORCH_CUDA_ARCH_LIST # only jit compile for current arch cd tests # LOCAL_SIZE=2 enforce CPU to report 2 devices, this helps run the test on github default runner - LOCAL_SIZE=2 COLUMNS=240 HF_HOME=~/tmp/transformers_cache/ TORCH_EXTENSIONS_DIR=./torch-extensions pytest -m 'seq_inference' unit/ - LOCAL_SIZE=2 COLUMNS=240 HF_HOME=~/tmp/transformers_cache/ TORCH_EXTENSIONS_DIR=./torch-extensions pytest -m 'inference_ops' -m 'inference' unit/ + LOCAL_SIZE=2 COLUMNS=240 HF_HOME=~/tmp/hf_home/ TORCH_EXTENSIONS_DIR=./torch-extensions pytest -m 'seq_inference' unit/ + LOCAL_SIZE=2 COLUMNS=240 HF_HOME=~/tmp/hf_home/ TORCH_EXTENSIONS_DIR=./torch-extensions pytest -m 'inference_ops' -m 'inference' unit/ diff --git a/.github/workflows/cpu-torch-latest.yml b/.github/workflows/cpu-torch-latest.yml index 5287b8be8069..213421590ad6 100644 --- a/.github/workflows/cpu-torch-latest.yml +++ b/.github/workflows/cpu-torch-latest.yml @@ -50,5 +50,5 @@ jobs: run: | unset TORCH_CUDA_ARCH_LIST # only jit compile for current arch cd tests - HF_HOME=/tmp/transformers_cache/ pytest $PYTEST_OPTS -n 4 unit/ --torch_ver="2.3" - HF_HOME=/tmp/transformers_cache/ pytest $PYTEST_OPTS -m 'sequential' unit/ --torch_ver="2.3" + HF_HOME=/tmp/hf_home/ pytest $PYTEST_OPTS -n 4 unit/ --torch_ver="2.3" + HF_HOME=/tmp/hf_home/ pytest $PYTEST_OPTS -m 'sequential' unit/ --torch_ver="2.3" diff --git a/.github/workflows/setup-venv/action.yml b/.github/workflows/setup-venv/action.yml index 1fc96097e390..9a88e0651860 100644 --- a/.github/workflows/setup-venv/action.yml +++ b/.github/workflows/setup-venv/action.yml @@ -22,7 +22,7 @@ runs: - id: set-env-vars run: | echo TEST_DATA_DIR=/blob/ >> $GITHUB_ENV - echo HF_HOME=/blob/transformers_cache/ >> $GITHUB_ENV + echo HF_HOME=/blob/hf_home/ >> $GITHUB_ENV echo TORCH_EXTENSIONS_DIR=./torch-extensions/ >> $GITHUB_ENV echo TORCH_CACHE=/blob/torch_cache/ >> $GITHUB_ENV echo HF_DATASETS_CACHE=/blob/datasets_cache/ >> $GITHUB_ENV