diff --git a/.github/workflows/cpu-inference.yml b/.github/workflows/cpu-inference.yml
index 38dd9bd3efef..d91034270eec 100644
--- a/.github/workflows/cpu-inference.yml
+++ b/.github/workflows/cpu-inference.yml
@@ -97,5 +97,5 @@ jobs:
           unset TORCH_CUDA_ARCH_LIST # only jit compile for current arch
           cd tests
           # LOCAL_SIZE=2 enforce CPU to report 2 devices, this helps run the test on github default runner
-          LOCAL_SIZE=2 COLUMNS=240 TRANSFORMERS_CACHE=~/tmp/transformers_cache/ TORCH_EXTENSIONS_DIR=./torch-extensions pytest -m 'seq_inference' unit/
-          LOCAL_SIZE=2 COLUMNS=240 TRANSFORMERS_CACHE=~/tmp/transformers_cache/ TORCH_EXTENSIONS_DIR=./torch-extensions pytest -m 'inference_ops' -m 'inference' unit/
+          LOCAL_SIZE=2 COLUMNS=240 HF_HOME=~/tmp/hf_home/ TORCH_EXTENSIONS_DIR=./torch-extensions pytest -m 'seq_inference' unit/
+          LOCAL_SIZE=2 COLUMNS=240 HF_HOME=~/tmp/hf_home/ TORCH_EXTENSIONS_DIR=./torch-extensions pytest -m 'inference_ops' -m 'inference' unit/
diff --git a/.github/workflows/cpu-torch-latest.yml b/.github/workflows/cpu-torch-latest.yml
index 5727ff2e1cde..213421590ad6 100644
--- a/.github/workflows/cpu-torch-latest.yml
+++ b/.github/workflows/cpu-torch-latest.yml
@@ -50,5 +50,5 @@ jobs:
         run: |
           unset TORCH_CUDA_ARCH_LIST # only jit compile for current arch
           cd tests
-          TRANSFORMERS_CACHE=/tmp/transformers_cache/ pytest $PYTEST_OPTS -n 4 unit/ --torch_ver="2.3"
-          TRANSFORMERS_CACHE=/tmp/transformers_cache/ pytest $PYTEST_OPTS -m 'sequential' unit/ --torch_ver="2.3"
+          HF_HOME=/tmp/hf_home/ pytest $PYTEST_OPTS -n 4 unit/ --torch_ver="2.3"
+          HF_HOME=/tmp/hf_home/ pytest $PYTEST_OPTS -m 'sequential' unit/ --torch_ver="2.3"
diff --git a/.github/workflows/setup-venv/action.yml b/.github/workflows/setup-venv/action.yml
index ce2c458b9e57..9a88e0651860 100644
--- a/.github/workflows/setup-venv/action.yml
+++ b/.github/workflows/setup-venv/action.yml
@@ -22,7 +22,7 @@ runs:
     - id: set-env-vars
       run: |
         echo TEST_DATA_DIR=/blob/ >> $GITHUB_ENV
-        echo TRANSFORMERS_CACHE=/blob/transformers_cache/ >> $GITHUB_ENV
+        echo HF_HOME=/blob/hf_home/ >> $GITHUB_ENV
        echo TORCH_EXTENSIONS_DIR=./torch-extensions/ >> $GITHUB_ENV
        echo TORCH_CACHE=/blob/torch_cache/ >> $GITHUB_ENV
        echo HF_DATASETS_CACHE=/blob/datasets_cache/ >> $GITHUB_ENV
diff --git a/tests/unit/inference/test_checkpoint_sharding.py b/tests/unit/inference/test_checkpoint_sharding.py
index 564b3fab6bf4..5bae9a151a27 100644
--- a/tests/unit/inference/test_checkpoint_sharding.py
+++ b/tests/unit/inference/test_checkpoint_sharding.py
@@ -110,7 +110,7 @@ def write_checkpoints_json(model_name, class_tmpdir):
     cached_repo_dir = snapshot_download(
         model_name,
         local_files_only=is_offline_mode(),
-        cache_dir=os.getenv("TRANSFORMERS_CACHE", None),
+        cache_dir=os.getenv("HF_HOME", None),
         ignore_patterns=["*.safetensors", "*.msgpack", "*.h5"],
     )
     file_list = [str(entry) for entry in Path(cached_repo_dir).rglob("*.[bp][it][n]") if entry.is_file()]
diff --git a/tests/unit/inference/test_inference.py b/tests/unit/inference/test_inference.py
index 4e203a71db60..36003319856c 100644
--- a/tests/unit/inference/test_inference.py
+++ b/tests/unit/inference/test_inference.py
@@ -84,7 +84,7 @@ class ModelInfo:
 def _hf_model_list() -> List[ModelInfo]:
     """ Caches HF model list to avoid repeated API calls """
-    cache_dir = os.getenv("TRANSFORMERS_CACHE", "~/.cache/huggingface")
+    cache_dir = os.getenv("HF_HOME", "~/.cache/huggingface")
     cache_file_path = os.path.join(cache_dir, "DS_model_cache.pkl")
     cache_expiration_seconds = 60 * 60 * 24  # 1 day