diff --git a/h2o_hf/README.md b/h2o_hf/README.md
index 32ea99d..602fefb 100644
--- a/h2o_hf/README.md
+++ b/h2o_hf/README.md
@@ -8,7 +8,7 @@
 
 ```
 pip install crfm-helm
-pip install git+https://github.com/huggingface/transformers
+pip install transformers==4.33
 pip install lm-eval
 ```
 
diff --git a/h2o_hf/utils_hh/modify_gptneox.py b/h2o_hf/utils_hh/modify_gptneox.py
index dc2df97..d442e87 100644
--- a/h2o_hf/utils_hh/modify_gptneox.py
+++ b/h2o_hf/utils_hh/modify_gptneox.py
@@ -13,7 +13,7 @@
 from torch.cuda.amp import autocast
 from torch.nn import BCEWithLogitsLoss, CrossEntropyLoss, MSELoss
 
-from transformers.models.gpt_neox.modeling_gpt_neox import RotaryEmbedding, GPTNeoXAttention, apply_rotary_pos_emb
+from transformers.models.gpt_neox.modeling_gpt_neox import GPTNeoXRotaryEmbedding, GPTNeoXAttention, apply_rotary_pos_emb
 
 __all__ = ['convert_kvcache_gpt_neox_heavy_recent', 'GPTNeoXAttention_Mask']
 
@@ -34,7 +34,7 @@ def __init__(self, config):
             ),
         )
         self.register_buffer("masked_bias", torch.tensor(-1e9))
-        self.rotary_emb = RotaryEmbedding(
+        self.rotary_emb = GPTNeoXRotaryEmbedding(
             self.rotary_ndims, config.max_position_embeddings, base=config.rotary_emb_base
         )
         self.register_buffer(
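Note for anyone applying this patch against a different `transformers` release: the rename above reflects that the GPT-NeoX rotary embedding class is exported as `GPTNeoXRotaryEmbedding` in newer versions (including the pinned 4.33), while older versions exposed it as `RotaryEmbedding`. A minimal sketch follows, not part of the patch itself, showing a try/except import that aliases the old name to the new one so code like `modify_gptneox.py` could tolerate both versions:

```python
# Sketch only (assumption: not part of this patch): a version-tolerant
# import covering both class names shown in the diff above.
try:
    # Newer transformers releases (e.g. 4.33) export the renamed class.
    from transformers.models.gpt_neox.modeling_gpt_neox import (
        GPTNeoXRotaryEmbedding,
    )
except ImportError:
    # Older releases exposed the same class under the pre-rename name.
    from transformers.models.gpt_neox.modeling_gpt_neox import (
        RotaryEmbedding as GPTNeoXRotaryEmbedding,
    )
```

Pinning `transformers==4.33` as the README change does avoids the issue entirely; the fallback import is only relevant if the pin cannot be honored.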