diff --git a/h2o_hf/README.md b/h2o_hf/README.md
index 32ea99d..602fefb 100644
--- a/h2o_hf/README.md
+++ b/h2o_hf/README.md
@@ -8,7 +8,7 @@
 
 ```
 pip install crfm-helm
-pip install git+https://github.com/huggingface/transformers
+pip install transformers==4.33
 pip install lm-eval
 ```
 
diff --git a/h2o_hf/utils_hh/modify_gptneox.py b/h2o_hf/utils_hh/modify_gptneox.py
index dc2df97..d442e87 100644
--- a/h2o_hf/utils_hh/modify_gptneox.py
+++ b/h2o_hf/utils_hh/modify_gptneox.py
@@ -13,7 +13,7 @@
 from torch.cuda.amp import autocast
 from torch.nn import BCEWithLogitsLoss, CrossEntropyLoss, MSELoss
 
-from transformers.models.gpt_neox.modeling_gpt_neox import RotaryEmbedding, GPTNeoXAttention, apply_rotary_pos_emb
+from transformers.models.gpt_neox.modeling_gpt_neox import GPTNeoXRotaryEmbedding, GPTNeoXAttention, apply_rotary_pos_emb
 
 __all__ = ['convert_kvcache_gpt_neox_heavy_recent', 'GPTNeoXAttention_Mask']
 
@@ -34,7 +34,7 @@ def __init__(self, config):
             ),
         )
         self.register_buffer("masked_bias", torch.tensor(-1e9))
-        self.rotary_emb = RotaryEmbedding(
+        self.rotary_emb = GPTNeoXRotaryEmbedding(
             self.rotary_ndims, config.max_position_embeddings, base=config.rotary_emb_base
         )
         self.register_buffer(
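Note for anyone applying this patch against a different `transformers` release: the rename above reflects that the GPT-NeoX rotary embedding class is exported as `GPTNeoXRotaryEmbedding` in newer versions (including the pinned 4.33), while older versions exposed it as `RotaryEmbedding`. A minimal sketch follows, not part of the patch itself, showing a try/except import that aliases the old name to the new one so code like `modify_gptneox.py` could tolerate both versions:

```python
# Sketch only (assumption: not part of this patch): a version-tolerant
# import covering both class names shown in the diff above.
try:
    # Newer transformers releases (e.g. 4.33) export the renamed class.
    from transformers.models.gpt_neox.modeling_gpt_neox import (
        GPTNeoXRotaryEmbedding,
    )
except ImportError:
    # Older releases exposed the same class under the pre-rename name.
    from transformers.models.gpt_neox.modeling_gpt_neox import (
        RotaryEmbedding as GPTNeoXRotaryEmbedding,
    )
```

Pinning `transformers==4.33` as the README change does avoids the issue entirely; the fallback import is only relevant if the pin cannot be honored.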