```
[RANK 0] Collecting rollouts
You're using a LlamaTokenizerFast tokenizer. Please note that with a fast tokenizer, using the `__call__` method is faster than using a method to encode the text followed by a call to the `pad` method to get a padded encoding.
---------------------------------------------------------------------------
TypeError Traceback (most recent call last)
Cell In[9], line 1
----> 1 trlx.train(
2 reward_fn=reward_function,
3 prompts=df_train['query'].tolist(),
4 eval_prompts=df_test['query'].tolist(),
5 config=config,
6 )
File ~/.conda/envs/trlx/lib/python3.10/site-packages/trlx/trlx.py:129, in train(model_path, reward_fn, dataset, samples, rewards, prompts, eval_prompts, metric_fn, config, stop_sequences)
126 if config.train.resume_from_checkpoint and os.path.exists(config.train.resume_from_checkpoint):
127 trainer.load(config.train.resume_from_checkpoint)
--> 129 trainer.learn()
130 return trainer
File ~/.conda/envs/trlx/lib/python3.10/site-packages/trlx/trainer/accelerate_base_trainer.py:521, in AccelerateRLTrainer.learn(self)
516 """
517 Samples batches from `self.store`, updates model and periodically evaluates it on `self.eval_dataloader`
518 """
519 logger.info("Starting training")
--> 521 self.prepare_learning()
522 self.iter_count = 0
523 self.nth_evaluation = 0
File ~/.conda/envs/trlx/lib/python3.10/site-packages/trlx/trainer/accelerate_ppo_trainer.py:234, in AcceleratePPOTrainer.prepare_learning(self)
231 eval_dataloader = self.eval_pipeline.create_loader(self.config.method.chunk_size)
232 self.eval_dataloader = self.accelerator.prepare_data_loader(eval_dataloader)
--> 234 self.make_experience(self.config.method.num_rollouts)
236 self.train_dataloader = self.create_train_dataloader()
238 self.n_inner_epochs = self.config.method.ppo_epochs
File ~/.conda/envs/trlx/lib/python3.10/site-packages/trlx/trainer/accelerate_ppo_trainer.py:418, in AcceleratePPOTrainer.make_experience(self, num_rollouts, iter_count)
416 position_ids.masked_fill_(attention_mask == 0, 1)
417 with torch.no_grad():
--> 418 logits, *_, values = self.model(
419 all_tokens, attention_mask=attention_mask, position_ids=position_ids
420 )
421 # TODO(dahoas): When hydra model works need to also support generation on hydra head
422 if hasattr(self.model, "frozen_head") or self.model.peft_type:
File ~/.conda/envs/trlx/lib/python3.10/site-packages/torch/nn/modules/module.py:1518, in Module._wrapped_call_impl(self, *args, **kwargs)
1516 return self._compiled_call_impl(*args, **kwargs) # type: ignore[misc]
1517 else:
-> 1518 return self._call_impl(*args, **kwargs)
File ~/.conda/envs/trlx/lib/python3.10/site-packages/torch/nn/modules/module.py:1527, in Module._call_impl(self, *args, **kwargs)
1522 # If we don't have any hooks, we want to skip the rest of the logic in
1523 # this function, and just call forward.
1524 if not (self._backward_hooks or self._backward_pre_hooks or self._forward_hooks or self._forward_pre_hooks
1525 or _global_backward_pre_hooks or _global_backward_hooks
1526 or _global_forward_hooks or _global_forward_pre_hooks):
-> 1527 return forward_call(*args, **kwargs)
1529 try:
1530 result = None
File ~/.conda/envs/trlx/lib/python3.10/site-packages/trlx/models/modeling_ppo.py:329, in AutoModelForCausalLMWithValueHead.forward(self, input_ids, attention_mask, past_key_values, position_ids, head_mask, inputs_embeds, use_cache, output_attentions, output_hidden_states, return_dict, ignore_peft_adapter)
327 outputs = self.base_model.base_model(**forward_kwargs)
328 else:
--> 329 outputs = self.base_model(**forward_kwargs)
331 # TODO: Apply PEFT to value branch
332 if self.num_value_layers_unfrozen > 0:
File ~/.conda/envs/trlx/lib/python3.10/site-packages/torch/nn/modules/module.py:1518, in Module._wrapped_call_impl(self, *args, **kwargs)
1516 return self._compiled_call_impl(*args, **kwargs) # type: ignore[misc]
1517 else:
-> 1518 return self._call_impl(*args, **kwargs)
File ~/.conda/envs/trlx/lib/python3.10/site-packages/torch/nn/modules/module.py:1527, in Module._call_impl(self, *args, **kwargs)
1522 # If we don't have any hooks, we want to skip the rest of the logic in
1523 # this function, and just call forward.
1524 if not (self._backward_hooks or self._backward_pre_hooks or self._forward_hooks or self._forward_pre_hooks
1525 or _global_backward_pre_hooks or _global_backward_hooks
1526 or _global_forward_hooks or _global_forward_pre_hooks):
-> 1527 return forward_call(*args, **kwargs)
1529 try:
1530 result = None
File ~/.conda/envs/trlx/lib/python3.10/site-packages/peft/peft_model.py:918, in PeftModelForCausalLM.forward(self, input_ids, attention_mask, inputs_embeds, labels, output_attentions, output_hidden_states, return_dict, **kwargs)
907 raise AssertionError("forward in MPTForCausalLM does not support inputs_embeds")
908 return self.base_model(
909 input_ids=input_ids,
910 attention_mask=attention_mask,
(...)
915 **kwargs,
916 )
--> 918 return self.base_model(
919 input_ids=input_ids,
920 attention_mask=attention_mask,
921 inputs_embeds=inputs_embeds,
922 labels=labels,
923 output_attentions=output_attentions,
924 output_hidden_states=output_hidden_states,
925 return_dict=return_dict,
926 **kwargs,
927 )
929 batch_size = _get_batch_size(input_ids, inputs_embeds)
930 if attention_mask is not None:
931 # concat prompt attention mask
File ~/.conda/envs/trlx/lib/python3.10/site-packages/torch/nn/modules/module.py:1518, in Module._wrapped_call_impl(self, *args, **kwargs)
1516 return self._compiled_call_impl(*args, **kwargs) # type: ignore[misc]
1517 else:
-> 1518 return self._call_impl(*args, **kwargs)
File ~/.conda/envs/trlx/lib/python3.10/site-packages/torch/nn/modules/module.py:1527, in Module._call_impl(self, *args, **kwargs)
1522 # If we don't have any hooks, we want to skip the rest of the logic in
1523 # this function, and just call forward.
1524 if not (self._backward_hooks or self._backward_pre_hooks or self._forward_hooks or self._forward_pre_hooks
1525 or _global_backward_pre_hooks or _global_backward_hooks
1526 or _global_forward_hooks or _global_forward_pre_hooks):
-> 1527 return forward_call(*args, **kwargs)
1529 try:
1530 result = None
File ~/.conda/envs/trlx/lib/python3.10/site-packages/peft/tuners/tuners_utils.py:94, in BaseTuner.forward(self, *args, **kwargs)
93 def forward(self, *args: Any, **kwargs: Any):
---> 94 return self.model.forward(*args, **kwargs)
TypeError: LlamaForCausalLM.forward() got an unexpected keyword argument 'head_mask'
```
### Which trlX version are you using?
0.7.0
### Additional system and package information
Linux
There are a few accelerator.log calls in different places that should happen on the same tick, but they should be properly synchronized via the step argument, so I'm not sure what could be the reason behind this warning 🤔
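For reference, this is the pattern that comment describes; a minimal sketch assuming Accelerate's tracking API with a wandb tracker (project name and metric names are illustrative):

```python
from accelerate import Accelerator

accelerator = Accelerator(log_with="wandb")       # assumption: wandb as the tracker
accelerator.init_trackers("trlx-logging-demo")    # illustrative project name

for step in range(3):
    # Metrics emitted from different call sites for the same tick should all
    # pass the same `step`, so the tracker keeps them aligned.
    accelerator.log({"metric_a": float(step)}, step=step)
    accelerator.log({"metric_b": float(step) * 2}, step=step)

accelerator.end_training()
```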
### 🐛 Describe the bug
I've tried to train a LLaMA model using a reward model. I created a config along these lines (sketched below).
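The original config is not included in the report. A minimal sketch of what it likely looked like, assuming trlx's `default_ppo_config` helper and the `peft_config` field on `ModelConfig`; the model name, LoRA hyperparameters, and batch sizes are illustrative placeholders, not the reporter's actual values:

```python
from trlx.data.default_configs import default_ppo_config

# Start from trlx's stock PPO config and point it at a LLaMA checkpoint.
config = default_ppo_config()
config.model.model_path = "huggyllama/llama-7b"          # illustrative checkpoint
config.tokenizer.tokenizer_path = "huggyllama/llama-7b"

# Assumed LoRA settings; the traceback shows a PEFT-wrapped model, so some
# peft_config like this was set. Values here are placeholders.
config.model.peft_config = {
    "peft_type": "LORA",
    "r": 8,
    "lora_alpha": 32,
    "lora_dropout": 0.05,
}

# Rollout/batch sizes referenced by the traceback (method.num_rollouts,
# method.chunk_size); the exact values used are unknown.
config.method.num_rollouts = 64
config.method.chunk_size = 4
config.train.batch_size = 4
```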
And the training code:
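This is the call from `Cell In[9]` in the traceback; `reward_function`, `df_train`, and `df_test` come from the reporter's own setup and are not defined here:

```python
import trlx

trlx.train(
    reward_fn=reward_function,
    prompts=df_train['query'].tolist(),
    eval_prompts=df_test['query'].tolist(),
    config=config,
)
```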
But it fails with the `head_mask` error shown in the traceback above (???)
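For context, the incompatibility can be reproduced outside trlx. A minimal sketch, assuming any LLaMA checkpoint (the model name is illustrative): trlx's value-head wrapper forwards `head_mask` to the base model, but `LlamaForCausalLM.forward()` has no such parameter.

```python
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

name = "huggyllama/llama-7b"  # illustrative LLaMA checkpoint
model = AutoModelForCausalLM.from_pretrained(name)
tokenizer = AutoTokenizer.from_pretrained(name)

inputs = tokenizer("Hello", return_tensors="pt")
# LlamaForCausalLM.forward() does not define `head_mask`, so passing it raises
# the same TypeError as in the traceback above.
model(**inputs, head_mask=torch.ones(1))
```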