Fine tune error #9

Open

bhuvneshsaini opened this issue Dec 20, 2023 · 0 comments

@bhuvneshsaini
I was simply following the steps described by you.

Start training

RuntimeError Traceback (most recent call last)
Cell In[6], line 32
29 trainer.load_base_model()
31 print("Start training")
---> 32 trainer.train()
34 print("Merge model and save")
35 trainer.merge_and_save()

File /kaggle/input/llama-fine/llm_qlora-main/QloraTrainer.py:95, in QloraTrainer.train(self)
77 trainer = transformers.Trainer(
78 model=model,
79 train_dataset=data["train"],
(...)
92 data_collator=transformers.DataCollatorForLanguageModeling(self.tokenizer, mlm=False),
93 )
94 model.config.use_cache = False # silence the warnings. Please re-enable for inference!
---> 95 trainer.train()
97 model_save_path = f"{self.config['model_output_dir']}/{self.config['model_name']}_adapter"
98 trainer.save_model(model_save_path)

File /opt/conda/lib/python3.10/site-packages/transformers/trainer.py:1537, in Trainer.train(self, resume_from_checkpoint, trial, ignore_keys_for_eval, **kwargs)
1535 hf_hub_utils.enable_progress_bars()
1536 else:
-> 1537 return inner_training_loop(
1538 args=args,
1539 resume_from_checkpoint=resume_from_checkpoint,
1540 trial=trial,
1541 ignore_keys_for_eval=ignore_keys_for_eval,
1542 )

File /opt/conda/lib/python3.10/site-packages/transformers/trainer.py:1854, in Trainer._inner_training_loop(self, batch_size, args, resume_from_checkpoint, trial, ignore_keys_for_eval)
1851 self.control = self.callback_handler.on_step_begin(args, self.state, self.control)
1853 with self.accelerator.accumulate(model):
-> 1854 tr_loss_step = self.training_step(model, inputs)
1856 if (
1857 args.logging_nan_inf_filter
1858 and not is_torch_tpu_available()
1859 and (torch.isnan(tr_loss_step) or torch.isinf(tr_loss_step))
1860 ):
1861 # if loss is nan or inf simply add the average of previous logged losses
1862 tr_loss += tr_loss / (1 + self.state.global_step - self._globalstep_last_logged)

File /opt/conda/lib/python3.10/site-packages/transformers/trainer.py:2723, in Trainer.training_step(self, model, inputs)
2720 return loss_mb.reduce_mean().detach().to(self.args.device)
2722 with self.compute_loss_context_manager():
-> 2723 loss = self.compute_loss(model, inputs)
2725 if self.args.n_gpu > 1:
2726 loss = loss.mean() # mean() to average on multi-gpu parallel training

File /opt/conda/lib/python3.10/site-packages/transformers/trainer.py:2746, in Trainer.compute_loss(self, model, inputs, return_outputs)
2744 else:
2745 labels = None
-> 2746 outputs = model(**inputs)
2747 # Save past state if it exists
2748 # TODO: this needs to be fixed and made cleaner later.
2749 if self.args.past_index >= 0:

File /opt/conda/lib/python3.10/site-packages/torch/nn/modules/module.py:1501, in Module._call_impl(self, *args, **kwargs)
1496 # If we don't have any hooks, we want to skip the rest of the logic in
1497 # this function, and just call forward.
1498 if not (self._backward_hooks or self._backward_pre_hooks or self._forward_hooks or self._forward_pre_hooks
1499 or _global_backward_pre_hooks or _global_backward_hooks
1500 or _global_forward_hooks or _global_forward_pre_hooks):
-> 1501 return forward_call(*args, **kwargs)
1502 # Do not call functions when jit is used
1503 full_backward_hooks, non_full_backward_hooks = [], []

File /opt/conda/lib/python3.10/site-packages/torch/nn/parallel/data_parallel.py:161, in DataParallel.forward(self, *inputs, **kwargs)
156 if t.device != self.src_device_obj:
157 raise RuntimeError("module must have its parameters and buffers "
158 "on device {} (device_ids[0]) but found one of "
159 "them on device: {}".format(self.src_device_obj, t.device))
--> 161 inputs, kwargs = self.scatter(inputs, kwargs, self.device_ids)
162 # for forward function without any inputs, empty list and dict will be created
163 # so the module can be executed on one device which is the first one in device_ids
164 if not inputs and not kwargs:

File /opt/conda/lib/python3.10/site-packages/torch/nn/parallel/data_parallel.py:178, in DataParallel.scatter(self, inputs, kwargs, device_ids)
177 def scatter(self, inputs, kwargs, device_ids):
--> 178 return scatter_kwargs(inputs, kwargs, device_ids, dim=self.dim)

File /opt/conda/lib/python3.10/site-packages/torch/nn/parallel/scatter_gather.py:53, in scatter_kwargs(inputs, kwargs, target_gpus, dim)
51 r"""Scatter with support for kwargs dictionary"""
52 inputs = scatter(inputs, target_gpus, dim) if inputs else []
---> 53 kwargs = scatter(kwargs, target_gpus, dim) if kwargs else []
54 if len(inputs) < len(kwargs):
55 inputs.extend(() for _ in range(len(kwargs) - len(inputs)))

File /opt/conda/lib/python3.10/site-packages/torch/nn/parallel/scatter_gather.py:44, in scatter(inputs, target_gpus, dim)
38 # After scatter_map is called, a scatter_map cell will exist. This cell
39 # has a reference to the actual function scatter_map, which has references
40 # to a closure that has a reference to the scatter_map cell (because the
41 # fn is recursive). To avoid this reference cycle, we set the function to
42 # None, clearing the cell
43 try:
---> 44 res = scatter_map(inputs)
45 finally:
46 scatter_map = None

File /opt/conda/lib/python3.10/site-packages/torch/nn/parallel/scatter_gather.py:35, in scatter.<locals>.scatter_map(obj)
33 return [list(i) for i in zip(*map(scatter_map, obj))]
34 if isinstance(obj, dict) and len(obj) > 0:
---> 35 return [type(obj)(i) for i in zip(*map(scatter_map, obj.items()))]
36 return [obj for targets in target_gpus]

File /opt/conda/lib/python3.10/site-packages/torch/nn/parallel/scatter_gather.py:31, in scatter.<locals>.scatter_map(obj)
29 return [type(obj)(*args) for args in zip(*map(scatter_map, obj))]
30 if isinstance(obj, tuple) and len(obj) > 0:
---> 31 return list(zip(*map(scatter_map, obj)))
32 if isinstance(obj, list) and len(obj) > 0:
33 return [list(i) for i in zip(*map(scatter_map, obj))]

File /opt/conda/lib/python3.10/site-packages/torch/nn/parallel/scatter_gather.py:27, in scatter.<locals>.scatter_map(obj)
25 def scatter_map(obj):
26 if isinstance(obj, torch.Tensor):
---> 27 return Scatter.apply(target_gpus, None, dim, obj)
28 if _is_namedtuple(obj):
29 return [type(obj)(*args) for args in zip(*map(scatter_map, obj))]

File /opt/conda/lib/python3.10/site-packages/torch/autograd/function.py:506, in Function.apply(cls, *args, **kwargs)
503 if not torch._C._are_functorch_transforms_active():
504 # See NOTE: [functorch vjp and autograd interaction]
505 args = _functorch.utils.unwrap_dead_wrappers(args)
--> 506 return super().apply(*args, **kwargs) # type: ignore[misc]
508 if cls.setup_context == _SingleLevelFunction.setup_context:
509 raise RuntimeError(
510 'In order to use an autograd.Function with functorch transforms '
511 '(vmap, grad, jvp, jacrev, ...), it must override the setup_context '
512 'staticmethod. For more details, please see '
513 'https://pytorch.org/docs/master/notes/extending.func.html')

File /opt/conda/lib/python3.10/site-packages/torch/nn/parallel/_functions.py:96, in Scatter.forward(ctx, target_gpus, chunk_sizes, dim, input)
93 if torch.cuda.is_available() and ctx.input_device == -1:
94 # Perform CPU to GPU copies in a background stream
95 streams = [_get_stream(device) for device in target_gpus]
---> 96 outputs = comm.scatter(input, target_gpus, chunk_sizes, ctx.dim, streams)
97 # Synchronize with the copy stream
98 if streams is not None:

File /opt/conda/lib/python3.10/site-packages/torch/nn/parallel/comm.py:189, in scatter(tensor, devices, chunk_sizes, dim, streams, out)
187 if out is None:
188 devices = [_get_device_index(d) for d in devices]
--> 189 return tuple(torch._C._scatter(tensor, devices, chunk_sizes, dim, streams))
190 else:
191 if devices is not None:

RuntimeError: CUDA error: device kernel image is invalid
CUDA kernel errors might be asynchronously reported at some other API call, so the stacktrace below might be incorrect.
For debugging consider passing CUDA_LAUNCH_BLOCKING=1.
Compile with TORCH_USE_CUDA_DSA to enable device-side assertions.
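
The trace shows the failure happening inside torch.nn.DataParallel's scatter, i.e. while the Trainer is splitting the batch across multiple visible GPUs. A minimal debugging sketch, assuming a Kaggle notebook with two GPUs and that running on a single GPU is acceptable for this test (the environment variables are standard PyTorch/CUDA ones, not part of this repo, and must be set before CUDA is initialized):

```python
import os

# Surface the real failing call instead of an asynchronous report,
# as the error message itself suggests.
os.environ["CUDA_LAUNCH_BLOCKING"] = "1"

# Expose only one GPU so transformers.Trainer does not wrap the model
# in torch.nn.DataParallel, which is where the scatter call fails.
os.environ["CUDA_VISIBLE_DEVICES"] = "0"

import torch
print(torch.__version__, torch.version.cuda)  # CUDA build the wheel was compiled for
print(torch.cuda.get_device_name(0))          # actual GPU on the Kaggle instance
print(torch.cuda.device_count())              # should now report 1
```

If the error still occurs on a single GPU, "device kernel image is invalid" usually points to a PyTorch build whose kernels do not match the installed CUDA driver or the GPU's compute capability, rather than anything in QloraTrainer itself.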
