You signed in with another tab or window. Reload to refresh your session. You signed out in another tab or window. Reload to refresh your session. You switched accounts on another tab or window. Reload to refresh your session. Dismiss alert
报错如下:
Traceback (most recent call last):
File "/home/tiger/.local/lib/python3.9/site-packages/swift/cli/sft.py", line 5, in <module>
sft_main()
File "/home/tiger/.local/lib/python3.9/site-packages/swift/llm/train/sft.py", line 249, in sft_main
sft_main()
File "/home/tiger/.local/lib/python3.9/site-packages/swift/llm/train/sft.py", line 249, in sft_main
return SwiftSft(args).main()
File "/home/tiger/.local/lib/python3.9/site-packages/swift/llm/base.py", line 46, in main
return SwiftSft(args).main()
File "/home/tiger/.local/lib/python3.9/site-packages/swift/llm/base.py", line 46, in main
result = self.run()
File "/home/tiger/.local/lib/python3.9/site-packages/swift/llm/train/sft.py", line 137, in run
result = self.run()
File "/home/tiger/.local/lib/python3.9/site-packages/swift/llm/train/sft.py", line 137, in run
return self.train(trainer)
File "/home/tiger/.local/lib/python3.9/site-packages/swift/llm/train/sft.py", line 189, in train
return self.train(trainer)
File "/home/tiger/.local/lib/python3.9/site-packages/swift/llm/train/sft.py", line 189, in train
trainer.train(trainer.args.resume_from_checkpoint)
File "/home/tiger/.local/lib/python3.9/site-packages/swift/trainers/mixin.py", line 261, in train
trainer.train(trainer.args.resume_from_checkpoint)
File "/home/tiger/.local/lib/python3.9/site-packages/swift/trainers/mixin.py", line 261, in train
res = super().train(*args, **kwargs)
File "/home/tiger/.local/lib/python3.9/site-packages/transformers/trainer.py", line 2164, in train
res = super().train(*args, **kwargs)
File "/home/tiger/.local/lib/python3.9/site-packages/transformers/trainer.py", line 2164, in train
return inner_training_loop(
File "/home/tiger/.local/lib/python3.9/site-packages/transformers/trainer.py", line 2524, in _inner_training_loop
return inner_training_loop(
File "/home/tiger/.local/lib/python3.9/site-packages/transformers/trainer.py", line 2524, in _inner_training_loop
tr_loss_step = self.training_step(model, inputs, num_items_in_batch)
File "/home/tiger/.local/lib/python3.9/site-packages/transformers/trainer.py", line 3654, in training_step
tr_loss_step = self.training_step(model, inputs, num_items_in_batch)
File "/home/tiger/.local/lib/python3.9/site-packages/transformers/trainer.py", line 3654, in training_step
loss = self.compute_loss(model, inputs, num_items_in_batch=num_items_in_batch)
File "/home/tiger/.local/lib/python3.9/site-packages/swift/trainers/trainers.py", line 144, in compute_loss
loss = self.compute_loss(model, inputs, num_items_in_batch=num_items_in_batch)
File "/home/tiger/.local/lib/python3.9/site-packages/swift/trainers/trainers.py", line 144, in compute_loss
outputs = model(**inputs)
File "/usr/local/lib/python3.9/dist-packages/torch/nn/modules/module.py", line 1518, in _wrapped_call_impl
outputs = model(**inputs)
File "/usr/local/lib/python3.9/dist-packages/torch/nn/modules/module.py", line 1518, in _wrapped_call_impl
return self._call_impl(*args, **kwargs)
File "/usr/local/lib/python3.9/dist-packages/torch/nn/modules/module.py", line 1527, in _call_impl
return self._call_impl(*args, **kwargs)
File "/usr/local/lib/python3.9/dist-packages/torch/nn/modules/module.py", line 1527, in _call_impl
return forward_call(*args, **kwargs)
File "/home/tiger/.local/lib/python3.9/site-packages/deepspeed/utils/nvtx.py", line 18, in wrapped_fn
return forward_call(*args, **kwargs)
File "/home/tiger/.local/lib/python3.9/site-packages/deepspeed/utils/nvtx.py", line 18, in wrapped_fn
ret_val = func(*args, **kwargs)
File "/home/tiger/.local/lib/python3.9/site-packages/deepspeed/runtime/engine.py", line 1914, in forward
ret_val = func(*args, **kwargs)
File "/home/tiger/.local/lib/python3.9/site-packages/deepspeed/runtime/engine.py", line 1914, in forward
loss = self.module(*inputs, **kwargs)
File "/usr/local/lib/python3.9/dist-packages/torch/nn/modules/module.py", line 1518, in _wrapped_call_impl
loss = self.module(*inputs, **kwargs)
File "/usr/local/lib/python3.9/dist-packages/torch/nn/modules/module.py", line 1518, in _wrapped_call_impl
return self._call_impl(*args, **kwargs)
File "/usr/local/lib/python3.9/dist-packages/torch/nn/modules/module.py", line 1547, in _call_impl
return self._call_impl(*args, **kwargs)
File "/usr/local/lib/python3.9/dist-packages/torch/nn/modules/module.py", line 1547, in _call_impl
args_kwargs_result = hook(self, args, kwargs) # type: ignore[misc]
File "/home/tiger/.local/lib/python3.9/site-packages/swift/llm/template/base.py", line 779, in pre_forward_hook
args_kwargs_result = hook(self, args, kwargs) # type: ignore[misc]
File "/home/tiger/.local/lib/python3.9/site-packages/swift/llm/template/base.py", line 779, in pre_forward_hook
kwargs = to_device(self._post_encode(model, old_kwargs), model.device)
File "/home/tiger/.local/lib/python3.9/site-packages/swift/llm/template/template/qwen.py", line 310, in _post_encode
kwargs = to_device(self._post_encode(model, old_kwargs), model.device)
File "/home/tiger/.local/lib/python3.9/site-packages/swift/llm/template/template/qwen.py", line 310, in _post_encode
position_ids, _ = model.get_rope_index(input_ids, image_grid_thw, video_grid_thw, inputs['attention_mask'])
File "/home/tiger/.local/lib/python3.9/site-packages/transformers/models/qwen2_vl/modeling_qwen2_vl.py", line 1570, in get_rope_index
position_ids, _ = model.get_rope_index(input_ids, image_grid_thw, video_grid_thw, inputs['attention_mask'])
File "/home/tiger/.local/lib/python3.9/site-packages/transformers/models/qwen2_vl/modeling_qwen2_vl.py", line 1570, in get_rope_index
position_ids[..., i, attention_mask[i] == 1] = llm_positions.to(position_ids.device)
RuntimeError: shape mismatch: value tensor of shape [3, 2515] cannot be broadcast to indexing result of shape [3, 2034]
position_ids[..., i, attention_mask[i] == 1] = llm_positions.to(position_ids.device)
RuntimeError: shape mismatch: value tensor of shape [3, 2527] cannot be broadcast to indexing result of shape [3, 1974]
The text was updated successfully, but these errors were encountered:
您好,我在试用 swift 3.0.3 训练 Qwen2-VL 72B(多模态任务),担心序列长度(length)太长,想使用序列并行,但运行时报错。我的设置如下:
MODEL_P="72B"
TOKENIZERS_PARALLELISM=false NPROC_PER_NODE=${ARNOLD_WORKER_GPU} CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7 MASTER_PORT=$METIS_WORKER_0_PORT MAX_PIXELS=602112 \
swift sft \
--model /opt/tiger/soweval/Chen_Li/creator_model/Qwen2VL/Qwen2-VL-$MODEL_P-Instruct \
--torch_dtype bfloat16 \
--num_train_epochs 10 \
--save_strategy steps \
--save_steps 5 \
--train_type full \
--logging_steps 1 \
--max_length 4000 \
--output_dir xxx/Qwen2VL/swift_train/output_$MODEL_P \
--dataset xxx/2025-01-22/init.jsonl \
--val_dataset xxx/2025-01-22/init.jsonl \
--per_device_train_batch_size 1 \
--per_device_eval_batch_size 1 \
--gradient_accumulation_steps 64 \
--learning_rate 1e-4 \
--deepspeed zero3 \
--save_total_limit 3 \
--eval_steps 10 \
--freeze_vit true \
--warmup_ratio 0.05 \
--dataloader_num_workers 4 \
--sequence_parallel_size 2 \
--attn_impl flash_attn
报错如下:
Traceback (most recent call last):
File "/home/tiger/.local/lib/python3.9/site-packages/swift/cli/sft.py", line 5, in <module>
sft_main()
File "/home/tiger/.local/lib/python3.9/site-packages/swift/llm/train/sft.py", line 249, in sft_main
sft_main()
File "/home/tiger/.local/lib/python3.9/site-packages/swift/llm/train/sft.py", line 249, in sft_main
return SwiftSft(args).main()
File "/home/tiger/.local/lib/python3.9/site-packages/swift/llm/base.py", line 46, in main
return SwiftSft(args).main()
File "/home/tiger/.local/lib/python3.9/site-packages/swift/llm/base.py", line 46, in main
result = self.run()
File "/home/tiger/.local/lib/python3.9/site-packages/swift/llm/train/sft.py", line 137, in run
result = self.run()
File "/home/tiger/.local/lib/python3.9/site-packages/swift/llm/train/sft.py", line 137, in run
return self.train(trainer)
File "/home/tiger/.local/lib/python3.9/site-packages/swift/llm/train/sft.py", line 189, in train
return self.train(trainer)
File "/home/tiger/.local/lib/python3.9/site-packages/swift/llm/train/sft.py", line 189, in train
trainer.train(trainer.args.resume_from_checkpoint)
File "/home/tiger/.local/lib/python3.9/site-packages/swift/trainers/mixin.py", line 261, in train
trainer.train(trainer.args.resume_from_checkpoint)
File "/home/tiger/.local/lib/python3.9/site-packages/swift/trainers/mixin.py", line 261, in train
res = super().train(*args, **kwargs)
File "/home/tiger/.local/lib/python3.9/site-packages/transformers/trainer.py", line 2164, in train
res = super().train(*args, **kwargs)
File "/home/tiger/.local/lib/python3.9/site-packages/transformers/trainer.py", line 2164, in train
return inner_training_loop(
File "/home/tiger/.local/lib/python3.9/site-packages/transformers/trainer.py", line 2524, in _inner_training_loop
return inner_training_loop(
File "/home/tiger/.local/lib/python3.9/site-packages/transformers/trainer.py", line 2524, in _inner_training_loop
tr_loss_step = self.training_step(model, inputs, num_items_in_batch)
File "/home/tiger/.local/lib/python3.9/site-packages/transformers/trainer.py", line 3654, in training_step
tr_loss_step = self.training_step(model, inputs, num_items_in_batch)
File "/home/tiger/.local/lib/python3.9/site-packages/transformers/trainer.py", line 3654, in training_step
loss = self.compute_loss(model, inputs, num_items_in_batch=num_items_in_batch)
File "/home/tiger/.local/lib/python3.9/site-packages/swift/trainers/trainers.py", line 144, in compute_loss
loss = self.compute_loss(model, inputs, num_items_in_batch=num_items_in_batch)
File "/home/tiger/.local/lib/python3.9/site-packages/swift/trainers/trainers.py", line 144, in compute_loss
outputs = model(**inputs)
File "/usr/local/lib/python3.9/dist-packages/torch/nn/modules/module.py", line 1518, in _wrapped_call_impl
outputs = model(**inputs)
File "/usr/local/lib/python3.9/dist-packages/torch/nn/modules/module.py", line 1518, in _wrapped_call_impl
return self._call_impl(*args, **kwargs)
File "/usr/local/lib/python3.9/dist-packages/torch/nn/modules/module.py", line 1527, in _call_impl
return self._call_impl(*args, **kwargs)
File "/usr/local/lib/python3.9/dist-packages/torch/nn/modules/module.py", line 1527, in _call_impl
return forward_call(*args, **kwargs)
File "/home/tiger/.local/lib/python3.9/site-packages/deepspeed/utils/nvtx.py", line 18, in wrapped_fn
return forward_call(*args, **kwargs)
File "/home/tiger/.local/lib/python3.9/site-packages/deepspeed/utils/nvtx.py", line 18, in wrapped_fn
ret_val = func(*args, **kwargs)
File "/home/tiger/.local/lib/python3.9/site-packages/deepspeed/runtime/engine.py", line 1914, in forward
ret_val = func(*args, **kwargs)
File "/home/tiger/.local/lib/python3.9/site-packages/deepspeed/runtime/engine.py", line 1914, in forward
loss = self.module(*inputs, **kwargs)
File "/usr/local/lib/python3.9/dist-packages/torch/nn/modules/module.py", line 1518, in _wrapped_call_impl
loss = self.module(*inputs, **kwargs)
File "/usr/local/lib/python3.9/dist-packages/torch/nn/modules/module.py", line 1518, in _wrapped_call_impl
return self._call_impl(*args, **kwargs)
File "/usr/local/lib/python3.9/dist-packages/torch/nn/modules/module.py", line 1547, in _call_impl
return self._call_impl(*args, **kwargs)
File "/usr/local/lib/python3.9/dist-packages/torch/nn/modules/module.py", line 1547, in _call_impl
args_kwargs_result = hook(self, args, kwargs) # type: ignore[misc]
File "/home/tiger/.local/lib/python3.9/site-packages/swift/llm/template/base.py", line 779, in pre_forward_hook
args_kwargs_result = hook(self, args, kwargs) # type: ignore[misc]
File "/home/tiger/.local/lib/python3.9/site-packages/swift/llm/template/base.py", line 779, in pre_forward_hook
kwargs = to_device(self._post_encode(model, old_kwargs), model.device)
File "/home/tiger/.local/lib/python3.9/site-packages/swift/llm/template/template/qwen.py", line 310, in _post_encode
kwargs = to_device(self._post_encode(model, old_kwargs), model.device)
File "/home/tiger/.local/lib/python3.9/site-packages/swift/llm/template/template/qwen.py", line 310, in _post_encode
position_ids, _ = model.get_rope_index(input_ids, image_grid_thw, video_grid_thw, inputs['attention_mask'])
File "/home/tiger/.local/lib/python3.9/site-packages/transformers/models/qwen2_vl/modeling_qwen2_vl.py", line 1570, in get_rope_index
position_ids, _ = model.get_rope_index(input_ids, image_grid_thw, video_grid_thw, inputs['attention_mask'])
File "/home/tiger/.local/lib/python3.9/site-packages/transformers/models/qwen2_vl/modeling_qwen2_vl.py", line 1570, in get_rope_index
position_ids[..., i, attention_mask[i] == 1] = llm_positions.to(position_ids.device)
RuntimeError: shape mismatch: value tensor of shape [3, 2515] cannot be broadcast to indexing result of shape [3, 2034]
position_ids[..., i, attention_mask[i] == 1] = llm_positions.to(position_ids.device)
RuntimeError: shape mismatch: value tensor of shape [3, 2527] cannot be broadcast to indexing result of shape [3, 1974]
The text was updated successfully, but these errors were encountered: