Fix batch > 1 in HunyuanVideo by hlky · Pull Request #10548 · huggingface/diffusers (original) (raw)
I still get an error with this change applied — here is the traceback:
epoch: 0%| | 0/200 [00:11<?, ?it/s]
Traceback (most recent call last):
File "C:\OneTrainer\modules\ui\TrainUI.py", line 561, in __training_thread_function
trainer.train()
File "C:\OneTrainer\modules\trainer\GenericTrainer.py", line 682, in train
model_output_data = self.model_setup.predict(self.model, batch, self.config, train_progress)
File "C:\OneTrainer\modules\modelSetup\BaseHunyuanVideoSetup.py", line 317, in predict
predicted_flow = model.transformer(
File "C:\OneTrainer\venv\lib\site-packages\torch\nn\modules\module.py", line 1736, in _wrapped_call_impl
return self._call_impl(*args, **kwargs)
File "C:\OneTrainer\venv\lib\site-packages\torch\nn\modules\module.py", line 1747, in _call_impl
return forward_call(*args, **kwargs)
File "C:\OneTrainer\venv\lib\site-packages\diffusers\models\transformers\transformer_hunyuan_video.py", line 770, in forward
hidden_states, encoder_hidden_states = block(
File "C:\OneTrainer\venv\lib\site-packages\torch\nn\modules\module.py", line 1736, in _wrapped_call_impl
return self._call_impl(*args, **kwargs)
File "C:\OneTrainer\venv\lib\site-packages\torch\nn\modules\module.py", line 1747, in _call_impl
return forward_call(*args, **kwargs)
File "C:\OneTrainer\modules\util\checkpointing_util.py", line 119, in forward
return checkpoint(
File "C:\OneTrainer\venv\lib\site-packages\torch\_compile.py", line 32, in inner
return disable_fn(*args, **kwargs)
File "C:\OneTrainer\venv\lib\site-packages\torch\_dynamo\eval_frame.py", line 632, in _fn
return fn(*args, **kwargs)
File "C:\OneTrainer\venv\lib\site-packages\torch\utils\checkpoint.py", line 489, in checkpoint
return CheckpointFunction.apply(function, preserve, *args)
File "C:\OneTrainer\venv\lib\site-packages\torch\autograd\function.py", line 575, in apply
return super().apply(*args, **kwargs) # type: ignore[misc]
File "C:\OneTrainer\venv\lib\site-packages\torch\utils\checkpoint.py", line 264, in forward
outputs = run_function(*args)
File "C:\OneTrainer\modules\util\checkpointing_util.py", line 89, in offloaded_custom_forward
output = orig_forward(*args)
File "C:\OneTrainer\venv\lib\site-packages\diffusers\models\transformers\transformer_hunyuan_video.py", line 478, in forward
attn_output, context_attn_output = self.attn(
File "C:\OneTrainer\venv\lib\site-packages\torch\nn\modules\module.py", line 1736, in _wrapped_call_impl
return self._call_impl(*args, **kwargs)
File "C:\OneTrainer\venv\lib\site-packages\torch\nn\modules\module.py", line 1747, in _call_impl
return forward_call(*args, **kwargs)
File "C:\OneTrainer\venv\lib\site-packages\diffusers\models\attention_processor.py", line 588, in forward
return self.processor(
File "C:\OneTrainer\venv\lib\site-packages\diffusers\models\transformers\transformer_hunyuan_video.py", line 117, in __call__
hidden_states = F.scaled_dot_product_attention(
RuntimeError: The expanded size of the tensor (24) must match the existing size (16) at non-singleton dimension 1. Target sizes: [16, 24, 1085, 1085]. Tensor sizes: [16, 1, 1085]