Fix batch > 1 in HunyuanVideo by hlky · Pull Request #10548 · huggingface/diffusers (original) (raw)

I still get an error with this change applied; here is the traceback:

epoch:   0%|                                                                                   | 0/200 [00:11<?, ?it/s]
Traceback (most recent call last):
  File "C:\OneTrainer\modules\ui\TrainUI.py", line 561, in __training_thread_function
    trainer.train()
  File "C:\OneTrainer\modules\trainer\GenericTrainer.py", line 682, in train
    model_output_data = self.model_setup.predict(self.model, batch, self.config, train_progress)
  File "C:\OneTrainer\modules\modelSetup\BaseHunyuanVideoSetup.py", line 317, in predict
    predicted_flow = model.transformer(
  File "C:\OneTrainer\venv\lib\site-packages\torch\nn\modules\module.py", line 1736, in _wrapped_call_impl
    return self._call_impl(*args, **kwargs)
  File "C:\OneTrainer\venv\lib\site-packages\torch\nn\modules\module.py", line 1747, in _call_impl
    return forward_call(*args, **kwargs)
  File "C:\OneTrainer\venv\lib\site-packages\diffusers\models\transformers\transformer_hunyuan_video.py", line 770, in forward
    hidden_states, encoder_hidden_states = block(
  File "C:\OneTrainer\venv\lib\site-packages\torch\nn\modules\module.py", line 1736, in _wrapped_call_impl
    return self._call_impl(*args, **kwargs)
  File "C:\OneTrainer\venv\lib\site-packages\torch\nn\modules\module.py", line 1747, in _call_impl
    return forward_call(*args, **kwargs)
  File "C:\OneTrainer\modules\util\checkpointing_util.py", line 119, in forward
    return checkpoint(
  File "C:\OneTrainer\venv\lib\site-packages\torch\_compile.py", line 32, in inner
    return disable_fn(*args, **kwargs)
  File "C:\OneTrainer\venv\lib\site-packages\torch\_dynamo\eval_frame.py", line 632, in _fn
    return fn(*args, **kwargs)
  File "C:\OneTrainer\venv\lib\site-packages\torch\utils\checkpoint.py", line 489, in checkpoint
    return CheckpointFunction.apply(function, preserve, *args)
  File "C:\OneTrainer\venv\lib\site-packages\torch\autograd\function.py", line 575, in apply
    return super().apply(*args, **kwargs)  # type: ignore[misc]
  File "C:\OneTrainer\venv\lib\site-packages\torch\utils\checkpoint.py", line 264, in forward
    outputs = run_function(*args)
  File "C:\OneTrainer\modules\util\checkpointing_util.py", line 89, in offloaded_custom_forward
    output = orig_forward(*args)
  File "C:\OneTrainer\venv\lib\site-packages\diffusers\models\transformers\transformer_hunyuan_video.py", line 478, in forward
    attn_output, context_attn_output = self.attn(
  File "C:\OneTrainer\venv\lib\site-packages\torch\nn\modules\module.py", line 1736, in _wrapped_call_impl
    return self._call_impl(*args, **kwargs)
  File "C:\OneTrainer\venv\lib\site-packages\torch\nn\modules\module.py", line 1747, in _call_impl
    return forward_call(*args, **kwargs)
  File "C:\OneTrainer\venv\lib\site-packages\diffusers\models\attention_processor.py", line 588, in forward
    return self.processor(
  File "C:\OneTrainer\venv\lib\site-packages\diffusers\models\transformers\transformer_hunyuan_video.py", line 117, in __call__
    hidden_states = F.scaled_dot_product_attention(
RuntimeError: The expanded size of the tensor (24) must match the existing size (16) at non-singleton dimension 1.  Target sizes: [16, 24, 1085, 1085].  Tensor sizes: [16, 1, 1085]