From d69cc8f46f238e91e2f597cd301cc53b1d4b8bec Mon Sep 17 00:00:00 2001
From: Volpeon <git@volpeon.ink>
Date: Sat, 25 Mar 2023 17:49:30 +0100
Subject: Fix training with guidance

---
 training/functional.py | 12 +++++++++---
 1 file changed, 9 insertions(+), 3 deletions(-)

(limited to 'training')

diff --git a/training/functional.py b/training/functional.py
index d285366..109845b 100644
--- a/training/functional.py
+++ b/training/functional.py
@@ -344,9 +344,15 @@ def loss_step(
         raise ValueError(f"Unknown prediction type {noise_scheduler.config.prediction_type}")
 
     if guidance_scale != 0:
-        # Chunk the noise and model_pred into two parts and compute the loss on each part separately.
-        model_pred_uncond, model_pred_text = torch.chunk(model_pred, 2, dim=0)
-        model_pred = model_pred_uncond + guidance_scale * (model_pred_text - model_pred_uncond)
+        uncond_encoder_hidden_states = get_extended_embeddings(
+            text_encoder,
+            batch["negative_input_ids"],
+            batch["negative_attention_mask"]
+        )
+        uncond_encoder_hidden_states = uncond_encoder_hidden_states.to(dtype=unet.dtype)
+
+        model_pred_uncond = unet(noisy_latents, timesteps, uncond_encoder_hidden_states).sample
+        model_pred = model_pred_uncond + guidance_scale * (model_pred - model_pred_uncond)
 
         loss = F.mse_loss(model_pred.float(), target.float(), reduction="none")
     elif prior_loss_weight != 0:
-- 
cgit v1.2.3-70-g09d2