4 files changed, 14 insertions, 17 deletions
diff --git a/training/functional.py b/training/functional.py
index ebb48ab..015fe5e 100644
--- a/training/functional.py
+++ b/training/functional.py
@@ -259,7 +259,7 @@ def snr_weight(noisy_latents, latents, gamma):
        sigma_mean_sq = F.mse_loss(sigma.float(), zeros.float(), reduction="none").mean([1, 2, 3])
        snr = torch.div(alpha_mean_sq, sigma_mean_sq)
        gamma_over_snr = torch.div(torch.ones_like(snr) * gamma, snr)
-        snr_weight = torch.minimum(gamma_over_snr, torch.ones_like(gamma_over_snr)).float()
+        snr_weight = torch.fmin(gamma_over_snr, torch.ones_like(gamma_over_snr)).float()
        return snr_weight
    return torch.tensor(
@@ -471,10 +471,7 @@ def train_loop(
                        "lr": lr_scheduler.get_last_lr()[0],
                    }
                    if isDadaptation:
-                        logs["lr/d*lr"] = (
+                        logs["lr/d*lr"] = optimizer.param_groups[0]["d"] * optimizer.param_groups[0]["lr"]
-                            optimizer.param_groups[0]["d"] *
-                            optimizer.param_groups[0]["lr"]
-                        )
                    logs.update(on_log())
                    local_progress_bar.set_postfix(**logs)
diff --git a/training/strategy/dreambooth.py b/training/strategy/dreambooth.py
index e5e84c8..28fccff 100644
--- a/training/strategy/dreambooth.py
+++ b/training/strategy/dreambooth.py
@@ -137,8 +137,8 @@ def dreambooth_strategy_callbacks(
        print("Saving model...")
-        unet_ = accelerator.unwrap_model(unet, False)
+        unet_ = accelerator.unwrap_model(unet, keep_fp32_wrapper=False)
-        text_encoder_ = accelerator.unwrap_model(text_encoder, False)
+        text_encoder_ = accelerator.unwrap_model(text_encoder, keep_fp32_wrapper=False)
        with ema_context():
            pipeline = VlpnStableDiffusion(
@@ -160,8 +160,8 @@ def dreambooth_strategy_callbacks(
    @torch.no_grad()
    def on_sample(step):
        with ema_context():
-            unet_ = accelerator.unwrap_model(unet, False)
+            unet_ = accelerator.unwrap_model(unet, keep_fp32_wrapper=True)
-            text_encoder_ = accelerator.unwrap_model(text_encoder, False)
+            text_encoder_ = accelerator.unwrap_model(text_encoder, keep_fp32_wrapper=True)
            orig_unet_dtype = unet_.dtype
            orig_text_encoder_dtype = text_encoder_.dtype
diff --git a/training/strategy/lora.py b/training/strategy/lora.py
index aa75bec..1c8fad6 100644
--- a/training/strategy/lora.py
+++ b/training/strategy/lora.py
@@ -47,7 +47,6 @@ def lora_strategy_callbacks(
    save_samples_ = partial(
        save_samples,
        accelerator=accelerator,
-        text_encoder=text_encoder,
        tokenizer=tokenizer,
        vae=vae,
        sample_scheduler=sample_scheduler,
@@ -72,6 +71,7 @@ def lora_strategy_callbacks(
    @contextmanager
    def on_train(epoch: int):
        tokenizer.train()
+        text_encoder.train()
        yield
    @contextmanager
@@ -89,8 +89,8 @@ def lora_strategy_callbacks(
    def on_checkpoint(step, postfix):
        print(f"Saving checkpoint for step {step}...")
-        unet_ = accelerator.unwrap_model(unet, False)
+        unet_ = accelerator.unwrap_model(unet, keep_fp32_wrapper=False)
-        text_encoder_ = accelerator.unwrap_model(text_encoder, False)
+        text_encoder_ = accelerator.unwrap_model(text_encoder, keep_fp32_wrapper=False)
        lora_config = {}
        state_dict = get_peft_model_state_dict(unet, state_dict=accelerator.get_state_dict(unet))
@@ -111,10 +111,10 @@ def lora_strategy_callbacks(
    @torch.no_grad()
    def on_sample(step):
-        unet_ = accelerator.unwrap_model(unet, False)
+        unet_ = accelerator.unwrap_model(unet, keep_fp32_wrapper=True)
-        text_encoder_ = accelerator.unwrap_model(text_encoder, False)
+        text_encoder_ = accelerator.unwrap_model(text_encoder, keep_fp32_wrapper=True)
-        save_samples_(step=step, unet=unet_)
+        save_samples_(step=step, unet=unet_, text_encoder=text_encoder_)
        del unet_
        del text_encoder_
diff --git a/training/strategy/ti.py b/training/strategy/ti.py
index bd0d178..2038e34 100644
--- a/training/strategy/ti.py
+++ b/training/strategy/ti.py
@@ -156,8 +156,8 @@ def textual_inversion_strategy_callbacks(
    @torch.no_grad()
    def on_sample(step):
        with ema_context():
-            unet_ = accelerator.unwrap_model(unet, False)
+            unet_ = accelerator.unwrap_model(unet, keep_fp32_wrapper=True)
-            text_encoder_ = accelerator.unwrap_model(text_encoder, False)
+            text_encoder_ = accelerator.unwrap_model(text_encoder, keep_fp32_wrapper=True)
            orig_unet_dtype = unet_.dtype
            orig_text_encoder_dtype = text_encoder_.dtype