From 86e908656bcd7585ec45cd930176800f759f146a Mon Sep 17 00:00:00 2001
From: Volpeon <git@volpeon.ink>
Date: Sat, 1 Apr 2023 17:33:00 +0200
Subject: Combined TI with embedding and LoRA

---
 training/strategy/ti.py | 76 ++++++++++++-------------------------------------
 1 file changed, 18 insertions(+), 58 deletions(-)

(limited to 'training')

diff --git a/training/strategy/ti.py b/training/strategy/ti.py
index 19b8d25..33f5fb9 100644
--- a/training/strategy/ti.py
+++ b/training/strategy/ti.py
@@ -1,6 +1,6 @@
 from typing import Optional
 from functools import partial
-from contextlib import contextmanager, nullcontext
+from contextlib import contextmanager
 from pathlib import Path
 
 import torch
@@ -13,7 +13,6 @@ from diffusers import AutoencoderKL, UNet2DConditionModel, DPMSolverMultistepSch
 from slugify import slugify
 
 from models.clip.tokenizer import MultiCLIPTokenizer
-from training.util import EMAModel
 from training.functional import TrainingStrategy, TrainingCallbacks, save_samples
 
 
@@ -32,10 +31,6 @@ def textual_inversion_strategy_callbacks(
     placeholder_tokens: list[str],
     placeholder_token_ids: list[list[int]],
     gradient_checkpointing: bool = False,
-    use_ema: bool = False,
-    ema_inv_gamma: float = 1.0,
-    ema_power: int = 1,
-    ema_max_decay: float = 0.9999,
     sample_batch_size: int = 1,
     sample_num_batches: int = 1,
     sample_num_steps: int = 20,
@@ -68,25 +63,6 @@ def textual_inversion_strategy_callbacks(
         image_size=sample_image_size,
     )
 
-    if use_ema:
-        ema_embeddings = EMAModel(
-            text_encoder.text_model.embeddings.overlay.parameters(),
-            inv_gamma=ema_inv_gamma,
-            power=ema_power,
-            max_value=ema_max_decay,
-        )
-        ema_embeddings.to(accelerator.device)
-    else:
-        ema_embeddings = None
-
-    def ema_context():
-        if ema_embeddings is not None:
-            return ema_embeddings.apply_temporary(
-                text_encoder.text_model.embeddings.overlay.parameters()
-            )
-        else:
-            return nullcontext()
-
     def on_accum_model():
         return text_encoder.text_model.embeddings.overlay
 
@@ -98,50 +74,36 @@ def textual_inversion_strategy_callbacks(
     @contextmanager
     def on_eval():
         tokenizer.eval()
-
-        with ema_context():
-            yield
-
-    @torch.no_grad()
-    def on_after_optimize(zero_ids, lr: float):
-        if ema_embeddings is not None:
-            ema_embeddings.step(text_encoder.text_model.embeddings.overlay.parameters())
-
-    def on_log():
-        if ema_embeddings is not None:
-            return {"ema_decay": ema_embeddings.decay}
-        return {}
+        yield
 
     @torch.no_grad()
     def on_checkpoint(step, postfix):
         print(f"Saving checkpoint for step {step}...")
 
-        with ema_context():
-            for (token, ids) in zip(placeholder_tokens, placeholder_token_ids):
-                text_encoder.text_model.embeddings.save_embed(
-                    ids,
-                    checkpoint_output_dir / f"{slugify(token)}_{step}_{postfix}.bin"
-                )
+        for (token, ids) in zip(placeholder_tokens, placeholder_token_ids):
+            text_encoder.text_model.embeddings.save_embed(
+                ids,
+                checkpoint_output_dir / f"{slugify(token)}_{step}_{postfix}.bin"
+            )
 
     @torch.no_grad()
     def on_sample(step):
-        with ema_context():
-            unet_ = accelerator.unwrap_model(unet, keep_fp32_wrapper=True)
-            text_encoder_ = accelerator.unwrap_model(text_encoder, keep_fp32_wrapper=True)
+        unet_ = accelerator.unwrap_model(unet, keep_fp32_wrapper=True)
+        text_encoder_ = accelerator.unwrap_model(text_encoder, keep_fp32_wrapper=True)
 
-            orig_unet_dtype = unet_.dtype
-            orig_text_encoder_dtype = text_encoder_.dtype
+        orig_unet_dtype = unet_.dtype
+        orig_text_encoder_dtype = text_encoder_.dtype
 
-            unet_.to(dtype=weight_dtype)
-            text_encoder_.to(dtype=weight_dtype)
+        unet_.to(dtype=weight_dtype)
+        text_encoder_.to(dtype=weight_dtype)
 
-            save_samples_(step=step, unet=unet_, text_encoder=text_encoder_)
+        save_samples_(step=step, unet=unet_, text_encoder=text_encoder_)
 
-            unet_.to(dtype=orig_unet_dtype)
-            text_encoder_.to(dtype=orig_text_encoder_dtype)
+        unet_.to(dtype=orig_unet_dtype)
+        text_encoder_.to(dtype=orig_text_encoder_dtype)
 
-            del unet_
-            del text_encoder_
+        del unet_
+        del text_encoder_
 
         if torch.cuda.is_available():
             torch.cuda.empty_cache()
@@ -150,8 +112,6 @@ def textual_inversion_strategy_callbacks(
         on_accum_model=on_accum_model,
         on_train=on_train,
         on_eval=on_eval,
-        on_after_optimize=on_after_optimize,
-        on_log=on_log,
         on_checkpoint=on_checkpoint,
         on_sample=on_sample,
     )
-- 
cgit v1.2.3-70-g09d2