From e9dc712268e45d30451fc6fee8626a0a8af7ccdc Mon Sep 17 00:00:00 2001
From: Volpeon
Date: Mon, 10 Apr 2023 12:57:21 +0200
Subject: Fix sample gen: models sometimes weren't in eval mode

---
 training/functional.py          |  3 +++
 training/strategy/dreambooth.py | 27 +++++++++++++--------------
 training/strategy/lora.py       |  2 +-
 training/strategy/ti.py         | 25 ++++++++++++-------------
 4 files changed, 29 insertions(+), 28 deletions(-)

diff --git a/training/functional.py b/training/functional.py
index 46d25f6..ff6d3a9 100644
--- a/training/functional.py
+++ b/training/functional.py
@@ -695,5 +695,8 @@ def train(
         callbacks=callbacks,
     )
 
+    accelerator.unwrap_model(text_encoder, keep_fp32_wrapper=False)
+    accelerator.unwrap_model(unet, keep_fp32_wrapper=False)
+
     accelerator.end_training()
     accelerator.free_memory()
diff --git a/training/strategy/dreambooth.py b/training/strategy/dreambooth.py
index 42624cd..7cdfc7f 100644
--- a/training/strategy/dreambooth.py
+++ b/training/strategy/dreambooth.py
@@ -113,7 +113,7 @@ def dreambooth_strategy_callbacks(
         accelerator.clip_grad_norm_(itertools.chain(*params_to_clip), max_grad_norm)
 
     @torch.no_grad()
-    def on_after_optimize(_, lr: float):
+    def on_after_optimize(_, lrs: dict[str, float]):
         if ema_unet is not None:
             ema_unet.step(unet.parameters())
 
@@ -149,25 +149,24 @@ def dreambooth_strategy_callbacks(
         if torch.cuda.is_available():
             torch.cuda.empty_cache()
 
-    @torch.no_grad()
+    @on_eval()
     def on_sample(step):
-        with ema_context():
-            unet_ = accelerator.unwrap_model(unet, keep_fp32_wrapper=True)
-            text_encoder_ = accelerator.unwrap_model(text_encoder, keep_fp32_wrapper=True)
+        unet_ = accelerator.unwrap_model(unet, keep_fp32_wrapper=True)
+        text_encoder_ = accelerator.unwrap_model(text_encoder, keep_fp32_wrapper=True)
 
-            orig_unet_dtype = unet_.dtype
-            orig_text_encoder_dtype = text_encoder_.dtype
+        orig_unet_dtype = unet_.dtype
+        orig_text_encoder_dtype = text_encoder_.dtype
 
-            unet_.to(dtype=weight_dtype)
-            text_encoder_.to(dtype=weight_dtype)
+        unet_.to(dtype=weight_dtype)
+        text_encoder_.to(dtype=weight_dtype)
 
-            save_samples_(step=step, unet=unet_, text_encoder=text_encoder_)
+        save_samples_(step=step, unet=unet_, text_encoder=text_encoder_)
 
-            unet_.to(dtype=orig_unet_dtype)
-            text_encoder_.to(dtype=orig_text_encoder_dtype)
+        unet_.to(dtype=orig_unet_dtype)
+        text_encoder_.to(dtype=orig_text_encoder_dtype)
 
-            del unet_
-            del text_encoder_
+        del unet_
+        del text_encoder_
 
         if torch.cuda.is_available():
             torch.cuda.empty_cache()
diff --git a/training/strategy/lora.py b/training/strategy/lora.py
index 73ec8f2..0f72a17 100644
--- a/training/strategy/lora.py
+++ b/training/strategy/lora.py
@@ -146,7 +146,7 @@ def lora_strategy_callbacks(
         if torch.cuda.is_available():
             torch.cuda.empty_cache()
 
-    @torch.no_grad()
+    @on_eval()
     def on_sample(step):
         unet_ = accelerator.unwrap_model(unet, keep_fp32_wrapper=True)
         text_encoder_ = accelerator.unwrap_model(text_encoder, keep_fp32_wrapper=True)
diff --git a/training/strategy/ti.py b/training/strategy/ti.py
index 363c3f9..f00045f 100644
--- a/training/strategy/ti.py
+++ b/training/strategy/ti.py
@@ -142,25 +142,24 @@ def textual_inversion_strategy_callbacks(
                 checkpoint_output_dir / f"{slugify(token)}_{step}_{postfix}.bin"
             )
 
-    @torch.no_grad()
+    @on_eval()
     def on_sample(step):
-        with ema_context():
-            unet_ = accelerator.unwrap_model(unet, keep_fp32_wrapper=True)
-            text_encoder_ = accelerator.unwrap_model(text_encoder, keep_fp32_wrapper=True)
+        unet_ = accelerator.unwrap_model(unet, keep_fp32_wrapper=True)
+        text_encoder_ = accelerator.unwrap_model(text_encoder, keep_fp32_wrapper=True)
 
-            orig_unet_dtype = unet_.dtype
-            orig_text_encoder_dtype = text_encoder_.dtype
+        orig_unet_dtype = unet_.dtype
+        orig_text_encoder_dtype = text_encoder_.dtype
 
-            unet_.to(dtype=weight_dtype)
-            text_encoder_.to(dtype=weight_dtype)
+        unet_.to(dtype=weight_dtype)
+        text_encoder_.to(dtype=weight_dtype)
 
-            save_samples_(step=step, unet=unet_, text_encoder=text_encoder_)
+        save_samples_(step=step, unet=unet_, text_encoder=text_encoder_)
 
-            unet_.to(dtype=orig_unet_dtype)
-            text_encoder_.to(dtype=orig_text_encoder_dtype)
+        unet_.to(dtype=orig_unet_dtype)
+        text_encoder_.to(dtype=orig_text_encoder_dtype)
 
-            del unet_
-            del text_encoder_
+        del unet_
+        del text_encoder_
 
         if torch.cuda.is_available():
             torch.cuda.empty_cache()
-- 
cgit v1.2.3-70-g09d2
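Note on @on_eval() (not part of the patch): the decorator itself is defined elsewhere in
this repository, so the hunks above only show its call sites replacing the bare
@torch.no_grad(). What follows is a minimal sketch of the idea, assuming the decorator
combines no-grad inference, switching the wrapped models into eval mode (the step the
commit message says was sometimes missing), and the EMA weight context that the deleted
"with ema_context():" blocks used to enter explicitly. The names make_on_eval, models,
and ema_context are illustrative, not the repository's actual API.

from contextlib import nullcontext
from functools import wraps

import torch


def make_on_eval(models, ema_context=None):
    """Hypothetical factory for an @on_eval()-style decorator."""

    def on_eval():
        def decorator(fn):
            @wraps(fn)
            @torch.no_grad()
            def wrapped(*args, **kwargs):
                # Remember each model's train/eval state so it can be restored.
                was_training = [m.training for m in models]
                ctx = ema_context() if ema_context is not None else nullcontext()
                try:
                    with ctx:
                        for m in models:
                            m.eval()  # the step that was sometimes missing before this fix
                        return fn(*args, **kwargs)
                finally:
                    # Restore the original train/eval state afterwards.
                    for m, t in zip(models, was_training):
                        m.train(t)

            return wrapped

        return decorator

    return on_eval

A strategy could then build the decorator once, e.g.
on_eval = make_on_eval([unet, text_encoder], ema_context), and apply @on_eval() to
on_sample as in the hunks above.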