Remove training guidance_scale

author: Volpeon <git@volpeon.ink> 2023-06-22 07:34:04 +0200
committer: Volpeon <git@volpeon.ink> 2023-06-22 07:34:04 +0200
commit: 11b7740deeef7903e81ba4c65a45853323a5fd5e (patch)
tree: 0d5eec0e574447afe80538252884e0093742e3cb
parent: Remove convnext (diff)
download: textual-inversion-diff-11b7740deeef7903e81ba4c65a45853323a5fd5e.tar.gz
textual-inversion-diff-11b7740deeef7903e81ba4c65a45853323a5fd5e.tar.bz2
textual-inversion-diff-11b7740deeef7903e81ba4c65a45853323a5fd5e.zip
5 files changed, 1 insertions, 42 deletions
diff --git a/data/csv.py b/data/csv.py
index d726033..43bf14c 100644
--- a/data/csv.py
+++ b/data/csv.py
@@ -108,7 +108,6 @@ def collate_fn(
    dtype: torch.dtype,
    tokenizer: CLIPTokenizer,
    max_token_id_length: Optional[int],
-    with_guidance: bool,
    with_prior_preservation: bool,
    examples,
 ):
@@ -195,7 +194,6 @@ class VlpnDataModule:
        tokenizer: CLIPTokenizer,
        constant_prompt_length: bool = False,
        class_subdir: str = "cls",
-        with_guidance: bool = False,
        num_class_images: int = 1,
        size: int = 768,
        num_buckets: int = 0,
@@ -228,7 +226,6 @@ class VlpnDataModule:
        self.class_root.mkdir(parents=True, exist_ok=True)
        self.placeholder_tokens = placeholder_tokens
        self.num_class_images = num_class_images
-        self.with_guidance = with_guidance
        self.constant_prompt_length = constant_prompt_length
        self.max_token_id_length = None
@@ -356,7 +353,6 @@ class VlpnDataModule:
            self.dtype,
            self.tokenizer,
            self.max_token_id_length,
-            self.with_guidance,
            self.num_class_images != 0,
        )
diff --git a/train_dreambooth.py b/train_dreambooth.py
index 0543a35..939a8f3 100644
--- a/train_dreambooth.py
+++ b/train_dreambooth.py
@@ -194,11 +194,6 @@ def parse_args():
        help="Shuffle tags.",
    )
    parser.add_argument(
-        "--guidance_scale",
-        type=float,
-        default=0,
-    )
-    parser.add_argument(
        "--num_class_images",
        type=int,
        default=0,
@@ -874,7 +869,6 @@ def main():
        dtype=weight_dtype,
        seed=args.seed,
        compile_unet=args.compile_unet,
-        guidance_scale=args.guidance_scale,
        prior_loss_weight=args.prior_loss_weight if args.num_class_images != 0 else 0,
        sample_scheduler=sample_scheduler,
        sample_batch_size=args.sample_batch_size,
@@ -893,7 +887,6 @@ def main():
        tokenizer=tokenizer,
        constant_prompt_length=args.compile_unet,
        class_subdir=args.class_image_dir,
-        with_guidance=args.guidance_scale != 0,
        num_class_images=args.num_class_images,
        size=args.resolution,
        num_buckets=args.num_buckets,
diff --git a/train_lora.py b/train_lora.py
index b7ee2d6..51dc827 100644
--- a/train_lora.py
+++ b/train_lora.py
@@ -207,11 +207,6 @@ def parse_args():
        help="Shuffle tags.",
    )
    parser.add_argument(
-        "--guidance_scale",
-        type=float,
-        default=0,
-    )
-    parser.add_argument(
        "--num_class_images",
        type=int,
        default=0,
@@ -998,7 +993,6 @@ def main():
        dtype=weight_dtype,
        seed=args.seed,
        compile_unet=args.compile_unet,
-        guidance_scale=args.guidance_scale,
        prior_loss_weight=args.prior_loss_weight if args.num_class_images != 0 else 0,
        sample_scheduler=sample_scheduler,
        sample_batch_size=args.sample_batch_size,
@@ -1022,7 +1016,6 @@ def main():
        tokenizer=tokenizer,
        constant_prompt_length=args.compile_unet,
        class_subdir=args.class_image_dir,
-        with_guidance=args.guidance_scale != 0,
        num_class_images=args.num_class_images,
        size=args.resolution,
        num_buckets=args.num_buckets,
diff --git a/train_ti.py b/train_ti.py
index 7d1ef19..7f93960 100644
--- a/train_ti.py
+++ b/train_ti.py
@@ -125,11 +125,6 @@ def parse_args():
        action="store_true",
    )
    parser.add_argument(
-        "--guidance_scale",
-        type=float,
-        default=0,
-    )
-    parser.add_argument(
        "--num_class_images",
        type=int,
        default=0,
@@ -852,7 +847,6 @@ def main():
        dtype=weight_dtype,
        seed=args.seed,
        compile_unet=args.compile_unet,
-        guidance_scale=args.guidance_scale,
        prior_loss_weight=args.prior_loss_weight if args.num_class_images != 0 else 0,
        no_val=args.valid_set_size == 0,
        strategy=textual_inversion_strategy,
@@ -923,7 +917,6 @@ def main():
            batch_size=args.train_batch_size,
            tokenizer=tokenizer,
            class_subdir=args.class_image_dir,
-            with_guidance=args.guidance_scale != 0,
            num_class_images=args.num_class_images,
            size=args.resolution,
            num_buckets=args.num_buckets,
diff --git a/training/functional.py b/training/functional.py
index a3d1f08..43b03ac 100644
--- a/training/functional.py
+++ b/training/functional.py
@@ -342,7 +342,6 @@ def loss_step(
    schedule_sampler: ScheduleSampler,
    unet: UNet2DConditionModel,
    text_encoder: CLIPTextModel,
-    guidance_scale: float,
    prior_loss_weight: float,
    seed: int,
    input_pertubation: float,
@@ -400,19 +399,6 @@ def loss_step(
        noisy_latents, timesteps, encoder_hidden_states, return_dict=False
    )[0]
-    if guidance_scale != 0:
-        uncond_encoder_hidden_states = get_extended_embeddings(
-            text_encoder, batch["negative_input_ids"], batch["negative_attention_mask"]
-        )
-        uncond_encoder_hidden_states = uncond_encoder_hidden_states.to(dtype=unet.dtype)
-        model_pred_uncond = unet(
-            noisy_latents, timesteps, uncond_encoder_hidden_states, return_dict=False
-        )[0]
-        model_pred = model_pred_uncond + guidance_scale * (
-            model_pred - model_pred_uncond
-        )
    # Get the target for loss depending on the prediction type
    if noise_scheduler.config.prediction_type == "epsilon":
        target = noise
@@ -425,7 +411,7 @@ def loss_step(
    acc = (model_pred == target).float().mean()
-    if guidance_scale == 0 and prior_loss_weight != 0:
+    if prior_loss_weight != 0:
        # Chunk the noise and model_pred into two parts and compute the loss on each part separately.
        model_pred, model_pred_prior = torch.chunk(model_pred, 2, dim=0)
        target, target_prior = torch.chunk(target, 2, dim=0)
@@ -727,7 +713,6 @@ def train(
    milestone_checkpoints: bool = True,
    cycle: int = 1,
    global_step_offset: int = 0,
-    guidance_scale: float = 0.0,
    prior_loss_weight: float = 1.0,
    input_pertubation: float = 0.1,
    schedule_sampler: Optional[ScheduleSampler] = None,
@@ -787,7 +772,6 @@ def train(
        schedule_sampler,
        unet,
        text_encoder,
-        guidance_scale,
        prior_loss_weight,
        seed,
        input_pertubation,
author	Volpeon <git@volpeon.ink>	2023-06-22 07:34:04 +0200
committer	Volpeon <git@volpeon.ink>	2023-06-22 07:34:04 +0200
commit	11b7740deeef7903e81ba4c65a45853323a5fd5e (patch)
tree	0d5eec0e574447afe80538252884e0093742e3cb
parent	Remove convnext (diff)
download	textual-inversion-diff-11b7740deeef7903e81ba4c65a45853323a5fd5e.tar.gz textual-inversion-diff-11b7740deeef7903e81ba4c65a45853323a5fd5e.tar.bz2 textual-inversion-diff-11b7740deeef7903e81ba4c65a45853323a5fd5e.zip