diff options
Diffstat (limited to 'dreambooth.py')
| -rw-r--r-- | dreambooth.py | 36 |
1 file changed, 25 insertions, 11 deletions
diff --git a/dreambooth.py b/dreambooth.py index 675320b..3110c6d 100644 --- a/dreambooth.py +++ b/dreambooth.py | |||
| @@ -118,6 +118,12 @@ def parse_args(): | |||
| 118 | help="The output directory where the model predictions and checkpoints will be written.", | 118 | help="The output directory where the model predictions and checkpoints will be written.", |
| 119 | ) | 119 | ) |
| 120 | parser.add_argument( | 120 | parser.add_argument( |
| 121 | "--embeddings_dir", | ||
| 122 | type=str, | ||
| 123 | default="embeddings_ti", | ||
| 124 | help="The embeddings directory where Textual Inversion embeddings are stored.", | ||
| 125 | ) | ||
| 126 | parser.add_argument( | ||
| 121 | "--seed", | 127 | "--seed", |
| 122 | type=int, | 128 | type=int, |
| 123 | default=None, | 129 | default=None, |
| @@ -521,7 +527,7 @@ class Checkpointer: | |||
| 521 | negative_prompt=nprompt, | 527 | negative_prompt=nprompt, |
| 522 | height=self.sample_image_size, | 528 | height=self.sample_image_size, |
| 523 | width=self.sample_image_size, | 529 | width=self.sample_image_size, |
| 524 | latents_or_image=latents[:len(prompt)] if latents is not None else None, | 530 | image=latents[:len(prompt)] if latents is not None else None, |
| 525 | generator=generator if latents is not None else None, | 531 | generator=generator if latents is not None else None, |
| 526 | guidance_scale=guidance_scale, | 532 | guidance_scale=guidance_scale, |
| 527 | eta=eta, | 533 | eta=eta, |
| @@ -567,6 +573,8 @@ def main(): | |||
| 567 | basepath = Path(args.output_dir).joinpath(slugify(instance_identifier), now) | 573 | basepath = Path(args.output_dir).joinpath(slugify(instance_identifier), now) |
| 568 | basepath.mkdir(parents=True, exist_ok=True) | 574 | basepath.mkdir(parents=True, exist_ok=True) |
| 569 | 575 | ||
| 576 | embeddings_dir = Path(args.embeddings_dir) | ||
| 577 | |||
| 570 | accelerator = Accelerator( | 578 | accelerator = Accelerator( |
| 571 | log_with=LoggerType.TENSORBOARD, | 579 | log_with=LoggerType.TENSORBOARD, |
| 572 | logging_dir=f"{basepath}", | 580 | logging_dir=f"{basepath}", |
| @@ -630,15 +638,25 @@ def main(): | |||
| 630 | 638 | ||
| 631 | placeholder_token_id = tokenizer.convert_tokens_to_ids(args.placeholder_token) | 639 | placeholder_token_id = tokenizer.convert_tokens_to_ids(args.placeholder_token) |
| 632 | 640 | ||
| 641 | # Resize the token embeddings as we are adding new special tokens to the tokenizer | ||
| 642 | text_encoder.resize_token_embeddings(len(tokenizer)) | ||
| 643 | |||
| 644 | token_embeds = text_encoder.get_input_embeddings().weight.data | ||
| 645 | |||
| 633 | print(f"Token ID mappings:") | 646 | print(f"Token ID mappings:") |
| 634 | for (token_id, token) in zip(placeholder_token_id, args.placeholder_token): | 647 | for (token_id, token) in zip(placeholder_token_id, args.placeholder_token): |
| 635 | print(f"- {token_id} {token}") | 648 | print(f"- {token_id} {token}") |
| 636 | 649 | ||
| 637 | # Resize the token embeddings as we are adding new special tokens to the tokenizer | 650 | embedding_file = embeddings_dir.joinpath(f"{token}.bin") |
| 638 | text_encoder.resize_token_embeddings(len(tokenizer)) | 651 | if embedding_file.exists() and embedding_file.is_file(): |
| 652 | embedding_data = torch.load(embedding_file, map_location="cpu") | ||
| 653 | |||
| 654 | emb = next(iter(embedding_data.values())) | ||
| 655 | if len(emb.shape) == 1: | ||
| 656 | emb = emb.unsqueeze(0) | ||
| 657 | |||
| 658 | token_embeds[token_id] = emb | ||
| 639 | 659 | ||
| 640 | # Initialise the newly added placeholder token with the embeddings of the initializer token | ||
| 641 | token_embeds = text_encoder.get_input_embeddings().weight.data | ||
| 642 | original_token_embeds = token_embeds.detach().clone().to(accelerator.device) | 660 | original_token_embeds = token_embeds.detach().clone().to(accelerator.device) |
| 643 | initializer_token_embeddings = text_encoder.get_input_embeddings()(initializer_token_ids) | 661 | initializer_token_embeddings = text_encoder.get_input_embeddings()(initializer_token_ids) |
| 644 | 662 | ||
| @@ -959,8 +977,6 @@ def main(): | |||
| 959 | else: | 977 | else: |
| 960 | raise ValueError(f"Unknown prediction type {noise_scheduler.config.prediction_type}") | 978 | raise ValueError(f"Unknown prediction type {noise_scheduler.config.prediction_type}") |
| 961 | 979 | ||
| 962 | del timesteps, noise, latents, noisy_latents, encoder_hidden_states | ||
| 963 | |||
| 964 | if args.num_class_images != 0: | 980 | if args.num_class_images != 0: |
| 965 | # Chunk the noise and model_pred into two parts and compute the loss on each part separately. | 981 | # Chunk the noise and model_pred into two parts and compute the loss on each part separately. |
| 966 | model_pred, model_pred_prior = torch.chunk(model_pred, 2, dim=0) | 982 | model_pred, model_pred_prior = torch.chunk(model_pred, 2, dim=0) |
| @@ -977,6 +993,8 @@ def main(): | |||
| 977 | else: | 993 | else: |
| 978 | loss = F.mse_loss(model_pred.float(), target.float(), reduction="mean") | 994 | loss = F.mse_loss(model_pred.float(), target.float(), reduction="mean") |
| 979 | 995 | ||
| 996 | acc = (model_pred == latents).float().mean() | ||
| 997 | |||
| 980 | accelerator.backward(loss) | 998 | accelerator.backward(loss) |
| 981 | 999 | ||
| 982 | if not args.train_text_encoder: | 1000 | if not args.train_text_encoder: |
| @@ -1004,8 +1022,6 @@ def main(): | |||
| 1004 | ema_unet.step(unet) | 1022 | ema_unet.step(unet) |
| 1005 | optimizer.zero_grad(set_to_none=True) | 1023 | optimizer.zero_grad(set_to_none=True) |
| 1006 | 1024 | ||
| 1007 | acc = (model_pred == latents).float().mean() | ||
| 1008 | |||
| 1009 | avg_loss.update(loss.detach_(), bsz) | 1025 | avg_loss.update(loss.detach_(), bsz) |
| 1010 | avg_acc.update(acc.detach_(), bsz) | 1026 | avg_acc.update(acc.detach_(), bsz) |
| 1011 | 1027 | ||
| @@ -1069,8 +1085,6 @@ def main(): | |||
| 1069 | else: | 1085 | else: |
| 1070 | raise ValueError(f"Unknown prediction type {noise_scheduler.config.prediction_type}") | 1086 | raise ValueError(f"Unknown prediction type {noise_scheduler.config.prediction_type}") |
| 1071 | 1087 | ||
| 1072 | del timesteps, noise, latents, noisy_latents, encoder_hidden_states | ||
| 1073 | |||
| 1074 | loss = F.mse_loss(model_pred.float(), target.float(), reduction="mean") | 1088 | loss = F.mse_loss(model_pred.float(), target.float(), reduction="mean") |
| 1075 | 1089 | ||
| 1076 | acc = (model_pred == latents).float().mean() | 1090 | acc = (model_pred == latents).float().mean() |
