 data/csv.py   | 13
 dreambooth.py | 91
 2 files changed, 68 insertions(+), 36 deletions(-)
diff --git a/data/csv.py b/data/csv.py
index 23b5299..9125212 100644
--- a/data/csv.py
+++ b/data/csv.py
@@ -16,14 +16,17 @@ def prepare_prompt(prompt: Union[str, Dict[str, str]]):
     return {"content": prompt} if isinstance(prompt, str) else prompt
 
 
-def shuffle_prompt(prompt: str):
+def shuffle_prompt(prompt: str, dropout: float = 0):
     def handle_block(block: str):
         words = block.split(", ")
+        words = [w for w in words if w != ""]
+        if dropout != 0:
+            words = [w for w in words if np.random.random() > dropout]
         np.random.shuffle(words)
         return ", ".join(words)
 
     prompt = prompt.split(". ")
-    prompt = [handle_block(b) for b in prompt]
+    prompt = [handle_block(b) for b in prompt if b != ""]
     np.random.shuffle(prompt)
     prompt = ". ".join(prompt)
     return prompt
@@ -48,6 +51,7 @@ class CSVDataModule(pl.LightningDataModule):
         num_class_images: int = 100,
         size: int = 512,
         repeats: int = 1,
+        dropout: float = 0,
         interpolation: str = "bicubic",
         center_crop: bool = False,
         valid_set_size: Optional[int] = None,
@@ -72,6 +76,7 @@ class CSVDataModule(pl.LightningDataModule):
         self.class_identifier = class_identifier
         self.size = size
         self.repeats = repeats
+        self.dropout = dropout
         self.center_crop = center_crop
         self.interpolation = interpolation
         self.valid_set_size = valid_set_size
@@ -123,7 +128,7 @@ class CSVDataModule(pl.LightningDataModule):
                                    instance_identifier=self.instance_identifier, class_identifier=self.class_identifier,
                                    num_class_images=self.num_class_images,
                                    size=self.size, interpolation=self.interpolation,
-                                   center_crop=self.center_crop, repeats=self.repeats)
+                                   center_crop=self.center_crop, repeats=self.repeats, dropout=self.dropout)
         val_dataset = CSVDataset(self.data_val, self.prompt_processor, batch_size=self.batch_size,
                                  instance_identifier=self.instance_identifier,
                                  size=self.size, interpolation=self.interpolation,
@@ -153,6 +158,7 @@ class CSVDataset(Dataset):
         num_class_images: int = 0,
         size: int = 512,
         repeats: int = 1,
+        dropout: float = 0,
         interpolation: str = "bicubic",
         center_crop: bool = False,
     ):
@@ -163,6 +169,7 @@ class CSVDataset(Dataset):
         self.instance_identifier = instance_identifier
         self.class_identifier = class_identifier
         self.num_class_images = num_class_images
+        self.dropout = dropout
         self.image_cache = {}
 
         self.num_instance_images = len(self.data)
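Note on the data/csv.py change above: the new dropout parameter implements per-tag dropout, i.e. each comma-separated tag in a prompt is independently discarded with probability dropout before the remaining tags are shuffled. A minimal standalone sketch of that behaviour (the example prompt and the final print call are illustrative, not part of the repository):

    import numpy as np

    def shuffle_prompt(prompt: str, dropout: float = 0):
        # Shuffle tags within each ". "-separated block, optionally dropping tags first.
        def handle_block(block: str):
            words = block.split(", ")
            words = [w for w in words if w != ""]
            if dropout != 0:
                # Each tag survives with probability (1 - dropout).
                words = [w for w in words if np.random.random() > dropout]
            np.random.shuffle(words)
            return ", ".join(words)

        blocks = prompt.split(". ")
        blocks = [handle_block(b) for b in blocks if b != ""]
        np.random.shuffle(blocks)
        return ". ".join(blocks)

    # Illustrative call: with dropout=0.1 each tag has a 10% chance of being removed.
    print(shuffle_prompt("a photo of a dog, sitting, outdoors. soft light, bokeh", dropout=0.1))

With the default dropout=0, the only behavioural change from the old version is that empty tags and empty blocks are filtered out before shuffling.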
diff --git a/dreambooth.py b/dreambooth.py
index 1ef5156..1d6735f 100644
--- a/dreambooth.py
+++ b/dreambooth.py
@@ -89,6 +89,17 @@ def parse_args():
         help="Whether to train the whole text encoder."
     )
     parser.add_argument(
+        "--train_text_encoder_epochs",
+        default=999999,
+        help="Number of epochs the text encoder will be trained."
+    )
+    parser.add_argument(
+        "--tag_dropout",
+        type=float,
+        default=0.1,
+        help="Tag dropout probability.",
+    )
+    parser.add_argument(
         "--num_class_images",
         type=int,
         default=400,
@@ -185,9 +196,9 @@ def parse_args():
         ),
     )
     parser.add_argument(
-        "--lr_warmup_steps",
+        "--lr_warmup_epochs",
         type=int,
-        default=500,
+        default=20,
         help="Number of steps for the warmup in the lr scheduler."
     )
     parser.add_argument(
@@ -377,6 +388,20 @@ def make_grid(images, rows, cols):
     return grid
 
 
+class AverageMeter:
+    def __init__(self, name=None):
+        self.name = name
+        self.reset()
+
+    def reset(self):
+        self.sum = self.count = self.avg = 0
+
+    def update(self, val, n=1):
+        self.sum += val * n
+        self.count += n
+        self.avg = self.sum / self.count
+
+
 class Checkpointer:
     def __init__(
         self,
@@ -744,6 +769,7 @@ def main():
         num_class_images=args.num_class_images,
         size=args.resolution,
         repeats=args.repeats,
+        dropout=args.tag_dropout,
         center_crop=args.center_crop,
         valid_set_size=args.valid_set_size,
         num_workers=args.dataloader_num_workers,
@@ -802,6 +828,8 @@ def main():
         overrode_max_train_steps = True
     num_epochs = math.ceil(args.max_train_steps / num_update_steps_per_epoch)
 
+    warmup_steps = args.lr_warmup_epochs * num_update_steps_per_epoch * args.gradient_accumulation_steps
+
     if args.lr_scheduler == "one_cycle":
         lr_scheduler = get_one_cycle_schedule(
             optimizer=optimizer,
@@ -810,16 +838,16 @@ def main():
     elif args.lr_scheduler == "cosine_with_restarts":
         lr_scheduler = get_cosine_with_hard_restarts_schedule_with_warmup(
             optimizer=optimizer,
-            num_warmup_steps=args.lr_warmup_steps * args.gradient_accumulation_steps,
+            num_warmup_steps=warmup_steps,
             num_training_steps=args.max_train_steps * args.gradient_accumulation_steps,
             num_cycles=args.lr_cycles or math.ceil(math.sqrt(
-                ((args.max_train_steps - args.lr_warmup_steps) / num_update_steps_per_epoch))),
+                ((args.max_train_steps - warmup_steps) / num_update_steps_per_epoch))),
         )
     else:
         lr_scheduler = get_scheduler(
             args.lr_scheduler,
             optimizer=optimizer,
-            num_warmup_steps=args.lr_warmup_steps * args.gradient_accumulation_steps,
+            num_warmup_steps=warmup_steps,
             num_training_steps=args.max_train_steps * args.gradient_accumulation_steps,
         )
 
@@ -863,11 +891,11 @@ def main():
 
     global_step = 0
 
-    total_loss = 0.0
-    total_acc = 0.0
+    avg_loss = AverageMeter()
+    avg_acc = AverageMeter()
 
-    total_loss_val = 0.0
-    total_acc_val = 0.0
+    avg_loss_val = AverageMeter()
+    avg_acc_val = AverageMeter()
 
     max_acc_val = 0.0
 
@@ -913,7 +941,11 @@ def main():
         local_progress_bar.reset()
 
         unet.train()
-        text_encoder.train()
+
+        if epoch < args.train_text_encoder_epochs:
+            text_encoder.train()
+        elif epoch == args.train_text_encoder_epochs:
+            freeze_params(text_encoder.parameters())
 
         sample_checkpoint = False
 
@@ -980,7 +1012,7 @@ def main():
                 if accelerator.sync_gradients:
                     params_to_clip = (
                         itertools.chain(unet.parameters(), text_encoder.parameters())
-                        if args.train_text_encoder
+                        if args.train_text_encoder and epoch < args.train_text_encoder_epochs
                         else unet.parameters()
                     )
                     accelerator.clip_grad_norm_(params_to_clip, args.max_grad_norm)
@@ -992,11 +1024,10 @@ def main():
                         ema_unet.step(unet)
                 optimizer.zero_grad(set_to_none=True)
 
-                acc = (model_pred == latents).float()
-                acc = acc.mean()
+                acc = (model_pred == latents).float().mean()
 
-                total_loss += loss.item()
-                total_acc += acc.item()
+                avg_loss.update(loss.detach_(), bsz)
+                avg_acc.update(acc.detach_(), bsz)
 
             # Checks if the accelerator has performed an optimization step behind the scenes
             if accelerator.sync_gradients:
@@ -1013,8 +1044,8 @@ def main():
                     sample_checkpoint = True
 
             logs = {
-                "train/loss": total_loss / global_step if global_step != 0 else 0,
-                "train/acc": total_acc / global_step if global_step != 0 else 0,
+                "train/loss": avg_loss.avg.item(),
+                "train/acc": avg_acc.avg.item(),
                 "train/cur_loss": loss.item(),
                 "train/cur_acc": acc.item(),
                 "lr/unet": lr_scheduler.get_last_lr()[0],
@@ -1064,41 +1095,35 @@ def main():
 
                 loss = F.mse_loss(model_pred.float(), target.float(), reduction="mean")
 
-                acc = (model_pred == latents).float()
-                acc = acc.mean()
+                acc = (model_pred == latents).float().mean()
 
-                total_loss_val += loss.item()
-                total_acc_val += acc.item()
+                avg_loss_val.update(loss.detach_(), bsz)
+                avg_acc_val.update(acc.detach_(), bsz)
 
                 if accelerator.sync_gradients:
                     local_progress_bar.update(1)
                     global_progress_bar.update(1)
 
                 logs = {
-                    "val/loss": total_loss_val / global_step,
-                    "val/acc": total_acc_val / global_step,
+                    "val/loss": avg_loss_val.avg.item(),
+                    "val/acc": avg_acc_val.avg.item(),
                     "val/cur_loss": loss.item(),
                     "val/cur_acc": acc.item(),
                 }
                 local_progress_bar.set_postfix(**logs)
 
-        val_step = (epoch + 1) * len(val_dataloader)
-        avg_acc_val = total_acc_val / val_step
-        avg_loss_val = total_loss_val / val_step
-
         accelerator.log({
-            "val/loss": avg_loss_val,
-            "val/acc": avg_acc_val,
+            "val/loss": avg_loss_val.avg.item(),
+            "val/acc": avg_acc_val.avg.item(),
         }, step=global_step)
 
         local_progress_bar.clear()
         global_progress_bar.clear()
 
-        if avg_acc_val > max_acc_val:
+        if avg_acc_val.avg.item() > max_acc_val:
             accelerator.print(
-                f"Global step {global_step}: Validation loss reached new maximum: {max_acc_val:.2e} -> {avg_acc_val:.2e}")
-            checkpointer.save_embedding(global_step, "milestone")
-            max_acc_val = avg_acc_val
+                f"Global step {global_step}: Validation accuracy reached new maximum: {max_acc_val:.2e} -> {avg_acc_val.avg.item():.2e}")
+            max_acc_val = avg_acc_val.avg.item()
 
         if sample_checkpoint and accelerator.is_main_process:
             checkpointer.save_samples(global_step, args.sample_steps)
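Note on the --train_text_encoder_epochs change: the text encoder now trains only while epoch < args.train_text_encoder_epochs and is frozen exactly once, when the cutoff epoch is reached (the elif branch fires only at that epoch); after the cutoff, gradient clipping also stops including the text-encoder parameters. A rough sketch of the schedule, assuming freeze_params is the repository helper that simply clears requires_grad (the placeholder module and the cutoff value below are made up):

    import torch.nn as nn

    def freeze_params(params):
        # Stand-in for the helper referenced in the diff; assumed to clear requires_grad.
        for p in params:
            p.requires_grad = False

    text_encoder = nn.Linear(8, 8)   # placeholder module, not the real CLIP text encoder
    train_text_encoder_epochs = 3    # e.g. --train_text_encoder_epochs 3

    for epoch in range(5):
        if epoch < train_text_encoder_epochs:
            text_encoder.train()                       # epochs 0..2: still being trained
        elif epoch == train_text_encoder_epochs:
            freeze_params(text_encoder.parameters())   # frozen once, at the cutoff epoch
        frozen = all(not p.requires_grad for p in text_encoder.parameters())
        print(f"epoch {epoch}: frozen={frozen}")       # False, False, False, True, True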
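Note on the bookkeeping changes: running loss/accuracy are now batch-size-weighted means kept by the new AverageMeter (replacing totals that were divided by global_step), and LR warmup is configured in epochs and converted to scheduler steps before the scheduler is built. A small self-contained sketch with illustrative numbers (not taken from the repository; in the training script the values passed to update() are tensors, which is why the logs call .avg.item()):

    class AverageMeter:
        def __init__(self, name=None):
            self.name = name
            self.reset()

        def reset(self):
            self.sum = self.count = self.avg = 0

        def update(self, val, n=1):
            # n is the batch size, so avg is a per-sample running mean.
            self.sum += val * n
            self.count += n
            self.avg = self.sum / self.count

    avg_loss = AverageMeter("loss")
    avg_loss.update(0.9, n=4)    # batch of 4 with mean loss 0.9
    avg_loss.update(0.5, n=2)    # batch of 2 with mean loss 0.5
    print(avg_loss.avg)          # (0.9 * 4 + 0.5 * 2) / 6 ≈ 0.767

    # Warmup is now given in epochs and expanded to steps as in the diff:
    lr_warmup_epochs = 20              # --lr_warmup_epochs default
    num_update_steps_per_epoch = 100   # illustrative
    gradient_accumulation_steps = 2    # illustrative
    warmup_steps = lr_warmup_epochs * num_update_steps_per_epoch * gradient_accumulation_steps
    print(warmup_steps)                # 4000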
