diff options
author | Volpeon <git@volpeon.ink> | 2023-04-02 08:42:33 +0200 |
---|---|---|
committer | Volpeon <git@volpeon.ink> | 2023-04-02 08:42:33 +0200 |
commit | 11e6f8f88483e6cfdccd66ad758ae1dfcfc0283b (patch) | |
tree | e66c62abb974c01769285b1c01c748e6c49cc97b | |
parent | Revert (diff) | |
download | textual-inversion-diff-11e6f8f88483e6cfdccd66ad758ae1dfcfc0283b.tar.gz textual-inversion-diff-11e6f8f88483e6cfdccd66ad758ae1dfcfc0283b.tar.bz2 textual-inversion-diff-11e6f8f88483e6cfdccd66ad758ae1dfcfc0283b.zip |
Lora: Only register params with grad to optimizer
-rw-r--r-- | train_lora.py | 10 | ||||
-rw-r--r-- | training/strategy/lora.py | 4 | ||||
-rw-r--r-- | training/strategy/ti.py | 1 |
3 files changed, 7 insertions, 8 deletions
diff --git a/train_lora.py b/train_lora.py index 8fc2d69..cf73645 100644 --- a/train_lora.py +++ b/train_lora.py | |||
@@ -662,9 +662,13 @@ def main(): | |||
662 | sample_frequency = math.ceil(num_train_epochs * (sample_frequency / args.num_train_steps)) | 662 | sample_frequency = math.ceil(num_train_epochs * (sample_frequency / args.num_train_steps)) |
663 | 663 | ||
664 | optimizer = create_optimizer( | 664 | optimizer = create_optimizer( |
665 | itertools.chain( | 665 | ( |
666 | unet.parameters(), | 666 | param |
667 | text_encoder.parameters(), | 667 | for param in itertools.chain( |
668 | unet.parameters(), | ||
669 | text_encoder.parameters(), | ||
670 | ) | ||
671 | if param.requires_grad | ||
668 | ), | 672 | ), |
669 | lr=args.learning_rate, | 673 | lr=args.learning_rate, |
670 | ) | 674 | ) |
diff --git a/training/strategy/lora.py b/training/strategy/lora.py index 8905171..209785a 100644 --- a/training/strategy/lora.py +++ b/training/strategy/lora.py | |||
@@ -139,10 +139,6 @@ def lora_prepare( | |||
139 | train_dataloader: DataLoader, | 139 | train_dataloader: DataLoader, |
140 | val_dataloader: Optional[DataLoader], | 140 | val_dataloader: Optional[DataLoader], |
141 | lr_scheduler: torch.optim.lr_scheduler._LRScheduler, | 141 | lr_scheduler: torch.optim.lr_scheduler._LRScheduler, |
142 | lora_rank: int = 4, | ||
143 | lora_alpha: int = 32, | ||
144 | lora_dropout: float = 0, | ||
145 | lora_bias: str = "none", | ||
146 | **kwargs | 142 | **kwargs |
147 | ): | 143 | ): |
148 | return accelerator.prepare(text_encoder, unet, optimizer, train_dataloader, val_dataloader, lr_scheduler) + ({},) | 144 | return accelerator.prepare(text_encoder, unet, optimizer, train_dataloader, val_dataloader, lr_scheduler) + ({},) |
diff --git a/training/strategy/ti.py b/training/strategy/ti.py index 677f5a3..c7520ed 100644 --- a/training/strategy/ti.py +++ b/training/strategy/ti.py | |||
@@ -209,7 +209,6 @@ def textual_inversion_prepare( | |||
209 | text_encoder.text_model.final_layer_norm.requires_grad_(False) | 209 | text_encoder.text_model.final_layer_norm.requires_grad_(False) |
210 | text_encoder.text_model.embeddings.position_embedding.requires_grad_(False) | 210 | text_encoder.text_model.embeddings.position_embedding.requires_grad_(False) |
211 | text_encoder.text_model.embeddings.token_embedding.requires_grad_(False) | 211 | text_encoder.text_model.embeddings.token_embedding.requires_grad_(False) |
212 | text_encoder.eval() | ||
213 | 212 | ||
214 | return text_encoder, unet, optimizer, train_dataloader, val_dataloader, lr_scheduler, {} | 213 | return text_encoder, unet, optimizer, train_dataloader, val_dataloader, lr_scheduler, {} |
215 | 214 | ||