| author | Volpeon <git@volpeon.ink> | 2023-04-02 08:42:33 +0200 |
|---|---|---|
| committer | Volpeon <git@volpeon.ink> | 2023-04-02 08:42:33 +0200 |
| commit | 11e6f8f88483e6cfdccd66ad758ae1dfcfc0283b (patch) | |
| tree | e66c62abb974c01769285b1c01c748e6c49cc97b | |
| parent | Revert (diff) | |
Lora: Only register params with grad to optimizer
| -rw-r--r-- | train_lora.py | 10 |
| -rw-r--r-- | training/strategy/lora.py | 4 |
| -rw-r--r-- | training/strategy/ti.py | 1 |
3 files changed, 7 insertions, 8 deletions
diff --git a/train_lora.py b/train_lora.py
index 8fc2d69..cf73645 100644
--- a/train_lora.py
+++ b/train_lora.py
@@ -662,9 +662,13 @@ def main():
     sample_frequency = math.ceil(num_train_epochs * (sample_frequency / args.num_train_steps))
 
     optimizer = create_optimizer(
-        itertools.chain(
-            unet.parameters(),
-            text_encoder.parameters(),
+        (
+            param
+            for param in itertools.chain(
+                unet.parameters(),
+                text_encoder.parameters(),
+            )
+            if param.requires_grad
         ),
         lr=args.learning_rate,
     )
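The new generator expression keeps only parameters whose requires_grad flag is set, so frozen weights never reach the optimizer's param groups. A minimal, self-contained sketch of the same pattern (the toy modules and the AdamW optimizer below are illustrative, not the repository's actual setup):

```python
import itertools

import torch
import torch.nn as nn

# Two toy modules standing in for the UNet and the text encoder.
unet = nn.Linear(8, 8)
text_encoder = nn.Linear(8, 8)

# Freeze the text encoder; its parameters keep requires_grad == False.
text_encoder.requires_grad_(False)

# Only parameters that actually need gradients are handed to the optimizer,
# mirroring the generator expression introduced in this commit.
trainable_params = (
    param
    for param in itertools.chain(unet.parameters(), text_encoder.parameters())
    if param.requires_grad
)

optimizer = torch.optim.AdamW(trainable_params, lr=1e-4)

# Only the UNet's weight and bias end up in the optimizer's param groups.
print(sum(p.numel() for group in optimizer.param_groups for p in group["params"]))
```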
diff --git a/training/strategy/lora.py b/training/strategy/lora.py
index 8905171..209785a 100644
--- a/training/strategy/lora.py
+++ b/training/strategy/lora.py
@@ -139,10 +139,6 @@ def lora_prepare(
     train_dataloader: DataLoader,
     val_dataloader: Optional[DataLoader],
     lr_scheduler: torch.optim.lr_scheduler._LRScheduler,
-    lora_rank: int = 4,
-    lora_alpha: int = 32,
-    lora_dropout: float = 0,
-    lora_bias: str = "none",
     **kwargs
 ):
     return accelerator.prepare(text_encoder, unet, optimizer, train_dataloader, val_dataloader, lr_scheduler) + ({},)
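Because the signature keeps **kwargs, callers that still pass the removed LoRA hyperparameters do not break; the extra keyword arguments are simply absorbed and ignored. A simplified sketch of that behavior (this stand-in omits the accelerator and the real model objects):

```python
def lora_prepare(text_encoder, unet, optimizer, train_dataloader, val_dataloader, lr_scheduler, **kwargs):
    # Strategy-specific hyperparameters such as lora_rank or lora_alpha that a
    # caller still passes land in **kwargs and are silently dropped here.
    return text_encoder, unet, optimizer, train_dataloader, val_dataloader, lr_scheduler, {}

# Passing the removed arguments no longer raises a TypeError:
result = lora_prepare(None, None, None, None, None, None, lora_rank=4, lora_alpha=32)
```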
diff --git a/training/strategy/ti.py b/training/strategy/ti.py
index 677f5a3..c7520ed 100644
--- a/training/strategy/ti.py
+++ b/training/strategy/ti.py
@@ -209,7 +209,6 @@ def textual_inversion_prepare(
     text_encoder.text_model.final_layer_norm.requires_grad_(False)
     text_encoder.text_model.embeddings.position_embedding.requires_grad_(False)
     text_encoder.text_model.embeddings.token_embedding.requires_grad_(False)
-    text_encoder.eval()
 
     return text_encoder, unet, optimizer, train_dataloader, val_dataloader, lr_scheduler, {}
 
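Here the explicit text_encoder.eval() call is dropped; the frozen parts of the text model are kept out of training purely via requires_grad_(False), which also keeps their parameters out of any optimizer that filters on requires_grad. A small sketch of that freezing pattern (module names and sizes are illustrative, not the actual CLIP text model):

```python
import torch.nn as nn

# A toy stand-in for the frozen pieces of the text model.
text_model = nn.ModuleDict({
    "final_layer_norm": nn.LayerNorm(16),
    "token_embedding": nn.Embedding(100, 16),
})

# Freezing via requires_grad_(False) keeps these weights constant without
# switching the module to eval mode, so train/eval behaviour of the
# surrounding model is left to the training loop.
text_model["final_layer_norm"].requires_grad_(False)
text_model["token_embedding"].requires_grad_(False)

trainable = [p for p in text_model.parameters() if p.requires_grad]
assert trainable == []  # nothing from the frozen pieces would reach an optimizer
```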
