diff options
author | Volpeon <git@volpeon.ink> | 2023-01-17 11:50:16 +0100 |
---|---|---|
committer | Volpeon <git@volpeon.ink> | 2023-01-17 11:50:16 +0100 |
commit | 555912a86b012382a78f1b2717c2e0fde5994a04 (patch) | |
tree | 7569fa157ae63134febe569bc7a58933c2cf4b3c /training | |
parent | Update (diff) | |
download | textual-inversion-diff-555912a86b012382a78f1b2717c2e0fde5994a04.tar.gz textual-inversion-diff-555912a86b012382a78f1b2717c2e0fde5994a04.tar.bz2 textual-inversion-diff-555912a86b012382a78f1b2717c2e0fde5994a04.zip |
Make embedding decay work like Adam decay
Diffstat (limited to 'training')
-rw-r--r-- | training/strategy/ti.py | 14 |
1 files changed, 5 insertions, 9 deletions
diff --git a/training/strategy/ti.py b/training/strategy/ti.py index 081180f..eb6730b 100644 --- a/training/strategy/ti.py +++ b/training/strategy/ti.py | |||
@@ -32,12 +32,10 @@ def textual_inversion_strategy_callbacks( | |||
32 | seed: int, | 32 | seed: int, |
33 | placeholder_tokens: list[str], | 33 | placeholder_tokens: list[str], |
34 | placeholder_token_ids: list[list[int]], | 34 | placeholder_token_ids: list[list[int]], |
35 | learning_rate: float, | ||
36 | gradient_checkpointing: bool = False, | 35 | gradient_checkpointing: bool = False, |
37 | use_emb_decay: bool = False, | 36 | use_emb_decay: bool = False, |
38 | emb_decay_target: float = 0.4, | 37 | emb_decay_target: float = 0.4, |
39 | emb_decay_factor: float = 1, | 38 | emb_decay: float = 1e-2, |
40 | emb_decay_start: float = 0, | ||
41 | use_ema: bool = False, | 39 | use_ema: bool = False, |
42 | ema_inv_gamma: float = 1.0, | 40 | ema_inv_gamma: float = 1.0, |
43 | ema_power: int = 1, | 41 | ema_power: int = 1, |
@@ -120,17 +118,15 @@ def textual_inversion_strategy_callbacks( | |||
120 | yield | 118 | yield |
121 | 119 | ||
122 | def on_after_optimize(lr: float): | 120 | def on_after_optimize(lr: float): |
123 | if ema_embeddings is not None: | ||
124 | ema_embeddings.step(text_encoder.text_model.embeddings.temp_token_embedding.parameters()) | ||
125 | |||
126 | @torch.no_grad() | ||
127 | def on_after_epoch(lr: float): | ||
128 | if use_emb_decay: | 121 | if use_emb_decay: |
129 | text_encoder.text_model.embeddings.normalize( | 122 | text_encoder.text_model.embeddings.normalize( |
130 | emb_decay_target, | 123 | emb_decay_target, |
131 | min(1.0, max(0.0, emb_decay_factor * ((lr - emb_decay_start) / (learning_rate - emb_decay_start)))) | 124 | min(1.0, emb_decay * lr) |
132 | ) | 125 | ) |
133 | 126 | ||
127 | if ema_embeddings is not None: | ||
128 | ema_embeddings.step(text_encoder.text_model.embeddings.temp_token_embedding.parameters()) | ||
129 | |||
134 | def on_log(): | 130 | def on_log(): |
135 | if ema_embeddings is not None: | 131 | if ema_embeddings is not None: |
136 | return {"ema_decay": ema_embeddings.decay} | 132 | return {"ema_decay": ema_embeddings.decay} |