author     Volpeon <git@volpeon.ink>  2023-01-17 11:50:16 +0100
committer  Volpeon <git@volpeon.ink>  2023-01-17 11:50:16 +0100
commit     555912a86b012382a78f1b2717c2e0fde5994a04 (patch)
tree       7569fa157ae63134febe569bc7a58933c2cf4b3c /training/strategy
parent     Update (diff)
Make embedding decay work like Adam decay
Diffstat (limited to 'training/strategy')
-rw-r--r--  training/strategy/ti.py  14
1 file changed, 5 insertions(+), 9 deletions(-)
diff --git a/training/strategy/ti.py b/training/strategy/ti.py
index 081180f..eb6730b 100644
--- a/training/strategy/ti.py
+++ b/training/strategy/ti.py
@@ -32,12 +32,10 @@ def textual_inversion_strategy_callbacks(
     seed: int,
     placeholder_tokens: list[str],
     placeholder_token_ids: list[list[int]],
-    learning_rate: float,
     gradient_checkpointing: bool = False,
     use_emb_decay: bool = False,
     emb_decay_target: float = 0.4,
-    emb_decay_factor: float = 1,
-    emb_decay_start: float = 0,
+    emb_decay: float = 1e-2,
     use_ema: bool = False,
     ema_inv_gamma: float = 1.0,
     ema_power: int = 1,
@@ -120,17 +118,15 @@ def textual_inversion_strategy_callbacks(
         yield

     def on_after_optimize(lr: float):
-        if ema_embeddings is not None:
-            ema_embeddings.step(text_encoder.text_model.embeddings.temp_token_embedding.parameters())
-
-    @torch.no_grad()
-    def on_after_epoch(lr: float):
         if use_emb_decay:
             text_encoder.text_model.embeddings.normalize(
                 emb_decay_target,
-                min(1.0, max(0.0, emb_decay_factor * ((lr - emb_decay_start) / (learning_rate - emb_decay_start))))
+                min(1.0, emb_decay * lr)
             )

+        if ema_embeddings is not None:
+            ema_embeddings.step(text_encoder.text_model.embeddings.temp_token_embedding.parameters())
+
     def on_log():
         if ema_embeddings is not None:
             return {"ema_decay": ema_embeddings.decay}