From 54d72ba4a8331d822a48bad9e381b47d39598125 Mon Sep 17 00:00:00 2001 From: Volpeon Date: Wed, 28 Dec 2022 21:00:34 +0100 Subject: Updated 1-cycle scheduler --- training/lr.py | 14 ++++++++------ training/optimization.py | 10 +++++++--- 2 files changed, 15 insertions(+), 9 deletions(-) (limited to 'training') diff --git a/training/lr.py b/training/lr.py index c1fa3a0..c0e9b3f 100644 --- a/training/lr.py +++ b/training/lr.py @@ -19,8 +19,8 @@ class LRFinder(): self.val_dataloader = val_dataloader self.loss_fn = loss_fn - self.model_state = copy.deepcopy(model.state_dict()) - self.optimizer_state = copy.deepcopy(optimizer.state_dict()) + # self.model_state = copy.deepcopy(model.state_dict()) + # self.optimizer_state = copy.deepcopy(optimizer.state_dict()) def run(self, min_lr, skip_start=10, skip_end=5, num_epochs=100, num_train_batches=1, num_val_batches=math.inf, smooth_f=0.05, diverge_th=5): best_loss = None @@ -109,8 +109,8 @@ class LRFinder(): "lr": lr, }) - self.model.load_state_dict(self.model_state) - self.optimizer.load_state_dict(self.optimizer_state) + # self.model.load_state_dict(self.model_state) + # self.optimizer.load_state_dict(self.optimizer_state) if loss > diverge_th * best_loss: print("Stopping early, the loss has diverged") @@ -127,12 +127,14 @@ class LRFinder(): fig, ax_loss = plt.subplots() - ax_loss.plot(lrs, losses, color='red', label='Loss') + ax_loss.plot(lrs, losses, color='red') ax_loss.set_xscale("log") ax_loss.set_xlabel("Learning rate") + ax_loss.set_ylabel("Loss") # ax_acc = ax_loss.twinx() - # ax_acc.plot(lrs, accs, color='blue', label='Accuracy') + # ax_acc.plot(lrs, accs, color='blue') + # ax_acc.set_ylabel("Accuracy") print("LR suggestion: steepest gradient") min_grad_idx = None diff --git a/training/optimization.py b/training/optimization.py index 3809f3b..a0c8673 100644 --- a/training/optimization.py +++ b/training/optimization.py @@ -6,7 +6,7 @@ from diffusers.utils import logging logger = logging.get_logger(__name__) -def get_one_cycle_schedule(optimizer, num_training_steps, annealing="cos", min_lr=0.01, mid_point=0.4, last_epoch=-1): +def get_one_cycle_schedule(optimizer, num_training_steps, annealing="cos", min_lr=0.04, mid_point=0.3, last_epoch=-1): """ Create a schedule with a learning rate that decreases linearly from the initial lr set in the optimizer to 0, after a warmup period during which it increases linearly from 0 to the initial lr set in the optimizer. @@ -35,8 +35,12 @@ def get_one_cycle_schedule(optimizer, num_training_steps, annealing="cos", min_l progress = float(num_training_steps - current_step) / float(max(1, num_training_steps - thresh_down)) return max(0.0, progress) * min_lr - else: - progress = float(current_step - thresh_up) / float(max(1, num_training_steps - thresh_up)) + + progress = float(current_step - thresh_up) / float(max(1, num_training_steps - thresh_up)) + + if annealing == "half_cos": return max(0.0, 1.0 + math.cos(math.pi * (0.5 + 0.5 * progress))) + return max(0.0, 0.5 * (1.0 + math.cos(math.pi * progress))) + return LambdaLR(optimizer, lr_lambda, last_epoch) -- cgit v1.2.3-70-g09d2