From 4d3d318a4168ef79847737cef2c0ad8a4dafd3e7 Mon Sep 17 00:00:00 2001 From: Volpeon Date: Thu, 29 Dec 2022 09:00:19 +0100 Subject: Training improvements --- train_ti.py | 21 +++++++++++++++------ training/lr.py | 7 ++++--- training/optimization.py | 43 +++++++++++++++++++++++-------------------- 3 files changed, 42 insertions(+), 29 deletions(-) diff --git a/train_ti.py b/train_ti.py index d7696e5..b1f6a49 100644 --- a/train_ti.py +++ b/train_ti.py @@ -903,12 +903,21 @@ def main(): text_encoder.eval() + cur_loss_val = AverageMeter() + cur_acc_val = AverageMeter() + with torch.inference_mode(): for step, batch in enumerate(val_dataloader): loss, acc, bsz = loop(batch) - avg_loss_val.update(loss.detach_(), bsz) - avg_acc_val.update(acc.detach_(), bsz) + loss = loss.detach_() + acc = acc.detach_() + + cur_loss_val.update(loss, bsz) + cur_acc_val.update(acc, bsz) + + avg_loss_val.update(loss, bsz) + avg_acc_val.update(acc, bsz) local_progress_bar.update(1) global_progress_bar.update(1) @@ -921,10 +930,10 @@ def main(): } local_progress_bar.set_postfix(**logs) - accelerator.log({ - "val/loss": avg_loss_val.avg.item(), - "val/acc": avg_acc_val.avg.item(), - }, step=global_step) + logs["val/cur_loss"] = cur_loss_val.avg.item() + logs["val/cur_acc"] = cur_acc_val.avg.item() + + accelerator.log(logs, step=global_step) local_progress_bar.clear() global_progress_bar.clear() diff --git a/training/lr.py b/training/lr.py index c0e9b3f..0c5ce9e 100644 --- a/training/lr.py +++ b/training/lr.py @@ -90,6 +90,7 @@ class LRFinder(): else: if smooth_f > 0: loss = smooth_f * loss + (1 - smooth_f) * losses[-1] + acc = smooth_f * acc + (1 - smooth_f) * accs[-1] if loss < best_loss: best_loss = loss if acc > best_acc: @@ -132,9 +133,9 @@ class LRFinder(): ax_loss.set_xlabel("Learning rate") ax_loss.set_ylabel("Loss") - # ax_acc = ax_loss.twinx() - # ax_acc.plot(lrs, accs, color='blue') - # ax_acc.set_ylabel("Accuracy") + ax_acc = ax_loss.twinx() + ax_acc.plot(lrs, accs, color='blue') + ax_acc.set_ylabel("Accuracy") print("LR suggestion: steepest gradient") min_grad_idx = None diff --git a/training/optimization.py b/training/optimization.py index a0c8673..dfee2b5 100644 --- a/training/optimization.py +++ b/training/optimization.py @@ -1,4 +1,7 @@ import math +from typing import Literal + +import torch from torch.optim.lr_scheduler import LambdaLR from diffusers.utils import logging @@ -6,41 +9,41 @@ from diffusers.utils import logging logger = logging.get_logger(__name__) -def get_one_cycle_schedule(optimizer, num_training_steps, annealing="cos", min_lr=0.04, mid_point=0.3, last_epoch=-1): - """ - Create a schedule with a learning rate that decreases linearly from the initial lr set in the optimizer to 0, after - a warmup period during which it increases linearly from 0 to the initial lr set in the optimizer. - Args: - optimizer ([`~torch.optim.Optimizer`]): - The optimizer for which to schedule the learning rate. - num_training_steps (`int`): - The total number of training steps. - last_epoch (`int`, *optional*, defaults to -1): - The index of the last epoch when resuming training. - Return: - `torch.optim.lr_scheduler.LambdaLR` with the appropriate schedule. - """ - +def get_one_cycle_schedule( + optimizer: torch.optim.Optimizer, + num_training_steps: int, + warmup: Literal["cos", "linear"] = "cos", + annealing: Literal["cos", "half_cos", "linear"] = "cos", + min_lr: int = 0.04, + mid_point: int = 0.3, + last_epoch: int = -1 +): def lr_lambda(current_step: int): thresh_up = int(num_training_steps * min(mid_point, 0.5)) if current_step < thresh_up: - return min_lr + float(current_step) / float(max(1, thresh_up)) * (1 - min_lr) + progress = float(current_step) / float(max(1, thresh_up)) + + if warmup == "linear": + return min_lr + progress * (1 - min_lr) + + return min_lr + 0.5 * (1.0 + math.cos(math.pi * (1 + progress))) if annealing == "linear": thresh_down = thresh_up * 2 if current_step < thresh_down: - return min_lr + float(thresh_down - current_step) / float(max(1, thresh_down - thresh_up)) * (1 - min_lr) + progress = float(thresh_down - current_step) / float(max(1, thresh_down - thresh_up)) + return min_lr + progress * (1 - min_lr) progress = float(num_training_steps - current_step) / float(max(1, num_training_steps - thresh_down)) - return max(0.0, progress) * min_lr + return progress * min_lr progress = float(current_step - thresh_up) / float(max(1, num_training_steps - thresh_up)) if annealing == "half_cos": - return max(0.0, 1.0 + math.cos(math.pi * (0.5 + 0.5 * progress))) + return 1.0 + math.cos(math.pi * (0.5 + 0.5 * progress)) - return max(0.0, 0.5 * (1.0 + math.cos(math.pi * progress))) + return 0.5 * (1.0 + math.cos(math.pi * progress)) return LambdaLR(optimizer, lr_lambda, last_epoch) -- cgit v1.2.3-54-g00ecf