From 6df1fc46daca9c289f1d7f7524e01deac5c92fd1 Mon Sep 17 00:00:00 2001 From: Volpeon Date: Tue, 27 Dec 2022 13:58:48 +0100 Subject: Improved learning rate finder --- training/lr.py | 38 +++++++++++++++++++++++++------------- 1 file changed, 25 insertions(+), 13 deletions(-) (limited to 'training/lr.py') diff --git a/training/lr.py b/training/lr.py index 5343f24..8e558e1 100644 --- a/training/lr.py +++ b/training/lr.py @@ -1,3 +1,6 @@ +import math +import copy + import matplotlib.pyplot as plt import numpy as np import torch @@ -16,15 +19,22 @@ class LRFinder(): self.val_dataloader = val_dataloader self.loss_fn = loss_fn - def run(self, num_epochs=100, num_train_steps=1, num_val_steps=1, smooth_f=0.05, diverge_th=5): + self.model_state = copy.deepcopy(model.state_dict()) + self.optimizer_state = copy.deepcopy(optimizer.state_dict()) + + def run(self, min_lr, num_epochs=100, num_train_batches=1, num_val_batches=math.inf, smooth_f=0.05, diverge_th=5): best_loss = None lrs = [] losses = [] - lr_scheduler = get_exponential_schedule(self.optimizer, num_epochs) + lr_scheduler = get_exponential_schedule(self.optimizer, min_lr, num_epochs) + + steps = min(num_train_batches, len(self.train_dataloader)) + steps += min(num_val_batches, len(self.val_dataloader)) + steps *= num_epochs progress_bar = tqdm( - range(num_epochs * (num_train_steps + num_val_steps)), + range(steps), disable=not self.accelerator.is_local_main_process, dynamic_ncols=True ) @@ -38,6 +48,9 @@ class LRFinder(): self.model.train() for step, batch in enumerate(self.train_dataloader): + if step >= num_train_batches: + break + with self.accelerator.accumulate(self.model): loss, acc, bsz = self.loss_fn(batch) @@ -49,21 +62,17 @@ class LRFinder(): if self.accelerator.sync_gradients: progress_bar.update(1) - if step >= num_train_steps: - break - self.model.eval() with torch.inference_mode(): for step, batch in enumerate(self.val_dataloader): + if step >= num_val_batches: + break + loss, acc, bsz = self.loss_fn(batch) avg_loss.update(loss.detach_(), bsz) - if self.accelerator.sync_gradients: - progress_bar.update(1) - - if step >= num_val_steps: - break + progress_bar.update(1) lr_scheduler.step() @@ -87,6 +96,9 @@ class LRFinder(): "lr": lr, }) + self.model.load_state_dict(self.model_state) + self.optimizer.load_state_dict(self.optimizer_state) + if loss > diverge_th * best_loss: print("Stopping early, the loss has diverged") break @@ -120,8 +132,8 @@ class LRFinder(): ax.set_ylabel("Loss") -def get_exponential_schedule(optimizer, num_epochs, last_epoch=-1): +def get_exponential_schedule(optimizer, min_lr, num_epochs, last_epoch=-1): def lr_lambda(current_epoch: int): - return (current_epoch / num_epochs) ** 5 + return min_lr + ((current_epoch / num_epochs) ** 10) * (1 - min_lr) return LambdaLR(optimizer, lr_lambda, last_epoch) -- cgit v1.2.3-54-g00ecf