From 6df1fc46daca9c289f1d7f7524e01deac5c92fd1 Mon Sep 17 00:00:00 2001
From: Volpeon
Date: Tue, 27 Dec 2022 13:58:48 +0100
Subject: Improved learning rate finder

---
 train_ti.py | 10 +++-------
 1 file changed, 3 insertions(+), 7 deletions(-)

diff --git a/train_ti.py b/train_ti.py
index 32f44f4..870b2ba 100644
--- a/train_ti.py
+++ b/train_ti.py
@@ -548,9 +548,6 @@ def main():
         args.train_batch_size * accelerator.num_processes
     )
 
-    if args.find_lr:
-        args.learning_rate = 1e2
-
     # Use 8-bit Adam for lower memory usage or to fine-tune the model in 16GB GPUs
     if args.use_8bit_adam:
         try:
@@ -783,7 +780,7 @@ def main():
 
     if args.find_lr:
         lr_finder = LRFinder(accelerator, text_encoder, optimizer, train_dataloader, val_dataloader, loop)
-        lr_finder.run(num_train_steps=2)
+        lr_finder.run(min_lr=1e-6, num_train_batches=4)
 
         plt.savefig(basepath.joinpath("lr.png"))
         plt.close()
@@ -908,9 +905,8 @@ def main():
                     avg_loss_val.update(loss.detach_(), bsz)
                     avg_acc_val.update(acc.detach_(), bsz)
 
-                if accelerator.sync_gradients:
-                    local_progress_bar.update(1)
-                    global_progress_bar.update(1)
+                local_progress_bar.update(1)
+                global_progress_bar.update(1)
 
                 logs = {
                     "val/loss": avg_loss_val.avg.item(),
-- 
cgit v1.2.3-54-g00ecf
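
Note: the repo's LRFinder implementation is not part of this patch, but the new
run(min_lr=1e-6, num_train_batches=4) signature suggests a standard LR range
test: sweep the learning rate exponentially from min_lr up to the optimizer's
configured rate, averaging the loss over a few batches per step, which also
explains why the hardcoded args.learning_rate = 1e2 override is removed. The
following is a minimal sketch under that assumption; LRFinderSketch, its
constructor arguments, and the num_steps parameter are hypothetical and not the
actual LRFinder API.

import matplotlib.pyplot as plt


class LRFinderSketch:
    """Hypothetical LR range test; not the repo's actual LRFinder."""

    def __init__(self, model, optimizer, train_dataloader, loss_fn):
        self.model = model
        self.optimizer = optimizer
        self.train_dataloader = train_dataloader
        self.loss_fn = loss_fn

    def run(self, min_lr=1e-6, num_train_batches=4, num_steps=100):
        # Sweep exponentially from min_lr up to the optimizer's configured
        # learning rate, which serves as the upper end of the range.
        max_lr = self.optimizer.param_groups[0]["lr"]
        lrs, losses = [], []
        data_iter = iter(self.train_dataloader)
        for step in range(num_steps):
            # Geometric interpolation: min_lr at step 0, max_lr at the end.
            lr = min_lr * (max_lr / min_lr) ** (step / (num_steps - 1))
            for group in self.optimizer.param_groups:
                group["lr"] = lr
            # Average the loss over num_train_batches batches per LR step
            # to smooth out per-batch noise.
            total = 0.0
            for _ in range(num_train_batches):
                try:
                    inputs, targets = next(data_iter)
                except StopIteration:
                    data_iter = iter(self.train_dataloader)
                    inputs, targets = next(data_iter)
                self.optimizer.zero_grad()
                loss = self.loss_fn(self.model(inputs), targets)
                loss.backward()
                self.optimizer.step()
                total += loss.item()
            lrs.append(lr)
            losses.append(total / num_train_batches)
        # Plot loss vs. learning rate on a log axis; the caller saves the
        # figure, cf. plt.savefig(basepath.joinpath("lr.png")) in the patch.
        plt.plot(lrs, losses)
        plt.xscale("log")
        plt.xlabel("learning rate")
        plt.ylabel("loss")

Usage mirrors the patched call site: construct the finder, call
run(min_lr=1e-6, num_train_batches=4), then save the resulting plot. Passing
min_lr explicitly keeps the configured learning rate meaningful as the sweep's
upper bound, rather than clobbering it with a fixed 1e2 before the search.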