From a551a9ac2edd1dc59828749a5e5d73a65b3c9ce7 Mon Sep 17 00:00:00 2001
From: Volpeon
Date: Sat, 1 Apr 2023 15:54:40 +0200
Subject: Update

---
 train_dreambooth.py      |  1 -
 train_lora.py            |  1 -
 train_ti.py              |  1 -
 training/functional.py   | 18 +++++++++++++-----
 training/optimization.py |  7 +++++--
 5 files changed, 18 insertions(+), 10 deletions(-)

diff --git a/train_dreambooth.py b/train_dreambooth.py
index 4456bd1..48b7926 100644
--- a/train_dreambooth.py
+++ b/train_dreambooth.py
@@ -539,7 +539,6 @@ def main():
     elif args.optimizer == 'adafactor':
         create_optimizer = partial(
             transformers.optimization.Adafactor,
-            beta1=args.adam_beta1,
             weight_decay=args.adam_weight_decay,
             scale_parameter=True,
             relative_step=True,
diff --git a/train_lora.py b/train_lora.py
index f8dccae..8fc2d69 100644
--- a/train_lora.py
+++ b/train_lora.py
@@ -571,7 +571,6 @@ def main():
     elif args.optimizer == 'adafactor':
         create_optimizer = partial(
             transformers.optimization.Adafactor,
-            beta1=args.adam_beta1,
             weight_decay=args.adam_weight_decay,
             scale_parameter=True,
             relative_step=True,
diff --git a/train_ti.py b/train_ti.py
index 274a1ca..5482326 100644
--- a/train_ti.py
+++ b/train_ti.py
@@ -669,7 +669,6 @@ def main():
     elif args.optimizer == 'adafactor':
         create_optimizer = partial(
             transformers.optimization.Adafactor,
-            beta1=args.adam_beta1,
             weight_decay=args.adam_weight_decay,
             scale_parameter=True,
             relative_step=True,
diff --git a/training/functional.py b/training/functional.py
index ac43847..7104a88 100644
--- a/training/functional.py
+++ b/training/functional.py
@@ -484,12 +484,16 @@ def train_loop(
                     avg_loss.update(loss.detach_(), bsz)
                     avg_acc.update(acc.detach_(), bsz)
 
+                    lr = lr_scheduler.get_last_lr()[0]
+                    if torch.is_tensor(lr):
+                        lr = lr.item()
+
                     logs = {
                         "train/loss": avg_loss.avg.item(),
                         "train/acc": avg_acc.avg.item(),
                         "train/cur_loss": loss.item(),
                         "train/cur_acc": acc.item(),
-                        "lr": lr_scheduler.get_last_lr()[0],
+                        "lr": lr,
                     }
                     if isDadaptation:
                         logs["lr/d*lr"] = optimizer.param_groups[0]["d"] * optimizer.param_groups[0]["lr"]
@@ -498,13 +502,13 @@ def train_loop(
                     local_progress_bar.set_postfix(**logs)
 
                     if ((step + 1) % gradient_accumulation_steps == 0) or ((step + 1) == len(train_dataloader)):
-                        before_optimize_result = on_before_optimize(lr_scheduler.get_last_lr()[0], epoch)
+                        before_optimize_result = on_before_optimize(lr, epoch)
 
                         optimizer.step()
                         lr_scheduler.step()
                         optimizer.zero_grad(set_to_none=True)
 
-                        on_after_optimize(before_optimize_result, lr_scheduler.get_last_lr()[0])
+                        on_after_optimize(before_optimize_result, lr)
 
                         local_progress_bar.update(1)
                         global_progress_bar.update(1)
@@ -518,9 +522,13 @@ def train_loop(
 
             accelerator.wait_for_everyone()
 
-            lrs.append(lr_scheduler.get_last_lr()[0])
+            lr = lr_scheduler.get_last_lr()[0]
+            if torch.is_tensor(lr):
+                lr = lr.item()
+
+            lrs.append(lr)
 
-            on_after_epoch(lr_scheduler.get_last_lr()[0])
+            on_after_epoch(lr)
 
             if val_dataloader is not None:
                 model.eval()
diff --git a/training/optimization.py b/training/optimization.py
index 53d0a6d..d22a900 100644
--- a/training/optimization.py
+++ b/training/optimization.py
@@ -6,7 +6,7 @@ import torch
 from torch.optim.lr_scheduler import LambdaLR
 
 from diffusers.optimization import get_scheduler as get_scheduler_, get_cosine_with_hard_restarts_schedule_with_warmup
-import transformers
+from transformers.optimization import get_adafactor_schedule
 
 
 class OneCyclePhase(NamedTuple):
@@ -150,7 +150,10 @@ def get_scheduler(
             num_cycles=cycles,
         )
     elif id == "adafactor":
-        lr_scheduler = transformers.optimization.AdafactorSchedule(optimizer, min_lr)
+        lr_scheduler = get_adafactor_schedule(
+            optimizer,
+            initial_lr=min_lr
+        )
     else:
         lr_scheduler = get_scheduler_(
             id,
-- 
cgit v1.2.3-54-g00ecf