From 01fee7d37a116265edb0f16e0b2f75d2116eb9f6 Mon Sep 17 00:00:00 2001
From: Volpeon
Date: Wed, 4 Jan 2023 12:18:07 +0100
Subject: Various updates

---
 train_ti.py | 8 ++++++++
 1 file changed, 8 insertions(+)

diff --git a/train_ti.py b/train_ti.py
index 6f116c3..1b60f64 100644
--- a/train_ti.py
+++ b/train_ti.py
@@ -259,6 +259,12 @@ def parse_args():
         default=1,
         help='If lr_annealing_func is "half_cos" or "cos", exponent to modify the function'
     )
+    parser.add_argument(
+        "--lr_min_lr",
+        type=float,
+        default=None,
+        help="Minimum learning rate in the lr scheduler."
+    )
     parser.add_argument(
         "--use_8bit_adam",
         action="store_true",
@@ -744,6 +750,7 @@ def main():
     if args.find_lr:
         lr_scheduler = None
     elif args.lr_scheduler == "one_cycle":
+        lr_min_lr = 0.04 if args.lr_min_lr is None else args.lr_min_lr / args.learning_rate
         lr_scheduler = get_one_cycle_schedule(
             optimizer=optimizer,
             num_training_steps=args.max_train_steps * args.gradient_accumulation_steps,
@@ -751,6 +758,7 @@
             annealing=args.lr_annealing_func,
             warmup_exp=args.lr_warmup_exp,
             annealing_exp=args.lr_annealing_exp,
+            min_lr=lr_min_lr,
         )
     elif args.lr_scheduler == "cosine_with_restarts":
         lr_scheduler = get_cosine_with_hard_restarts_schedule_with_warmup(
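
Note on the min_lr handling above: the new --lr_min_lr flag takes an absolute learning rate, and main() converts it into a fraction of the peak rate (args.lr_min_lr / args.learning_rate), falling back to 0.04 when the flag is unset. get_one_cycle_schedule therefore receives min_lr as a multiplier floor, not an absolute value. The repository's implementation of get_one_cycle_schedule is not shown in this patch, so the following is only a minimal sketch of how such a floor could sit on top of PyTorch's LambdaLR; the warmup/annealing details and the num_warmup_steps and warmup parameter names are assumptions, not taken from the source.

import math
from torch.optim.lr_scheduler import LambdaLR

def one_cycle_with_floor(optimizer, num_training_steps, num_warmup_steps,
                         warmup="linear", annealing="cos",
                         warmup_exp=1, annealing_exp=1, min_lr=0.04):
    # min_lr is a fraction of the peak learning rate, matching how main()
    # passes it (args.lr_min_lr / args.learning_rate).
    def lr_lambda(step):
        if step < num_warmup_steps:
            # Ramp up from 0 toward the peak multiplier of 1.0.
            progress = step / max(1, num_warmup_steps)
            mult = (progress if warmup == "linear"
                    else 0.5 * (1.0 - math.cos(math.pi * progress))) ** warmup_exp
        else:
            # Anneal from 1.0 back down over the remaining steps.
            progress = (step - num_warmup_steps) / max(
                1, num_training_steps - num_warmup_steps)
            if annealing == "half_cos":
                mult = math.cos(0.5 * math.pi * progress) ** annealing_exp
            else:  # "cos"
                mult = (0.5 * (1.0 + math.cos(math.pi * progress))) ** annealing_exp
        # Clamp so the schedule never decays below the min_lr fraction.
        return max(min_lr, mult)

    return LambdaLR(optimizer, lr_lambda)

With the conversion used in the patch, passing --lr_min_lr 4e-6 together with --learning_rate 1e-4 yields a floor multiplier of 0.04, i.e. the same floor as the default applied when the flag is omitted.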