Diffstat (limited to 'train_lora.py')
-rw-r--r--  train_lora.py  53
1 file changed, 33 insertions(+), 20 deletions(-)
diff --git a/train_lora.py b/train_lora.py
index 4d4c16a..ba5aee1 100644
--- a/train_lora.py
+++ b/train_lora.py
@@ -84,9 +84,9 @@ def parse_args():
     )
     parser.add_argument(
         "--auto_cycles",
-        type=int,
-        default=1,
-        help="How many cycles to run automatically."
+        type=str,
+        default="o",
+        help="Cycles to run automatically."
     )
     parser.add_argument(
         "--cycle_decay",
@@ -95,11 +95,6 @@ def parse_args():
         help="Learning rate decay per cycle."
     )
     parser.add_argument(
-        "--cycle_constant",
-        action="store_true",
-        help="Use constant LR on cycles > 1."
-    )
-    parser.add_argument(
         "--placeholder_tokens",
         type=str,
         nargs='*',
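Note on the two parse_args() changes above: --auto_cycles switches from an integer cycle count to a string of single-character action keys, and --cycle_constant is removed because its effect is now reachable as the "c" (constant) action in the loop further below. main() consumes the string one character per training cycle, then falls back to interactive prompts. A minimal illustration of the splitting, assuming only what this diff shows:

    # main() below does: auto_cycles = list(args.auto_cycles)
    list("o")    # new default: one scripted one_cycle pass, then prompt
    list("owd")  # one_cycle, then warmup, then decay, then prompt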
@@ -920,7 +915,6 @@ def main():
         annealing_func=args.lr_annealing_func,
         warmup_exp=args.lr_warmup_exp,
         annealing_exp=args.lr_annealing_exp,
-        cycles=args.lr_cycles,
         end_lr=1e2,
         mid_point=args.lr_mid_point,
     )
@@ -964,20 +958,38 @@ def main():
 
     lora_sample_output_dir = output_dir / lora_project / "samples"
 
+    auto_cycles = list(args.auto_cycles)
+    lr_scheduler = args.lr_scheduler
+    lr_warmup_epochs = args.lr_warmup_epochs
+    lr_cycles = args.lr_cycles
+
     while True:
-        if training_iter >= args.auto_cycles:
-            response = input("Run another cycle? [y/n] ")
-            if response.lower().strip() == "n":
-                break
+        if len(auto_cycles) != 0:
+            response = auto_cycles.pop(0)
+        else:
+            response = input("Choose action: [o] one_cycle, [w] warmup, [c] constant, [d] decay, [s] stop \n--> ")
+
+        if response.lower().strip() == "o":
+            lr_scheduler = "one_cycle"
+            lr_warmup_epochs = args.lr_warmup_epochs
+            lr_cycles = args.lr_cycles
+        if response.lower().strip() == "w":
+            lr_scheduler = "constant"
+            lr_warmup_epochs = num_train_epochs
+        if response.lower().strip() == "c":
+            lr_scheduler = "constant"
+            lr_warmup_epochs = 0
+        if response.lower().strip() == "d":
+            lr_scheduler = "cosine"
+            lr_warmup_epochs = 0
+            lr_cycles = 1
+        elif response.lower().strip() == "s":
+            break
 
         print("")
         print(f"============ LoRA cycle {training_iter + 1} ============")
         print("")
 
-        if args.cycle_constant and training_iter == 1:
-            args.lr_scheduler = "constant"
-            args.lr_warmup_epochs = 0
-
         params_to_optimize = []
 
         if len(args.placeholder_tokens) != 0:
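The action dispatch above chains plain if statements, with only the final "s" branch attached as an elif of the "d" check; an unrecognized key therefore matches nothing, and the next cycle simply reuses the previous settings. A standalone sketch of the same mapping (the function name and returned triple are illustrative, not the author's code; "s" stops the loop and is handled separately in the diff):

    def apply_action(key, args, num_train_epochs, current):
        # current is the (lr_scheduler, lr_warmup_epochs, lr_cycles) triple
        # carried over from the previous cycle.
        key = key.lower().strip()
        if key == "o":  # full one_cycle schedule, settings from the CLI args
            return ("one_cycle", args.lr_warmup_epochs, args.lr_cycles)
        if key == "w":  # constant LR that warms up over the whole cycle
            return ("constant", num_train_epochs, current[2])
        if key == "c":  # constant LR, no warmup
            return ("constant", 0, current[2])
        if key == "d":  # a single cosine decay cycle
            return ("cosine", 0, 1)
        return current  # unknown key: keep the previous cycle's settings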
@@ -1012,12 +1024,13 @@ def main():
         lora_optimizer = create_optimizer(params_to_optimize)
 
         lora_lr_scheduler = create_lr_scheduler(
-            args.lr_scheduler,
+            lr_scheduler,
             gradient_accumulation_steps=args.gradient_accumulation_steps,
             optimizer=lora_optimizer,
             num_training_steps_per_epoch=len(lora_datamodule.train_dataloader),
             train_epochs=num_train_epochs,
-            warmup_epochs=args.lr_warmup_epochs,
+            cycles=lr_cycles,
+            warmup_epochs=lr_warmup_epochs,
         )
 
         lora_checkpoint_output_dir = output_dir / lora_project / f"model_{training_iter + 1}"
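Because the scheduler is now built from the per-cycle locals rather than from args, an action chosen at the prompt takes effect in the very next create_lr_scheduler call. A hypothetical trace for --auto_cycles "wd", with the other keyword arguments elided:

    # cycle 1 ("w"): create_lr_scheduler("constant", ..., cycles=args.lr_cycles, warmup_epochs=num_train_epochs)
    # cycle 2 ("d"): create_lr_scheduler("cosine", ..., cycles=1, warmup_epochs=0)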
@@ -1031,7 +1044,7 @@ def main():
             num_train_epochs=num_train_epochs,
             gradient_accumulation_steps=args.gradient_accumulation_steps,
             global_step_offset=training_iter * num_train_steps,
-            initial_samples=training_iter == 0,
+            cycle=training_iter,
             # --
             group_labels=group_labels,
             sample_output_dir=lora_sample_output_dir,