From adc52fb8821a496bc8d78235bf10466b39df03e0 Mon Sep 17 00:00:00 2001
From: Volpeon
Date: Sun, 1 Jan 2023 19:19:52 +0100
Subject: Updates

---
 train_ti.py | 51 +++++++++++++++++++++++++--------------------------
 1 file changed, 25 insertions(+), 26 deletions(-)

diff --git a/train_ti.py b/train_ti.py
index 20a3190..775b918 100644
--- a/train_ti.py
+++ b/train_ti.py
@@ -1,5 +1,4 @@
 import argparse
-import itertools
 import math
 import datetime
 import logging
@@ -155,6 +154,12 @@ def parse_args():
         default=0.1,
         help="Tag dropout probability.",
     )
+    parser.add_argument(
+        "--vector_shuffle",
+        type=str,
+        default="auto",
+        help='Vector shuffling algorithm. Choose between ["all", "trailing", "leading", "between", "auto", "off"]',
+    )
     parser.add_argument(
         "--dataloader_num_workers",
         type=int,
@@ -245,7 +250,7 @@ def parse_args():
     parser.add_argument(
         "--lr_annealing_exp",
         type=int,
-        default=2,
+        default=1,
         help='If lr_annealing_func is "half_cos" or "cos", exponent to modify the function'
     )
     parser.add_argument(
@@ -502,20 +507,14 @@ def main():
     basepath = Path(args.output_dir).joinpath(slugify(args.project), now)
     basepath.mkdir(parents=True, exist_ok=True)
 
-    if args.find_lr:
-        accelerator = Accelerator(
-            gradient_accumulation_steps=args.gradient_accumulation_steps,
-            mixed_precision=args.mixed_precision
-        )
-    else:
-        accelerator = Accelerator(
-            log_with=LoggerType.TENSORBOARD,
-            logging_dir=f"{basepath}",
-            gradient_accumulation_steps=args.gradient_accumulation_steps,
-            mixed_precision=args.mixed_precision
-        )
+    accelerator = Accelerator(
+        log_with=LoggerType.TENSORBOARD,
+        logging_dir=f"{basepath}",
+        gradient_accumulation_steps=args.gradient_accumulation_steps,
+        mixed_precision=args.mixed_precision
+    )
 
-        logging.basicConfig(filename=basepath.joinpath("log.txt"), level=logging.DEBUG)
+    logging.basicConfig(filename=basepath.joinpath("log.txt"), level=logging.DEBUG)
 
     args.seed = args.seed or (torch.random.seed() >> 32)
     set_seed(args.seed)
@@ -534,7 +533,7 @@ def main():
     checkpoint_scheduler = DPMSolverMultistepScheduler.from_pretrained(
         args.pretrained_model_name_or_path, subfolder='scheduler')
 
-    tokenizer.set_use_vector_shuffle(True)
+    tokenizer.set_use_vector_shuffle(args.vector_shuffle)
 
     vae.enable_slicing()
     vae.set_use_memory_efficient_attention_xformers(True)
@@ -585,7 +584,7 @@ def main():
     )
 
     if args.find_lr:
-        args.learning_rate = 1e3
+        args.learning_rate = 1e2
 
     # Use 8-bit Adam for lower memory usage or to fine-tune the model in 16GB GPUs
     if args.use_8bit_adam:
@@ -830,15 +829,6 @@ def main():
 
         return loss, acc, bsz
 
-    if args.find_lr:
-        lr_finder = LRFinder(accelerator, text_encoder, optimizer, train_dataloader, val_dataloader, loop)
-        lr_finder.run(min_lr=1e-4)
-
-        plt.savefig(basepath.joinpath("lr.png"))
-        plt.close()
-
-        quit()
-
     # We need to initialize the trackers we use, and also store our configuration.
     # The trackers initializes automatically on the main process.
     if accelerator.is_main_process:
@@ -852,6 +842,15 @@ def main():
             config["exclude_collections"] = " ".join(config["exclude_collections"])
 
         accelerator.init_trackers("textual_inversion", config=config)
 
+    if args.find_lr:
+        lr_finder = LRFinder(accelerator, text_encoder, optimizer, train_dataloader, val_dataloader, loop)
+        lr_finder.run(min_lr=1e-4)
+
+        plt.savefig(basepath.joinpath("lr.png"))
+        plt.close()
+
+        quit()
+
     # Train!
     total_batch_size = args.train_batch_size * accelerator.num_processes * args.gradient_accumulation_steps
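
Note: the patch only wires the new --vector_shuffle flag through to
tokenizer.set_use_vector_shuffle(); the shuffle implementation itself lives in
the repo's custom tokenizer and is not shown here. Below is a minimal sketch of
how the six mode names from the help text could be interpreted. The function
name and the behavior of each mode (in particular what "auto" resolves to) are
assumptions for illustration, not taken from the repo.

# Hypothetical sketch of the shuffle modes named in the --vector_shuffle
# help text. Per-mode behavior is an assumption, not the repo's code.
import random


def shuffle_vector_ids(ids, mode="auto"):
    """Return a shuffled copy of a multi-vector token's embedding ids."""
    if mode == "off" or len(ids) < 2:
        return list(ids)
    if mode == "auto":
        mode = "between"  # assumption: "auto" picks a sensible default

    head, body, tail = [], list(ids), []
    if mode in ("trailing", "between"):
        head, body = body[:1], body[1:]    # keep the first vector in place
    if mode in ("leading", "between"):
        body, tail = body[:-1], body[-1:]  # keep the last vector in place
    # mode == "all" falls through and shuffles every position

    random.shuffle(body)
    return head + body + tail


# e.g. with mode="between", ids[0] and ids[-1] stay fixed:
print(shuffle_vector_ids([101, 102, 103, 104], mode="between"))

With the patch applied, the mode is selected at launch time, e.g.
python train_ti.py --vector_shuffle between (plus the usual arguments).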