From e0b686b475885f0c8480f7173eaa7359adf17e27 Mon Sep 17 00:00:00 2001 From: Volpeon Date: Mon, 26 Dec 2022 14:24:21 +0100 Subject: Set default dimensions to 768; add config inheritance --- common.py | 14 ++++++++++++++ data/csv.py | 4 ++-- infer.py | 11 +++++------ pipelines/stable_diffusion/vlpn_stable_diffusion.py | 8 ++++---- train_dreambooth.py | 7 +++---- train_lora.py | 7 +++---- train_ti.py | 11 +++++------ 7 files changed, 36 insertions(+), 26 deletions(-) diff --git a/common.py b/common.py index 7ffa77f..f369475 100644 --- a/common.py +++ b/common.py @@ -1,9 +1,23 @@ from pathlib import Path +import json + import torch from transformers import CLIPTextModel, CLIPTokenizer +def load_config(filename): + with open(filename, 'rt') as f: + config = json.load(f) + + args = config["args"] + + if "base" in config: + args = load_config(Path(filename).parent.joinpath(config["base"])) | args + + return args + + def load_text_embedding(embeddings, token_id, file): data = torch.load(file, map_location="cpu") diff --git a/data/csv.py b/data/csv.py index 0810c2c..0ad36dc 100644 --- a/data/csv.py +++ b/data/csv.py @@ -51,7 +51,7 @@ class CSVDataModule(): prompt_processor: PromptProcessor, class_subdir: str = "cls", num_class_images: int = 1, - size: int = 512, + size: int = 768, repeats: int = 1, dropout: float = 0, interpolation: str = "bicubic", @@ -196,7 +196,7 @@ class CSVDataset(Dataset): prompt_processor: PromptProcessor, batch_size: int = 1, num_class_images: int = 0, - size: int = 512, + size: int = 768, repeats: int = 1, dropout: float = 0, interpolation: str = "bicubic", diff --git a/infer.py b/infer.py index f566114..ae0b4da 100644 --- a/infer.py +++ b/infer.py @@ -24,7 +24,7 @@ from transformers import CLIPTextModel, CLIPTokenizer from slugify import slugify from pipelines.stable_diffusion.vlpn_stable_diffusion import VlpnStableDiffusion -from common import load_text_embeddings +from common import load_text_embeddings, load_config torch.backends.cuda.matmul.allow_tf32 = True @@ -46,8 +46,8 @@ default_cmds = { "negative_prompt": None, "image": None, "image_noise": .7, - "width": 512, - "height": 512, + "width": 768, + "height": 768, "batch_size": 1, "batch_num": 1, "steps": 30, @@ -163,9 +163,8 @@ def run_parser(parser, defaults, input=None): conf_args = argparse.Namespace() if args.config is not None: - with open(args.config, 'rt') as f: - conf_args = parser.parse_known_args( - namespace=argparse.Namespace(**json.load(f)["args"]))[0] + args = load_config(args.config) + args = parser.parse_args(namespace=argparse.Namespace(**args)) res = defaults.copy() for dict in [vars(conf_args), vars(args)]: diff --git a/pipelines/stable_diffusion/vlpn_stable_diffusion.py b/pipelines/stable_diffusion/vlpn_stable_diffusion.py index a43a8e4..53b5eea 100644 --- a/pipelines/stable_diffusion/vlpn_stable_diffusion.py +++ b/pipelines/stable_diffusion/vlpn_stable_diffusion.py @@ -318,8 +318,8 @@ class VlpnStableDiffusion(DiffusionPipeline): negative_prompt: Optional[Union[str, List[str], List[List[str]]]] = None, num_images_per_prompt: Optional[int] = 1, strength: float = 0.8, - height: Optional[int] = 512, - width: Optional[int] = 512, + height: Optional[int] = 768, + width: Optional[int] = 768, num_inference_steps: Optional[int] = 50, guidance_scale: Optional[float] = 7.5, eta: Optional[float] = 0.0, @@ -342,9 +342,9 @@ class VlpnStableDiffusion(DiffusionPipeline): number of denoising steps depends on the amount of noise initially added. When `strength` is 1, added noise will be maximum and the denoising process will run for the full number of iterations specified in `num_inference_steps`. A value of 1, therefore, essentially ignores `init_image`. - height (`int`, *optional*, defaults to 512): + height (`int`, *optional*, defaults to 768): The height in pixels of the generated image. - width (`int`, *optional*, defaults to 512): + width (`int`, *optional*, defaults to 768): The width in pixels of the generated image. num_inference_steps (`int`, *optional*, defaults to 50): The number of denoising steps. More denoising steps usually lead to a higher quality image at the diff --git a/train_dreambooth.py b/train_dreambooth.py index 2c765ec..08bc9e0 100644 --- a/train_dreambooth.py +++ b/train_dreambooth.py @@ -20,7 +20,7 @@ from tqdm.auto import tqdm from transformers import CLIPTextModel, CLIPTokenizer from slugify import slugify -from common import load_text_embeddings +from common import load_text_embeddings, load_config from pipelines.stable_diffusion.vlpn_stable_diffusion import VlpnStableDiffusion from data.csv import CSVDataModule from training.optimization import get_one_cycle_schedule @@ -355,9 +355,8 @@ def parse_args(): args = parser.parse_args() if args.config is not None: - with open(args.config, 'rt') as f: - args = parser.parse_args( - namespace=argparse.Namespace(**json.load(f)["args"])) + args = load_config(args.config) + args = parser.parse_args(namespace=argparse.Namespace(**args)) if args.train_data_file is None: raise ValueError("You must specify --train_data_file") diff --git a/train_lora.py b/train_lora.py index 34e1008..ffca304 100644 --- a/train_lora.py +++ b/train_lora.py @@ -20,7 +20,7 @@ from tqdm.auto import tqdm from transformers import CLIPTextModel, CLIPTokenizer from slugify import slugify -from common import load_text_embeddings +from common import load_text_embeddings, load_config from pipelines.stable_diffusion.vlpn_stable_diffusion import VlpnStableDiffusion from data.csv import CSVDataModule from training.lora import LoraAttnProcessor @@ -317,9 +317,8 @@ def parse_args(): args = parser.parse_args() if args.config is not None: - with open(args.config, 'rt') as f: - args = parser.parse_args( - namespace=argparse.Namespace(**json.load(f)["args"])) + args = load_config(args.config) + args = parser.parse_args(namespace=argparse.Namespace(**args)) if args.train_data_file is None: raise ValueError("You must specify --train_data_file") diff --git a/train_ti.py b/train_ti.py index a228795..6e30ac3 100644 --- a/train_ti.py +++ b/train_ti.py @@ -20,7 +20,7 @@ from tqdm.auto import tqdm from transformers import CLIPTextModel, CLIPTokenizer from slugify import slugify -from common import load_text_embeddings, load_text_embedding +from common import load_text_embeddings, load_text_embedding, load_config from pipelines.stable_diffusion.vlpn_stable_diffusion import VlpnStableDiffusion from data.csv import CSVDataModule, CSVDataItem from training.optimization import get_one_cycle_schedule @@ -225,7 +225,7 @@ def parse_args(): parser.add_argument( "--adam_weight_decay", type=float, - default=1e-2, + default=0, help="Weight decay to use." ) parser.add_argument( @@ -324,9 +324,8 @@ def parse_args(): args = parser.parse_args() if args.config is not None: - with open(args.config, 'rt') as f: - args = parser.parse_args( - namespace=argparse.Namespace(**json.load(f)["args"])) + args = load_config(args.config) + args = parser.parse_args(namespace=argparse.Namespace(**args)) if args.train_data_file is None: raise ValueError("You must specify --train_data_file") @@ -407,7 +406,7 @@ class Checkpointer(CheckpointerBase): for (placeholder_token, placeholder_token_id) in zip(self.placeholder_token, self.placeholder_token_id): # Save a checkpoint - learned_embeds = text_encoder.text_model.embeddings.trainable_embedding.weight[placeholder_token_id] + learned_embeds = text_encoder.text_model.embeddings.trainable_embedding.weight.data[placeholder_token_id] learned_embeds_dict = {placeholder_token: learned_embeds.detach().cpu()} filename = f"%s_%d_%s.bin" % (slugify(placeholder_token), step, postfix) -- cgit v1.2.3-54-g00ecf