From f23fd5184b8ba4ec04506495f4a61726e50756f7 Mon Sep 17 00:00:00 2001 From: Volpeon Date: Mon, 3 Oct 2022 17:38:44 +0200 Subject: Small perf improvements --- data/dreambooth/csv.py | 5 ++- data/textual_inversion/csv.py | 4 +- dreambooth.py | 89 ++++++++++++++++++++++--------------------- infer.py | 5 ++- textual_inversion.py | 6 ++- 5 files changed, 58 insertions(+), 51 deletions(-) diff --git a/data/dreambooth/csv.py b/data/dreambooth/csv.py index 71aa1eb..c0b0067 100644 --- a/data/dreambooth/csv.py +++ b/data/dreambooth/csv.py @@ -70,8 +70,9 @@ class CSVDataModule(pl.LightningDataModule): size=self.size, interpolation=self.interpolation, identifier=self.identifier, center_crop=self.center_crop, batch_size=self.batch_size) self.train_dataloader_ = DataLoader(train_dataset, batch_size=self.batch_size, - shuffle=True, collate_fn=self.collate_fn) - self.val_dataloader_ = DataLoader(val_dataset, batch_size=self.batch_size, collate_fn=self.collate_fn) + shuffle=True, pin_memory=True, collate_fn=self.collate_fn) + self.val_dataloader_ = DataLoader(val_dataset, batch_size=self.batch_size, + pin_memory=True, collate_fn=self.collate_fn) def train_dataloader(self): return self.train_dataloader_ diff --git a/data/textual_inversion/csv.py b/data/textual_inversion/csv.py index 64f0c28..852b1cb 100644 --- a/data/textual_inversion/csv.py +++ b/data/textual_inversion/csv.py @@ -60,8 +60,8 @@ class CSVDataModule(pl.LightningDataModule): placeholder_token=self.placeholder_token, center_crop=self.center_crop) val_dataset = CSVDataset(self.data_val, self.tokenizer, size=self.size, interpolation=self.interpolation, placeholder_token=self.placeholder_token, center_crop=self.center_crop) - self.train_dataloader_ = DataLoader(train_dataset, batch_size=self.batch_size, shuffle=True) - self.val_dataloader_ = DataLoader(val_dataset, batch_size=self.batch_size) + self.train_dataloader_ = DataLoader(train_dataset, batch_size=self.batch_size, pin_memory=True, shuffle=True) + self.val_dataloader_ = DataLoader(val_dataset, batch_size=self.batch_size, pin_memory=True) def train_dataloader(self): return self.train_dataloader_ diff --git a/dreambooth.py b/dreambooth.py index 5fbf172..9d6b8d6 100644 --- a/dreambooth.py +++ b/dreambooth.py @@ -13,7 +13,7 @@ import torch.utils.checkpoint from accelerate import Accelerator from accelerate.logging import get_logger from accelerate.utils import LoggerType, set_seed -from diffusers import AutoencoderKL, DDPMScheduler, PNDMScheduler, LMSDiscreteScheduler, StableDiffusionPipeline, UNet2DConditionModel +from diffusers import AutoencoderKL, DDPMScheduler, PNDMScheduler, StableDiffusionPipeline, UNet2DConditionModel from schedulers.scheduling_euler_a import EulerAScheduler from diffusers.optimization import get_scheduler from pipelines.stable_diffusion.no_check import NoCheck @@ -30,6 +30,9 @@ from data.dreambooth.prompt import PromptDataset logger = get_logger(__name__) +torch.backends.cuda.matmul.allow_tf32 = True + + def parse_args(): parser = argparse.ArgumentParser( description="Simple example of a training script." @@ -346,7 +349,7 @@ class Checkpointer: print("Saving model...") unwrapped = self.accelerator.unwrap_model(self.unet) - pipeline = StableDiffusionPipeline( + pipeline = VlpnStableDiffusion( text_encoder=self.text_encoder, vae=self.vae, unet=self.accelerator.unwrap_model(self.unet), @@ -354,8 +357,6 @@ class Checkpointer: scheduler=PNDMScheduler( beta_start=0.00085, beta_end=0.012, beta_schedule="scaled_linear", skip_prk_steps=True ), - safety_checker=NoCheck(), - feature_extractor=CLIPFeatureExtractor.from_pretrained("openai/clip-vit-base-patch32"), ) pipeline.enable_attention_slicing() pipeline.save_pretrained(f"{self.output_dir}/model") @@ -381,7 +382,6 @@ class Checkpointer: unet=unwrapped, tokenizer=self.tokenizer, scheduler=scheduler, - feature_extractor=CLIPFeatureExtractor.from_pretrained("openai/clip-vit-base-patch32"), ).to(self.accelerator.device) pipeline.enable_attention_slicing() @@ -459,44 +459,6 @@ def main(): if args.seed is not None: set_seed(args.seed) - if args.with_prior_preservation: - class_images_dir = Path(args.class_data_dir) - class_images_dir.mkdir(parents=True, exist_ok=True) - cur_class_images = len(list(class_images_dir.iterdir())) - - if cur_class_images < args.num_class_images: - torch_dtype = torch.float32 - if accelerator.device.type == "cuda": - torch_dtype = {"no": torch.float32, "fp16": torch.float16, "bf16": torch.bfloat16}[args.mixed_precision] - - pipeline = StableDiffusionPipeline.from_pretrained( - args.pretrained_model_name_or_path, torch_dtype=torch_dtype) - pipeline.enable_attention_slicing() - pipeline.set_progress_bar_config(disable=True) - pipeline.to(accelerator.device) - - num_new_images = args.num_class_images - cur_class_images - logger.info(f"Number of class images to sample: {num_new_images}.") - - sample_dataset = PromptDataset(args.class_prompt, num_new_images) - sample_dataloader = torch.utils.data.DataLoader(sample_dataset, batch_size=args.sample_batch_size) - - sample_dataloader = accelerator.prepare(sample_dataloader) - - for example in tqdm( - sample_dataloader, desc="Generating class images", disable=not accelerator.is_local_main_process - ): - with accelerator.autocast(): - images = pipeline(example["prompt"]).images - - for i, image in enumerate(images): - image.save(class_images_dir / f"{example['index'][i] + cur_class_images}.jpg") - - del pipeline - - if torch.cuda.is_available(): - torch.cuda.empty_cache() - # Load the tokenizer and add the placeholder token as a additional special token if args.tokenizer_name: tokenizer = CLIPTokenizer.from_pretrained(args.tokenizer_name) @@ -526,6 +488,47 @@ def main(): freeze_params(vae.parameters()) freeze_params(text_encoder.parameters()) + # Generate class images, if necessary + if args.with_prior_preservation: + class_images_dir = Path(args.class_data_dir) + class_images_dir.mkdir(parents=True, exist_ok=True) + cur_class_images = len(list(class_images_dir.iterdir())) + + if cur_class_images < args.num_class_images: + scheduler = EulerAScheduler( + beta_start=0.00085, beta_end=0.012, beta_schedule="scaled_linear" + ) + + pipeline = VlpnStableDiffusion( + text_encoder=text_encoder, + vae=vae, + unet=unet, + tokenizer=tokenizer, + scheduler=scheduler, + ).to(accelerator.device) + pipeline.enable_attention_slicing() + pipeline.set_progress_bar_config(disable=True) + + num_new_images = args.num_class_images - cur_class_images + logger.info(f"Number of class images to sample: {num_new_images}.") + + sample_dataset = PromptDataset(args.class_prompt, num_new_images) + sample_dataloader = torch.utils.data.DataLoader(sample_dataset, batch_size=args.sample_batch_size) + + sample_dataloader = accelerator.prepare(sample_dataloader) + + for example in tqdm(sample_dataloader, desc="Generating class images", disable=not accelerator.is_local_main_process): + with accelerator.autocast(): + images = pipeline(example["prompt"]).images + + for i, image in enumerate(images): + image.save(class_images_dir / f"{example['index'][i] + cur_class_images}.jpg") + + del pipeline + + if torch.cuda.is_available(): + torch.cuda.empty_cache() + if args.scale_lr: args.learning_rate = ( args.learning_rate * args.gradient_accumulation_steps * diff --git a/infer.py b/infer.py index b15b17f..3dc0f32 100644 --- a/infer.py +++ b/infer.py @@ -16,6 +16,9 @@ from pipelines.stable_diffusion.vlpn_stable_diffusion import VlpnStableDiffusion from schedulers.scheduling_euler_a import EulerAScheduler +torch.backends.cuda.matmul.allow_tf32 = True + + default_args = { "model": None, "scheduler": "euler_a", @@ -166,7 +169,6 @@ def create_pipeline(model, scheduler, dtype): text_encoder = CLIPTextModel.from_pretrained(model + '/text_encoder', torch_dtype=dtype) vae = AutoencoderKL.from_pretrained(model + '/vae', torch_dtype=dtype) unet = UNet2DConditionModel.from_pretrained(model + '/unet', torch_dtype=dtype) - feature_extractor = CLIPFeatureExtractor.from_pretrained("openai/clip-vit-base-patch32", torch_dtype=dtype) if scheduler == "plms": scheduler = PNDMScheduler( @@ -191,7 +193,6 @@ def create_pipeline(model, scheduler, dtype): unet=unet, tokenizer=tokenizer, scheduler=scheduler, - feature_extractor=feature_extractor ) # pipeline.enable_attention_slicing() pipeline.to("cuda") diff --git a/textual_inversion.py b/textual_inversion.py index 00d460f..5fc2338 100644 --- a/textual_inversion.py +++ b/textual_inversion.py @@ -14,7 +14,7 @@ import torch.utils.checkpoint from accelerate import Accelerator from accelerate.logging import get_logger from accelerate.utils import LoggerType, set_seed -from diffusers import AutoencoderKL, DDPMScheduler, LMSDiscreteScheduler, StableDiffusionPipeline, UNet2DConditionModel +from diffusers import AutoencoderKL, DDPMScheduler, UNet2DConditionModel from schedulers.scheduling_euler_a import EulerAScheduler from diffusers.optimization import get_scheduler from PIL import Image @@ -30,6 +30,9 @@ from data.textual_inversion.csv import CSVDataModule logger = get_logger(__name__) +torch.backends.cuda.matmul.allow_tf32 = True + + def parse_args(): parser = argparse.ArgumentParser( description="Simple example of a training script." @@ -370,7 +373,6 @@ class Checkpointer: unet=self.unet, tokenizer=self.tokenizer, scheduler=scheduler, - feature_extractor=CLIPFeatureExtractor.from_pretrained("openai/clip-vit-base-patch32"), ).to(self.accelerator.device) pipeline.enable_attention_slicing() -- cgit v1.2.3-70-g09d2