From 8716116ada220172c36aa34a138f0a0ebefdd684 Mon Sep 17 00:00:00 2001
From: Volpeon
Date: Sun, 2 Oct 2022 16:30:30 +0200
Subject: Fix img2img

---
 .../stable_diffusion/vlpn_stable_diffusion.py      | 71 +++++++++++-----------
 1 file changed, 36 insertions(+), 35 deletions(-)

(limited to 'pipelines')

diff --git a/pipelines/stable_diffusion/vlpn_stable_diffusion.py b/pipelines/stable_diffusion/vlpn_stable_diffusion.py
index a8ecedf..b4c85e9 100644
--- a/pipelines/stable_diffusion/vlpn_stable_diffusion.py
+++ b/pipelines/stable_diffusion/vlpn_stable_diffusion.py
@@ -178,35 +178,6 @@ class VlpnStableDiffusion(DiffusionPipeline):
         # set timesteps
         self.scheduler.set_timesteps(num_inference_steps)
 
-        offset = self.scheduler.config.get("steps_offset", 0)
-
-        if latents is not None and isinstance(latents, PIL.Image.Image):
-            latents = preprocess(latents, width, height)
-            latent_dist = self.vae.encode(latents.to(self.device)).latent_dist
-            latents = latent_dist.sample(generator=generator)
-            latents = 0.18215 * latents
-
-            # expand init_latents for batch_size
-            latents = torch.cat([latents] * batch_size)
-
-            # get the original timestep using init_timestep
-            init_timestep = int(num_inference_steps * strength) + offset
-            init_timestep = min(init_timestep, num_inference_steps)
-
-            if isinstance(self.scheduler, LMSDiscreteScheduler):
-                timesteps = torch.tensor(
-                    [num_inference_steps - init_timestep] * batch_size, dtype=torch.long, device=self.device
-                )
-            else:
-                timesteps = self.scheduler.timesteps[-init_timestep]
-                timesteps = torch.tensor([timesteps] * batch_size, dtype=torch.long, device=self.device)
-
-            # add noise to latents using the timesteps
-            noise = torch.randn(latents.shape, generator=generator, device=self.device)
-            latents = self.scheduler.add_noise(latents, noise, timesteps)
-        else:
-            init_timestep = num_inference_steps + offset
-
         # get prompt text embeddings
         text_inputs = self.tokenizer(
             prompt,
@@ -243,6 +214,10 @@ class VlpnStableDiffusion(DiffusionPipeline):
             # to avoid doing two forward passes
             text_embeddings = torch.cat([uncond_embeddings, text_embeddings])
 
+        offset = self.scheduler.config.get("steps_offset", 0)
+        init_timestep = num_inference_steps + offset
+        ensure_sigma = not isinstance(latents, PIL.Image.Image)
+
         # get the initial random noise unless the user supplied it
 
         # Unlike in other pipelines, latents need to be generated in the target device
@@ -257,23 +232,48 @@ class VlpnStableDiffusion(DiffusionPipeline):
                 device=latents_device,
                 dtype=text_embeddings.dtype,
            )
+        elif isinstance(latents, PIL.Image.Image):
+            latents = preprocess(latents, width, height)
+            latent_dist = self.vae.encode(latents.to(self.device)).latent_dist
+            latents = latent_dist.sample(generator=generator)
+            latents = 0.18215 * latents
+
+            # expand init_latents for batch_size
+            latents = torch.cat([latents] * batch_size)
+
+            # get the original timestep using init_timestep
+            init_timestep = int(num_inference_steps * strength) + offset
+            init_timestep = min(init_timestep, num_inference_steps)
+
+            if isinstance(self.scheduler, LMSDiscreteScheduler):
+                timesteps = torch.tensor(
+                    [num_inference_steps - init_timestep] * batch_size, dtype=torch.long, device=self.device
+                )
+            else:
+                timesteps = self.scheduler.timesteps[-init_timestep]
+                timesteps = torch.tensor([timesteps] * batch_size, dtype=torch.long, device=self.device)
+
+            # add noise to latents using the timesteps
+            noise = torch.randn(latents.shape, generator=generator, device=self.device)
+            latents = self.scheduler.add_noise(latents, noise, timesteps)
         else:
             if latents.shape != latents_shape:
                 raise ValueError(f"Unexpected latents shape, got {latents.shape}, expected {latents_shape}")
             latents = latents.to(self.device)
 
+        # if we use LMSDiscreteScheduler, let's make sure latents are multiplied by sigmas
+        if ensure_sigma:
+            if isinstance(self.scheduler, LMSDiscreteScheduler):
+                latents = latents * self.scheduler.sigmas[0]
+            elif isinstance(self.scheduler, EulerAScheduler):
+                latents = latents * self.scheduler.sigmas[0]
+
         t_start = max(num_inference_steps - init_timestep + offset, 0)
 
         # Some schedulers like PNDM have timesteps as arrays
         # It's more optimzed to move all timesteps to correct device beforehand
         timesteps_tensor = torch.tensor(self.scheduler.timesteps[t_start:], device=self.device)
 
-        # if we use LMSDiscreteScheduler, let's make sure latents are multiplied by sigmas
-        if isinstance(self.scheduler, LMSDiscreteScheduler):
-            latents = latents * self.scheduler.sigmas[0]
-        elif isinstance(self.scheduler, EulerAScheduler):
-            latents = latents * self.scheduler.sigmas[0]
-
         # prepare extra kwargs for the scheduler step, since not all schedulers have the same signature
         # eta (η) is only used with the DDIMScheduler, it will be ignored for other schedulers.
         # eta corresponds to η in DDIM paper: https://arxiv.org/abs/2010.02502
@@ -292,6 +292,7 @@ class VlpnStableDiffusion(DiffusionPipeline):
 
             # expand the latents if we are doing classifier free guidance
             latent_model_input = torch.cat([latents] * 2) if do_classifier_free_guidance else latents
+
             if isinstance(self.scheduler, LMSDiscreteScheduler):
                 sigma = self.scheduler.sigmas[t_index]
                 # the model input needs to be scaled to match the continuous ODE formulation in K-LMS
--
cgit v1.2.3-54-g00ecf
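
For reference, a minimal standalone sketch of the timestep bookkeeping this patch consolidates, using the same formulas that appear in the diff. The helper names (img2img_start_step, txt2img_start_step) are illustrative only and do not exist in the pipeline; it assumes offset corresponds to the scheduler's steps_offset config value.

    # Standalone sketch (not part of the patch) of the diff's timestep math.
    # Helper names are hypothetical; the formulas mirror the diff above.

    def img2img_start_step(num_inference_steps: int, strength: float, offset: int = 0) -> int:
        # img2img: only the final `strength` fraction of the schedule is denoised,
        # so the first (num_inference_steps - init_timestep + offset) steps are skipped
        init_timestep = int(num_inference_steps * strength) + offset
        init_timestep = min(init_timestep, num_inference_steps)
        return max(num_inference_steps - init_timestep + offset, 0)

    def txt2img_start_step(num_inference_steps: int, offset: int = 0) -> int:
        # txt2img: init_timestep = num_inference_steps + offset, so t_start is 0
        # and the full schedule runs
        init_timestep = num_inference_steps + offset
        return max(num_inference_steps - init_timestep + offset, 0)

    print(img2img_start_step(50, strength=0.75))  # 13 -> the first 13 steps are skipped
    print(txt2img_start_step(50))                 # 0  -> the full 50-step schedule runs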