From 73910b7f55244ce787fc6a3e6af09240ef0cdfd3 Mon Sep 17 00:00:00 2001 From: Volpeon Date: Sat, 4 Mar 2023 09:46:41 +0100 Subject: Pipeline: Perlin noise for init image --- .../stable_diffusion/vlpn_stable_diffusion.py | 23 +++++++++------------- 1 file changed, 9 insertions(+), 14 deletions(-) (limited to 'pipelines/stable_diffusion/vlpn_stable_diffusion.py') diff --git a/pipelines/stable_diffusion/vlpn_stable_diffusion.py b/pipelines/stable_diffusion/vlpn_stable_diffusion.py index 2251848..a6b31d8 100644 --- a/pipelines/stable_diffusion/vlpn_stable_diffusion.py +++ b/pipelines/stable_diffusion/vlpn_stable_diffusion.py @@ -24,7 +24,9 @@ from diffusers import ( from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion import StableDiffusionPipelineOutput from diffusers.utils import logging, randn_tensor from transformers import CLIPTextModel, CLIPTokenizer + from models.clip.util import unify_input_ids, get_extended_embeddings +from util.noise import perlin_noise logger = logging.get_logger(__name__) # pylint: disable=invalid-name @@ -304,23 +306,18 @@ class VlpnStableDiffusion(DiffusionPipeline): return timesteps, num_inference_steps - t_start - def prepare_image(self, batch_size, width, height, max_offset, dtype, device, generator=None): - offset = (max_offset * (2 * torch.rand( + def prepare_image(self, batch_size, width, height, dtype, device, generator=None): + max = 0.4 + offset = max * (2 * torch.rand( (batch_size, 1, 1, 1), dtype=dtype, device=device, generator=generator - ) - 1)).expand(batch_size, 1, 2, 2) - image = F.interpolate( - torch.normal( - mean=offset, - std=0.3, - generator=generator - ).clamp(-1, 1), - size=(width, height), - mode="bicubic" + ) - 1) + noise = perlin_noise( + batch_size, width, height, res=3, octaves=3, generator=generator, dtype=dtype, device=device ).expand(batch_size, 3, width, height) - return image + return ((1 + max) * noise + max * offset).clamp(-1, 1) def prepare_latents(self, init_image, timestep, batch_size, dtype, device, generator=None): init_image = init_image.to(device=device, dtype=dtype) @@ -384,7 +381,6 @@ class VlpnStableDiffusion(DiffusionPipeline): eta: float = 0.0, generator: Optional[Union[torch.Generator, List[torch.Generator]]] = None, image: Optional[Union[torch.FloatTensor, PIL.Image.Image]] = None, - max_init_offset: float = 0.7, output_type: str = "pil", return_dict: bool = True, callback: Optional[Callable[[int, int, torch.FloatTensor], None]] = None, @@ -474,7 +470,6 @@ class VlpnStableDiffusion(DiffusionPipeline): batch_size * num_images_per_prompt, width, height, - max_init_offset, prompt_embeds.dtype, device, generator -- cgit v1.2.3-54-g00ecf