diff options
| author | Volpeon <git@volpeon.ink> | 2023-03-04 09:46:41 +0100 |
|---|---|---|
| committer | Volpeon <git@volpeon.ink> | 2023-03-04 09:46:41 +0100 |
| commit | 73910b7f55244ce787fc6a3e6af09240ef0cdfd3 (patch) | |
| tree | 3ef927578fc54b59ab6ff1bd00c3f804c0b9a7bf /pipelines/stable_diffusion/vlpn_stable_diffusion.py | |
| parent | Pipeline: Improved initial image generation (diff) | |
| download | textual-inversion-diff-73910b7f55244ce787fc6a3e6af09240ef0cdfd3.tar.gz textual-inversion-diff-73910b7f55244ce787fc6a3e6af09240ef0cdfd3.tar.bz2 textual-inversion-diff-73910b7f55244ce787fc6a3e6af09240ef0cdfd3.zip | |
Pipeline: Perlin noise for init image
Diffstat (limited to 'pipelines/stable_diffusion/vlpn_stable_diffusion.py')
| -rw-r--r-- | pipelines/stable_diffusion/vlpn_stable_diffusion.py | 23 |
1 files changed, 9 insertions, 14 deletions
diff --git a/pipelines/stable_diffusion/vlpn_stable_diffusion.py b/pipelines/stable_diffusion/vlpn_stable_diffusion.py index 2251848..a6b31d8 100644 --- a/pipelines/stable_diffusion/vlpn_stable_diffusion.py +++ b/pipelines/stable_diffusion/vlpn_stable_diffusion.py | |||
| @@ -24,7 +24,9 @@ from diffusers import ( | |||
| 24 | from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion import StableDiffusionPipelineOutput | 24 | from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion import StableDiffusionPipelineOutput |
| 25 | from diffusers.utils import logging, randn_tensor | 25 | from diffusers.utils import logging, randn_tensor |
| 26 | from transformers import CLIPTextModel, CLIPTokenizer | 26 | from transformers import CLIPTextModel, CLIPTokenizer |
| 27 | |||
| 27 | from models.clip.util import unify_input_ids, get_extended_embeddings | 28 | from models.clip.util import unify_input_ids, get_extended_embeddings |
| 29 | from util.noise import perlin_noise | ||
| 28 | 30 | ||
| 29 | logger = logging.get_logger(__name__) # pylint: disable=invalid-name | 31 | logger = logging.get_logger(__name__) # pylint: disable=invalid-name |
| 30 | 32 | ||
| @@ -304,23 +306,18 @@ class VlpnStableDiffusion(DiffusionPipeline): | |||
| 304 | 306 | ||
| 305 | return timesteps, num_inference_steps - t_start | 307 | return timesteps, num_inference_steps - t_start |
| 306 | 308 | ||
| 307 | def prepare_image(self, batch_size, width, height, max_offset, dtype, device, generator=None): | 309 | def prepare_image(self, batch_size, width, height, dtype, device, generator=None): |
| 308 | offset = (max_offset * (2 * torch.rand( | 310 | max = 0.4 |
| 311 | offset = max * (2 * torch.rand( | ||
| 309 | (batch_size, 1, 1, 1), | 312 | (batch_size, 1, 1, 1), |
| 310 | dtype=dtype, | 313 | dtype=dtype, |
| 311 | device=device, | 314 | device=device, |
| 312 | generator=generator | 315 | generator=generator |
| 313 | ) - 1)).expand(batch_size, 1, 2, 2) | 316 | ) - 1) |
| 314 | image = F.interpolate( | 317 | noise = perlin_noise( |
| 315 | torch.normal( | 318 | batch_size, width, height, res=3, octaves=3, generator=generator, dtype=dtype, device=device |
| 316 | mean=offset, | ||
| 317 | std=0.3, | ||
| 318 | generator=generator | ||
| 319 | ).clamp(-1, 1), | ||
| 320 | size=(width, height), | ||
| 321 | mode="bicubic" | ||
| 322 | ).expand(batch_size, 3, width, height) | 319 | ).expand(batch_size, 3, width, height) |
| 323 | return image | 320 | return ((1 + max) * noise + max * offset).clamp(-1, 1) |
| 324 | 321 | ||
| 325 | def prepare_latents(self, init_image, timestep, batch_size, dtype, device, generator=None): | 322 | def prepare_latents(self, init_image, timestep, batch_size, dtype, device, generator=None): |
| 326 | init_image = init_image.to(device=device, dtype=dtype) | 323 | init_image = init_image.to(device=device, dtype=dtype) |
| @@ -384,7 +381,6 @@ class VlpnStableDiffusion(DiffusionPipeline): | |||
| 384 | eta: float = 0.0, | 381 | eta: float = 0.0, |
| 385 | generator: Optional[Union[torch.Generator, List[torch.Generator]]] = None, | 382 | generator: Optional[Union[torch.Generator, List[torch.Generator]]] = None, |
| 386 | image: Optional[Union[torch.FloatTensor, PIL.Image.Image]] = None, | 383 | image: Optional[Union[torch.FloatTensor, PIL.Image.Image]] = None, |
| 387 | max_init_offset: float = 0.7, | ||
| 388 | output_type: str = "pil", | 384 | output_type: str = "pil", |
| 389 | return_dict: bool = True, | 385 | return_dict: bool = True, |
| 390 | callback: Optional[Callable[[int, int, torch.FloatTensor], None]] = None, | 386 | callback: Optional[Callable[[int, int, torch.FloatTensor], None]] = None, |
| @@ -474,7 +470,6 @@ class VlpnStableDiffusion(DiffusionPipeline): | |||
| 474 | batch_size * num_images_per_prompt, | 470 | batch_size * num_images_per_prompt, |
| 475 | width, | 471 | width, |
| 476 | height, | 472 | height, |
| 477 | max_init_offset, | ||
| 478 | prompt_embeds.dtype, | 473 | prompt_embeds.dtype, |
| 479 | device, | 474 | device, |
| 480 | generator | 475 | generator |
