Pipeline: Perlin noise for init image

author: Volpeon <git@volpeon.ink> 2023-03-04 09:46:41 +0100
committer: Volpeon <git@volpeon.ink> 2023-03-04 09:46:41 +0100
commit: 73910b7f55244ce787fc6a3e6af09240ef0cdfd3 (patch)
tree: 3ef927578fc54b59ab6ff1bd00c3f804c0b9a7bf /pipelines/stable_diffusion/vlpn_stable_diffusion.py
parent: Pipeline: Improved initial image generation (diff)
download: textual-inversion-diff-73910b7f55244ce787fc6a3e6af09240ef0cdfd3.tar.gz
textual-inversion-diff-73910b7f55244ce787fc6a3e6af09240ef0cdfd3.tar.bz2
textual-inversion-diff-73910b7f55244ce787fc6a3e6af09240ef0cdfd3.zip
1 file changed, 9 insertions, 14 deletions
diff --git a/pipelines/stable_diffusion/vlpn_stable_diffusion.py b/pipelines/stable_diffusion/vlpn_stable_diffusion.py
index 2251848..a6b31d8 100644
--- a/pipelines/stable_diffusion/vlpn_stable_diffusion.py
+++ b/pipelines/stable_diffusion/vlpn_stable_diffusion.py
@@ -24,7 +24,9 @@ from diffusers import (
 from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion import StableDiffusionPipelineOutput
 from diffusers.utils import logging, randn_tensor
 from transformers import CLIPTextModel, CLIPTokenizer
 from models.clip.util import unify_input_ids, get_extended_embeddings
+from util.noise import perlin_noise
 logger = logging.get_logger(__name__)  # pylint: disable=invalid-name
@@ -304,23 +306,18 @@ class VlpnStableDiffusion(DiffusionPipeline):
        return timesteps, num_inference_steps - t_start
-    def prepare_image(self, batch_size, width, height, max_offset, dtype, device, generator=None):
+    def prepare_image(self, batch_size, width, height, dtype, device, generator=None):
-        offset = (max_offset * (2 * torch.rand(
+        max = 0.4
+        offset = max * (2 * torch.rand(
            (batch_size, 1, 1, 1),
            dtype=dtype,
            device=device,
            generator=generator
-        ) - 1)).expand(batch_size, 1, 2, 2)
+        ) - 1)
-        image = F.interpolate(
+        noise = perlin_noise(
-            torch.normal(
+            batch_size, width, height, res=3, octaves=3, generator=generator, dtype=dtype, device=device
-                mean=offset,
-                std=0.3,
-                generator=generator
-            ).clamp(-1, 1),
-            size=(width, height),
-            mode="bicubic"
        ).expand(batch_size, 3, width, height)
-        return image
+        return ((1 + max) * noise + max * offset).clamp(-1, 1)
    def prepare_latents(self, init_image, timestep, batch_size, dtype, device, generator=None):
        init_image = init_image.to(device=device, dtype=dtype)
@@ -384,7 +381,6 @@ class VlpnStableDiffusion(DiffusionPipeline):
        eta: float = 0.0,
        generator: Optional[Union[torch.Generator, List[torch.Generator]]] = None,
        image: Optional[Union[torch.FloatTensor, PIL.Image.Image]] = None,
-        max_init_offset: float = 0.7,
        output_type: str = "pil",
        return_dict: bool = True,
        callback: Optional[Callable[[int, int, torch.FloatTensor], None]] = None,
@@ -474,7 +470,6 @@ class VlpnStableDiffusion(DiffusionPipeline):
                batch_size * num_images_per_prompt,
                width,
                height,
-                max_init_offset,
                prompt_embeds.dtype,
                device,
                generator
author	Volpeon <git@volpeon.ink>	2023-03-04 09:46:41 +0100
committer	Volpeon <git@volpeon.ink>	2023-03-04 09:46:41 +0100
commit	73910b7f55244ce787fc6a3e6af09240ef0cdfd3 (patch)
tree	3ef927578fc54b59ab6ff1bd00c3f804c0b9a7bf /pipelines/stable_diffusion/vlpn_stable_diffusion.py
parent	Pipeline: Improved initial image generation (diff)
download	textual-inversion-diff-73910b7f55244ce787fc6a3e6af09240ef0cdfd3.tar.gz textual-inversion-diff-73910b7f55244ce787fc6a3e6af09240ef0cdfd3.tar.bz2 textual-inversion-diff-73910b7f55244ce787fc6a3e6af09240ef0cdfd3.zip