From ecb12378da48fc3a17539d5cc33edc561cf8a426 Mon Sep 17 00:00:00 2001
From: Volpeon
Date: Sun, 8 Jan 2023 20:33:04 +0100
Subject: Improved aspect ratio bucketing

---
 data/csv.py         | 22 +++++++++++++++++++---
 train_dreambooth.py | 27 +++++++++++++++++++++++++++
 train_ti.py         | 16 +++++++++++++++-
 3 files changed, 61 insertions(+), 4 deletions(-)

diff --git a/data/csv.py b/data/csv.py
index 7527b7d..55a1988 100644
--- a/data/csv.py
+++ b/data/csv.py
@@ -44,18 +44,25 @@ def generate_buckets(
     items: list[str],
     base_size: int,
     step_size: int = 64,
+    max_pixels: Optional[int] = None,
     num_buckets: int = 4,
     progressive_buckets: bool = False,
     return_tensor: bool = True
 ):
+    if max_pixels is None:
+        max_pixels = (base_size + step_size) ** 2
+
+    max_pixels = max(max_pixels, base_size * base_size)
+
     bucket_items: list[int] = []
     bucket_assignments: list[int] = []
     buckets = [1.0]

     for i in range(1, num_buckets + 1):
-        s = base_size + i * step_size
-        buckets.append(s / base_size)
-        buckets.append(base_size / s)
+        long_side = base_size + i * step_size
+        short_side = min(base_size - math.ceil((base_size - max_pixels / long_side) / step_size) * step_size, base_size)
+        buckets.append(long_side / short_side)
+        buckets.append(short_side / long_side)

     buckets = torch.tensor(buckets)
     bucket_indices = torch.arange(len(buckets))
@@ -110,6 +117,8 @@ class VlpnDataModule():
         num_class_images: int = 1,
         size: int = 768,
         num_buckets: int = 0,
+        bucket_step_size: int = 64,
+        max_pixels_per_bucket: Optional[int] = None,
         progressive_buckets: bool = False,
         dropout: float = 0,
         interpolation: str = "bicubic",
@@ -135,6 +144,8 @@ class VlpnDataModule():
         self.prompt_processor = prompt_processor
         self.size = size
         self.num_buckets = num_buckets
+        self.bucket_step_size = bucket_step_size
+        self.max_pixels_per_bucket = max_pixels_per_bucket
         self.progressive_buckets = progressive_buckets
         self.dropout = dropout
         self.template_key = template_key
@@ -223,6 +234,7 @@ class VlpnDataModule():
         train_dataset = VlpnDataset(
             self.data_train, self.prompt_processor,
             num_buckets=self.num_buckets, progressive_buckets=self.progressive_buckets,
+            bucket_step_size=self.bucket_step_size, max_pixels_per_bucket=self.max_pixels_per_bucket,
             batch_size=self.batch_size, generator=generator,
             size=self.size, interpolation=self.interpolation,
             num_class_images=self.num_class_images, dropout=self.dropout, shuffle=True,
@@ -251,6 +263,8 @@ class VlpnDataset(IterableDataset):
         items: list[VlpnDataItem],
         prompt_processor: PromptProcessor,
         num_buckets: int = 1,
+        bucket_step_size: int = 64,
+        max_pixels_per_bucket: Optional[int] = None,
         progressive_buckets: bool = False,
         batch_size: int = 1,
         num_class_images: int = 0,
@@ -274,7 +288,9 @@ class VlpnDataset(IterableDataset):
         self.buckets, self.bucket_items, self.bucket_assignments = generate_buckets(
             [item.instance_image_path for item in items],
             base_size=size,
+            step_size=bucket_step_size,
             num_buckets=num_buckets,
+            max_pixels=max_pixels_per_bucket,
             progressive_buckets=progressive_buckets,
         )

diff --git a/train_dreambooth.py b/train_dreambooth.py
index 79eede6..d396249 100644
--- a/train_dreambooth.py
+++ b/train_dreambooth.py
@@ -103,6 +103,29 @@ def parse_args():
         default=999999,
         help="Number of epochs the text encoder will be trained."
     )
+    parser.add_argument(
+        "--num_buckets",
+        type=int,
+        default=4,
+        help="Number of aspect ratio buckets in either direction.",
+    )
+    parser.add_argument(
+        "--progressive_buckets",
+        action="store_true",
+        help="Include images in smaller buckets as well.",
+    )
+    parser.add_argument(
+        "--bucket_step_size",
+        type=int,
+        default=64,
+        help="Step size between buckets.",
+    )
+    parser.add_argument(
+        "--bucket_max_pixels",
+        type=int,
+        default=None,
+        help="Maximum pixels per bucket.",
+    )
     parser.add_argument(
         "--tag_dropout",
         type=float,
@@ -734,6 +757,10 @@ def main():
         class_subdir=args.class_image_dir,
         num_class_images=args.num_class_images,
         size=args.resolution,
+        num_buckets=args.num_buckets,
+        progressive_buckets=args.progressive_buckets,
+        bucket_step_size=args.bucket_step_size,
+        max_pixels_per_bucket=args.bucket_max_pixels,
         dropout=args.tag_dropout,
         template_key=args.train_data_template,
         valid_set_size=args.valid_set_size,
diff --git a/train_ti.py b/train_ti.py
index 323ef10..eb0b8b6 100644
--- a/train_ti.py
+++ b/train_ti.py
@@ -143,13 +143,25 @@ def parse_args():
         "--num_buckets",
         type=int,
         default=4,
-        help="Number of aspect ratio buckets in either direction (adds 64 pixels per step).",
+        help="Number of aspect ratio buckets in either direction.",
     )
     parser.add_argument(
         "--progressive_buckets",
         action="store_true",
         help="Include images in smaller buckets as well.",
     )
+    parser.add_argument(
+        "--bucket_step_size",
+        type=int,
+        default=64,
+        help="Step size between buckets.",
+    )
+    parser.add_argument(
+        "--bucket_max_pixels",
+        type=int,
+        default=None,
+        help="Maximum pixels per bucket.",
+    )
     parser.add_argument(
         "--tag_dropout",
         type=float,
@@ -718,6 +730,8 @@ def main():
         size=args.resolution,
         num_buckets=args.num_buckets,
         progressive_buckets=args.progressive_buckets,
+        bucket_step_size=args.bucket_step_size,
+        max_pixels_per_bucket=args.bucket_max_pixels,
         dropout=args.tag_dropout,
         template_key=args.train_data_template,
         valid_set_size=args.valid_set_size,
-- 
cgit v1.2.3-54-g00ecf
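
The snippet below restates the new bucket-ratio math from the generate_buckets hunk in isolation. It is an illustrative sketch, not part of the patch: the helper name is made up, and the defaults (base_size=768, step_size=64, num_buckets=4) are taken from the code above. It shows how the max_pixels clamp now shrinks the short side in step_size increments so that long_side * short_side stays within the pixel budget, instead of always pairing the long side with the full base_size as the previous code did.

    # Illustrative sketch (not part of the patch), condensed from the
    # generate_buckets hunk above; the helper name is hypothetical.
    import math

    def sketch_bucket_ratios(base_size=768, step_size=64, num_buckets=4, max_pixels=None):
        if max_pixels is None:
            max_pixels = (base_size + step_size) ** 2        # default budget: (768 + 64)^2
        max_pixels = max(max_pixels, base_size * base_size)  # never below the base square

        ratios = [1.0]
        for i in range(1, num_buckets + 1):
            long_side = base_size + i * step_size
            # Shrink the short side in step_size increments until
            # long_side * short_side fits under max_pixels.
            short_side = min(
                base_size - math.ceil((base_size - max_pixels / long_side) / step_size) * step_size,
                base_size,
            )
            ratios.append(long_side / short_side)
            ratios.append(short_side / long_side)
        return ratios

    print(sketch_bucket_ratios())

With those defaults the widest bucket pairs a 1024 px long side with a 640 px short side (1024 * 640 = 655360 <= 832^2 = 692224), whereas the old computation kept the short side at 768 px regardless of how long the long side grew.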