Better batch filling

author: Volpeon <git@volpeon.ink> 2023-02-15 12:22:44 +0100
committer: Volpeon <git@volpeon.ink> 2023-02-15 12:22:44 +0100
commit: fad870919737a19ea28f0c501f8139ce6a98b7fb (patch)
tree: 4137f98df036bace7e1b40563560c257218306ee /data
parent: Better batch filling behavior (diff)
download: textual-inversion-diff-fad870919737a19ea28f0c501f8139ce6a98b7fb.tar.gz
textual-inversion-diff-fad870919737a19ea28f0c501f8139ce6a98b7fb.tar.bz2
textual-inversion-diff-fad870919737a19ea28f0c501f8139ce6a98b7fb.zip
1 file changed, 6 insertions, 3 deletions
diff --git a/data/csv.py b/data/csv.py
index 4ebdc1e..480e9f2 100644
--- a/data/csv.py
+++ b/data/csv.py
@@ -2,7 +2,6 @@ import math
 import torch
 import json
 from functools import partial
-import itertools
 from pathlib import Path
 from typing import NamedTuple, Optional, Union, Callable
@@ -411,7 +410,9 @@ class VlpnDataset(IterableDataset):
            if len(bucket_items) == 0:
                if len(batch) != 0:
                    if self.fill_batch:
-                        batch = list(itertools.islice(itertools.cycle(batch), batch_size))
+                        fill_items = self.bucket_items[self.bucket_assignments == bucket]
+                        fill_perm = torch.randint(len(fill_items), (batch_size - len(batch),), generator=self.generator)
+                        batch += fill_items[fill_perm]
                    yield batch
                    batch = []
@@ -452,5 +453,7 @@ class VlpnDataset(IterableDataset):
        if len(batch) != 0:
            if self.fill_batch:
-                batch = list(itertools.islice(itertools.cycle(batch), batch_size))
+                fill_items = self.bucket_items[self.bucket_assignments == bucket]
+                fill_perm = torch.randint(len(fill_items), (batch_size - len(batch),), generator=self.generator)
+                batch += fill_items[fill_perm]
            yield batch
author	Volpeon <git@volpeon.ink>	2023-02-15 12:22:44 +0100
committer	Volpeon <git@volpeon.ink>	2023-02-15 12:22:44 +0100
commit	fad870919737a19ea28f0c501f8139ce6a98b7fb (patch)
tree	4137f98df036bace7e1b40563560c257218306ee /data
parent	Better batch filling behavior (diff)
download	textual-inversion-diff-fad870919737a19ea28f0c501f8139ce6a98b7fb.tar.gz textual-inversion-diff-fad870919737a19ea28f0c501f8139ce6a98b7fb.tar.bz2 textual-inversion-diff-fad870919737a19ea28f0c501f8139ce6a98b7fb.zip