Diffstat (limited to 'training')
-rw-r--r--  training/lr.py           | 14 ++++++++------
-rw-r--r--  training/optimization.py | 10 +++++++---
2 files changed, 15 insertions, 9 deletions
diff --git a/training/lr.py b/training/lr.py
index c1fa3a0..c0e9b3f 100644
--- a/training/lr.py
+++ b/training/lr.py
@@ -19,8 +19,8 @@ class LRFinder():
         self.val_dataloader = val_dataloader
         self.loss_fn = loss_fn
 
-        self.model_state = copy.deepcopy(model.state_dict())
-        self.optimizer_state = copy.deepcopy(optimizer.state_dict())
+        # self.model_state = copy.deepcopy(model.state_dict())
+        # self.optimizer_state = copy.deepcopy(optimizer.state_dict())
 
     def run(self, min_lr, skip_start=10, skip_end=5, num_epochs=100, num_train_batches=1, num_val_batches=math.inf, smooth_f=0.05, diverge_th=5):
         best_loss = None
@@ -109,8 +109,8 @@ class LRFinder():
                "lr": lr,
            })
 
-            self.model.load_state_dict(self.model_state)
-            self.optimizer.load_state_dict(self.optimizer_state)
+            # self.model.load_state_dict(self.model_state)
+            # self.optimizer.load_state_dict(self.optimizer_state)
 
            if loss > diverge_th * best_loss:
                print("Stopping early, the loss has diverged")
@@ -127,12 +127,14 @@ class LRFinder():
 
         fig, ax_loss = plt.subplots()
 
-        ax_loss.plot(lrs, losses, color='red', label='Loss')
+        ax_loss.plot(lrs, losses, color='red')
         ax_loss.set_xscale("log")
         ax_loss.set_xlabel("Learning rate")
+        ax_loss.set_ylabel("Loss")
 
         # ax_acc = ax_loss.twinx()
-        # ax_acc.plot(lrs, accs, color='blue', label='Accuracy')
+        # ax_acc.plot(lrs, accs, color='blue')
+        # ax_acc.set_ylabel("Accuracy")
 
         print("LR suggestion: steepest gradient")
         min_grad_idx = None
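The plotting hunk ends right where the finder prints "LR suggestion: steepest gradient" and starts computing min_grad_idx. For context, the usual form of that heuristic is sketched below; the helper name suggest_lr and the exact trimming are illustrative assumptions, not code from this repository. It picks the learning rate at which the recorded loss curve falls fastest, after dropping the first skip_start and last skip_end points (matching the defaults in run()).

import numpy as np

def suggest_lr(lrs, losses, skip_start=10, skip_end=5):
    # Hypothetical sketch of the "steepest gradient" suggestion:
    # trim the noisy ends of the sweep, then pick the point where the
    # loss curve has its most negative slope with respect to the index.
    lrs = np.asarray(lrs[skip_start:-skip_end])
    losses = np.asarray(losses[skip_start:-skip_end])
    min_grad_idx = np.gradient(losses).argmin()
    return lrs[min_grad_idx]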
diff --git a/training/optimization.py b/training/optimization.py
index 3809f3b..a0c8673 100644
--- a/training/optimization.py
+++ b/training/optimization.py
@@ -6,7 +6,7 @@ from diffusers.utils import logging
 logger = logging.get_logger(__name__)
 
 
-def get_one_cycle_schedule(optimizer, num_training_steps, annealing="cos", min_lr=0.01, mid_point=0.4, last_epoch=-1):
+def get_one_cycle_schedule(optimizer, num_training_steps, annealing="cos", min_lr=0.04, mid_point=0.3, last_epoch=-1):
     """
     Create a schedule with a learning rate that decreases linearly from the initial lr set in the optimizer to 0, after
     a warmup period during which it increases linearly from 0 to the initial lr set in the optimizer.
@@ -35,8 +35,12 @@ def get_one_cycle_schedule(optimizer, num_training_steps, annealing="cos", min_l
 
             progress = float(num_training_steps - current_step) / float(max(1, num_training_steps - thresh_down))
             return max(0.0, progress) * min_lr
-        else:
-            progress = float(current_step - thresh_up) / float(max(1, num_training_steps - thresh_up))
+
+        progress = float(current_step - thresh_up) / float(max(1, num_training_steps - thresh_up))
+
+        if annealing == "half_cos":
             return max(0.0, 1.0 + math.cos(math.pi * (0.5 + 0.5 * progress)))
 
+        return max(0.0, 0.5 * (1.0 + math.cos(math.pi * progress)))
+
     return LambdaLR(optimizer, lr_lambda, last_epoch)
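The restructured lr_lambda drops the else: and makes "half_cos" an explicit branch, so the default annealing="cos" (and any other non-linear value) now falls through to a full cosine decay from 1 to 0 over the post-plateau segment. The snippet below is a standalone replica of just the two annealing expressions visible in the hunk, handy for eyeballing their shapes; the function names and the sampling loop are illustrative only, and the real schedule also applies the warmup and linear min_lr tail handled elsewhere in lr_lambda.

import math

def half_cos_factor(progress):
    # "half_cos" branch: quarter-cosine, steepest right after the plateau,
    # flattening toward the end of training
    return max(0.0, 1.0 + math.cos(math.pi * (0.5 + 0.5 * progress)))

def cos_factor(progress):
    # default "cos" fall-through: slow start and finish, steepest in the middle
    return max(0.0, 0.5 * (1.0 + math.cos(math.pi * progress)))

# progress runs from 0 (end of the plateau) to 1 (end of training)
for p in (0.0, 0.25, 0.5, 0.75, 1.0):
    print(f"progress={p:.2f}  half_cos={half_cos_factor(p):.3f}  cos={cos_factor(p):.3f}")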