From 3924055ed24da9b6995303cd36282eb558ba0bf0 Mon Sep 17 00:00:00 2001
From: Volpeon
Date: Sun, 16 Apr 2023 14:45:37 +0200
Subject: Fix

---
 models/lora.py | 77 +++++++++++++++++++++++++++++-----------------------------
 1 file changed, 38 insertions(+), 39 deletions(-)

(limited to 'models/lora.py')

diff --git a/models/lora.py b/models/lora.py
index 01a540b..e506cff 100644
--- a/models/lora.py
+++ b/models/lora.py
@@ -1,8 +1,8 @@
 from typing import Optional
+import math
 
 import torch
 import torch.nn as nn
-import torch.nn.functional as F
 
 
 class LoraLayer():
@@ -42,14 +42,12 @@ class LoraEmbedding(nn.Embedding, LoraLayer):
             self, r=r, lora_alpha=lora_alpha, lora_dropout=lora_dropout, merge_weights=merge_weights
         )
 
-        self.register_buffer('trainable_ids', self.weight.new_zeros(num_embeddings, dtype=torch.long))
-        self.trainable_ids -= 1
+        self.register_buffer('trainable_ids', self.weight.new_zeros(num_embeddings, dtype=torch.long) - 1)
 
-        if r > 0:
-            self.lora_A = nn.ParameterList()
-            self.lora_B = nn.Linear(r, embedding_dim, bias=False)
-            self.scaling = self.lora_alpha / self.r
-            self.weight.requires_grad = False
+        self.lora_A = nn.ParameterList()
+        self.lora_B = nn.Linear(r, embedding_dim, bias=False)
+        self.scaling = self.lora_alpha / self.r
+        self.weight.requires_grad = False
 
         self.reset_parameters()
 
@@ -70,8 +68,9 @@ class LoraEmbedding(nn.Embedding, LoraLayer):
         else:
             nn.init.zeros_(new_emb.weight.data)
         new_emb.weight.data[:n, :] = self.weight.data[:n, :]
-        new_emb.lora_A = self.lora_A
-        new_emb.lora_B = self.lora_B
+        for param in self.lora_A:
+            new_emb.lora_A.append(param)
+        new_emb.lora_B.weight[:].data = self.lora_B.weight[:].data
         new_emb.trainable_ids[:n] = self.trainable_ids[:n]
 
         return new_emb
@@ -87,60 +86,60 @@ class LoraEmbedding(nn.Embedding, LoraLayer):
         n2 = n1 + new_ids.shape[0]
         self.trainable_ids[new_ids] = torch.arange(n1, n2)
         for _ in new_ids:
-            self.lora_A.append(nn.Parameter(self.weight.new_zeros(self.r)))
+            w = self.weight.new_zeros(self.r)
+            self.lora_A.append(w)
+
+        if len(self.lora_A) > 1:
+            elems = torch.stack([param for param in self.lora_A])
+            nn.init.kaiming_uniform_(elems, a=math.sqrt(5))
 
     def get_weights(self, input_ids: torch.Tensor):
         if len(input_ids.shape) != 1:
             return torch.stack([self.get_weights(batch) for batch in input_ids])
 
-        trainable_ids = self.trainable_ids[input_ids]
-        mask = ~(trainable_ids == -1)
-        trainable_ids = trainable_ids[mask]
-
         weights = self.weight.new_zeros((input_ids.shape[0], self.embedding_dim))
-        elems = [self.lora_A[id] for id in trainable_ids]
-        if len(elems) != 0:
-            w = self.lora_B(self.lora_dropout(torch.stack(elems))) * self.scaling
-            weights[mask] = w.to(dtype=weights.dtype)
+        if not self.merged:
+            trainable_ids = self.trainable_ids[input_ids]
+            mask = ~(trainable_ids == -1)
+            elems = [self.lora_A[id] for id in trainable_ids[mask]]
+
+            if len(elems) != 0:
+                w = self.lora_B(self.lora_dropout(torch.stack(elems))) * self.scaling
+                weights[mask] = w.to(dtype=weights.dtype)
 
         return weights
 
     def persist(self):
-        if self.r > 0:
-            weights = self.get_weights(torch.arange(self.trainable_ids.shape[0]))
-            self.weight.data += weights
-            self.trainable_ids[:] = -1
-            self.lora_A = nn.ParameterList()
+        self.weight.data += self.get_weights(torch.arange(self.trainable_ids.shape[0]))
+        self.trainable_ids[:] = -1
+        self.lora_A = nn.ParameterList()
+        nn.init.zeros_(self.lora_B.weight)
 
     def reset_parameters(self):
         nn.Embedding.reset_parameters(self)
-        if hasattr(self, 'lora_A'):
+        if hasattr(self, "lora_A"):
            self.trainable_ids[:] = -1
            self.lora_A = nn.ParameterList()
            nn.init.zeros_(self.lora_B.weight)
 
     def train(self, mode: bool = True):
         nn.Embedding.train(self, mode)
-        if self.merge_weights and self.merged:
-            if self.r > 0:
-                weights = self.get_weights(torch.arange(self.trainable_ids.shape[0]))
-                self.weight.data -= weights
+        self.lora_A.train(mode)
+        self.lora_B.train(mode)
+        if not mode and self.merge_weights and not self.merged:
+            self.weight.data += self.get_weights(torch.arange(self.trainable_ids.shape[0]))
+            self.merged = True
+        elif self.merge_weights and self.merged:
+            self.weight.data -= self.get_weights(torch.arange(self.trainable_ids.shape[0]))
             self.merged = False
 
     def eval(self):
         nn.Embedding.eval(self)
-        if self.merge_weights and not self.merged:
-            if self.r > 0:
-                weights = self.get_weights(torch.arange(self.trainable_ids.shape[0]))
-                self.weight.data += weights
-            self.merged = True
+        self.lora_A.eval()
+        self.lora_B.eval()
 
     def forward(self, input_ids: torch.LongTensor):
         result = nn.Embedding.forward(self, input_ids)
-
-        if self.r > 0 and not self.merged:
-            weights = self.get_weights(input_ids)
-            result += weights
-
+        result += self.get_weights(input_ids)
        return result
 
-- 
cgit v1.2.3-54-g00ecf
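
For context, a minimal sketch of how the reworked merge logic is exercised after this patch: get_weights() now returns zeros while merged, and train()/eval() fold the LoRA delta into the embedding weight instead of forward() checking r and merged itself. The constructor arguments shown below are assumptions inferred from the hunks above (the full LoraLayer/LoraEmbedding signatures are not part of this diff), so treat it as illustrative rather than as the repository's API.

# Illustrative sketch only -- not part of the patch. Assumes models/lora.py
# exposes LoraEmbedding with an nn.Embedding-style constructor plus the LoRA
# hyperparameters that the __init__ hunk passes through to LoraLayer.
import torch

from models.lora import LoraEmbedding

emb = LoraEmbedding(32, 8, r=4, lora_alpha=4, lora_dropout=0.0, merge_weights=True)
ids = torch.tensor([0, 1, 2])

# Training mode: forward() adds the LoRA delta on the fly via get_weights().
out_train = emb(ids)

# Switching to eval with merge_weights=True folds the delta into emb.weight
# and sets merged=True; get_weights() then returns zeros, so the delta is not
# applied twice in forward().
emb.train(False)
out_eval = emb(ids)

# Switching back to training subtracts the delta again (merged -> False).
emb.train(True)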