From 83808fe00ac891ad2f625388d144c318b2cb5bfe Mon Sep 17 00:00:00 2001 From: Volpeon Date: Sat, 14 Jan 2023 21:53:07 +0100 Subject: WIP: Modularization ("free(): invalid pointer" my ass) --- training/lora.py | 107 ------------------------------------------------------- 1 file changed, 107 deletions(-) delete mode 100644 training/lora.py (limited to 'training/lora.py') diff --git a/training/lora.py b/training/lora.py deleted file mode 100644 index 3857d78..0000000 --- a/training/lora.py +++ /dev/null @@ -1,107 +0,0 @@ -import torch -import torch.nn as nn - -from diffusers import ModelMixin, ConfigMixin -from diffusers.configuration_utils import register_to_config -from diffusers.models.cross_attention import CrossAttention -from diffusers.utils.import_utils import is_xformers_available - - -if is_xformers_available(): - import xformers - import xformers.ops -else: - xformers = None - - -class LoRALinearLayer(nn.Module): - def __init__(self, in_features, out_features, rank=4): - super().__init__() - - if rank > min(in_features, out_features): - raise ValueError( - f"LoRA rank {rank} must be less or equal than {min(in_features, out_features)}" - ) - - self.lora_down = nn.Linear(in_features, rank, bias=False) - self.lora_up = nn.Linear(rank, out_features, bias=False) - self.scale = 1.0 - - nn.init.normal_(self.lora_down.weight, std=1 / rank) - nn.init.zeros_(self.lora_up.weight) - - def forward(self, hidden_states): - down_hidden_states = self.lora_down(hidden_states) - up_hidden_states = self.lora_up(down_hidden_states) - - return up_hidden_states - - -class LoRACrossAttnProcessor(nn.Module): - def __init__(self, hidden_size, cross_attention_dim=None, rank=4): - super().__init__() - - self.to_q_lora = LoRALinearLayer(hidden_size, hidden_size) - self.to_k_lora = LoRALinearLayer(cross_attention_dim or hidden_size, hidden_size) - self.to_v_lora = LoRALinearLayer(cross_attention_dim or hidden_size, hidden_size) - self.to_out_lora = LoRALinearLayer(hidden_size, hidden_size) - - def __call__(self, attn: CrossAttention, hidden_states, encoder_hidden_states=None, attention_mask=None, scale=1.0): - batch_size, sequence_length, _ = hidden_states.shape - attention_mask = attn.prepare_attention_mask(attention_mask, sequence_length) - - query = attn.to_q(hidden_states) + scale * self.to_q_lora(hidden_states) - query = attn.head_to_batch_dim(query) - - encoder_hidden_states = encoder_hidden_states if encoder_hidden_states is not None else hidden_states - - key = attn.to_k(encoder_hidden_states) + scale * self.to_k_lora(encoder_hidden_states) - value = attn.to_v(encoder_hidden_states) + scale * self.to_v_lora(encoder_hidden_states) - - key = attn.head_to_batch_dim(key) - value = attn.head_to_batch_dim(value) - - attention_probs = attn.get_attention_scores(query, key, attention_mask) - hidden_states = torch.bmm(attention_probs, value) - hidden_states = attn.batch_to_head_dim(hidden_states) - - # linear proj - hidden_states = attn.to_out[0](hidden_states) + scale * self.to_out_lora(hidden_states) - # dropout - hidden_states = attn.to_out[1](hidden_states) - - return hidden_states - - -class LoRAXFormersCrossAttnProcessor(nn.Module): - def __init__(self, hidden_size, cross_attention_dim, rank=4): - super().__init__() - - self.to_q_lora = LoRALinearLayer(hidden_size, hidden_size) - self.to_k_lora = LoRALinearLayer(cross_attention_dim or hidden_size, hidden_size) - self.to_v_lora = LoRALinearLayer(cross_attention_dim or hidden_size, hidden_size) - self.to_out_lora = LoRALinearLayer(hidden_size, hidden_size) - - def __call__(self, attn: CrossAttention, hidden_states, encoder_hidden_states=None, attention_mask=None, scale=1.0): - batch_size, sequence_length, _ = hidden_states.shape - attention_mask = attn.prepare_attention_mask(attention_mask, sequence_length) - - query = attn.to_q(hidden_states) + scale * self.to_q_lora(hidden_states) - query = attn.head_to_batch_dim(query).contiguous() - - encoder_hidden_states = encoder_hidden_states if encoder_hidden_states is not None else hidden_states - - key = attn.to_k(encoder_hidden_states) + scale * self.to_k_lora(encoder_hidden_states) - value = attn.to_v(encoder_hidden_states) + scale * self.to_v_lora(encoder_hidden_states) - - key = attn.head_to_batch_dim(key).contiguous() - value = attn.head_to_batch_dim(value).contiguous() - - hidden_states = xformers.ops.memory_efficient_attention(query, key, value, attn_bias=attention_mask) - - # linear proj - hidden_states = attn.to_out[0](hidden_states) + scale * self.to_out_lora(hidden_states) - # dropout - hidden_states = attn.to_out[1](hidden_states) - - return hidden_states -- cgit v1.2.3-54-g00ecf