Diffstat (limited to 'models/clip/embeddings.py')
 models/clip/embeddings.py | 29 +++++++++++++++++------------
 1 file changed, 17 insertions(+), 12 deletions(-)
diff --git a/models/clip/embeddings.py b/models/clip/embeddings.py
index 7c7f2ac..8c3c6d4 100644
--- a/models/clip/embeddings.py
+++ b/models/clip/embeddings.py
@@ -14,7 +14,13 @@ from models.sparse import SparseEmbedding
 
 
 class ManagedCLIPTextEmbeddings(CLIPTextEmbeddings):
-    def __init__(self, config: CLIPTextConfig, embeddings: CLIPTextEmbeddings, alpha: int = 8, dropout: float = 0.0):
+    def __init__(
+        self,
+        config: CLIPTextConfig,
+        embeddings: CLIPTextEmbeddings,
+        alpha: int = 8,
+        dropout: float = 0.0,
+    ):
         super().__init__(config)
 
         self.position_embedding = embeddings.position_embedding
@@ -28,7 +34,9 @@ class ManagedCLIPTextEmbeddings(CLIPTextEmbeddings):
         self.token_embedding.weight = embeddings.token_embedding.weight
 
     def resize(self, size: int):
-        self.token_embedding = self.token_embedding.new_resized(size, self.initializer_factor)
+        self.token_embedding = self.token_embedding.new_resized(
+            size, self.initializer_factor
+        )
 
     def add_embed(
         self,
@@ -46,7 +54,7 @@ class ManagedCLIPTextEmbeddings(CLIPTextEmbeddings):
             initializer = [initializer]
 
         if isinstance(initializer, list):
-            initializer = (initializer * len(token_ids))[:len(token_ids)]
+            initializer = (initializer * len(token_ids))[: len(token_ids)]
 
         with torch.no_grad():
             initializer = self.get_embed(initializer)
@@ -76,24 +84,21 @@ class ManagedCLIPTextEmbeddings(CLIPTextEmbeddings):
 
     def get_embed(self, input_ids: Union[list[int], torch.LongTensor]):
         if isinstance(input_ids, list):
-            input_ids = torch.tensor(input_ids, device=self.token_embedding.weight.device, dtype=torch.long)
+            input_ids = torch.tensor(
+                input_ids, device=self.token_embedding.weight.device, dtype=torch.long
+            )
 
         return self.token_embedding(input_ids)
 
 
 def patch_managed_embeddings(
-    text_encoder: CLIPTextModel,
-    alpha: int = 8,
-    dropout: float = 0.0
+    text_encoder: CLIPTextModel, alpha: int = 8, dropout: float = 0.0
 ) -> ManagedCLIPTextEmbeddings:
     if isinstance(text_encoder.text_model.embeddings, ManagedCLIPTextEmbeddings):
         return text_encoder.text_model.embeddings
 
     text_embeddings = ManagedCLIPTextEmbeddings(
-        text_encoder.config,
-        text_encoder.text_model.embeddings,
-        alpha,
-        dropout
+        text_encoder.config, text_encoder.text_model.embeddings, alpha, dropout
     )
     text_encoder.text_model.embeddings = text_embeddings
     return text_embeddings
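Usage note (not part of the diff): the patched entry point is patch_managed_embeddings(), which swaps the stock CLIP embeddings for a ManagedCLIPTextEmbeddings instance and returns it, reusing an existing instance if the encoder was already patched. The sketch below shows how it might be called after loading a CLIP text encoder; the model checkpoint name, the tokenizer handling, and the exact add_embed() call shape are assumptions for illustration only.

# Minimal usage sketch, assuming a Hugging Face CLIP checkpoint and the
# add_embed(token_ids, initializer) shape implied by the diff above.
from transformers import CLIPTextModel, CLIPTokenizer

from models.clip.embeddings import patch_managed_embeddings

text_encoder = CLIPTextModel.from_pretrained("openai/clip-vit-large-patch14")
tokenizer = CLIPTokenizer.from_pretrained("openai/clip-vit-large-patch14")

# Wrap the stock embeddings; idempotent, returns the existing instance if already patched.
embeddings = patch_managed_embeddings(text_encoder, alpha=8, dropout=0.0)

# Register a new placeholder token, grow the embedding table to match the tokenizer,
# and initialize the new row from an existing word's embedding.
tokenizer.add_tokens("<new-concept>")
placeholder_id = tokenizer.convert_tokens_to_ids("<new-concept>")
embeddings.resize(len(tokenizer))
init_ids = tokenizer.encode("sculpture", add_special_tokens=False)
embeddings.add_embed(placeholder_id, init_ids)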
