From 6c64f769043c8212b1a5778e857af691a828798d Mon Sep 17 00:00:00 2001 From: Volpeon Date: Thu, 5 Jan 2023 10:19:38 +0100 Subject: Various cleanups --- common.py | 44 -------------------------------------------- 1 file changed, 44 deletions(-) delete mode 100644 common.py (limited to 'common.py') diff --git a/common.py b/common.py deleted file mode 100644 index 0887197..0000000 --- a/common.py +++ /dev/null @@ -1,44 +0,0 @@ -from pathlib import Path -import json - -from models.clip.embeddings import ManagedCLIPTextEmbeddings -from models.clip.tokenizer import MultiCLIPTokenizer - -from safetensors import safe_open - - -def load_config(filename): - with open(filename, 'rt') as f: - config = json.load(f) - - args = config["args"] - - if "base" in config: - args = load_config(Path(filename).parent.joinpath(config["base"])) | args - - return args - - -def load_embeddings_from_dir(tokenizer: MultiCLIPTokenizer, embeddings: ManagedCLIPTextEmbeddings, embeddings_dir: Path): - if not embeddings_dir.exists() or not embeddings_dir.is_dir(): - return [] - - filenames = [filename for filename in embeddings_dir.iterdir() if filename.is_file()] - - new_tokens = [] - new_embeds = [] - - for filename in filenames: - with safe_open(filename, framework="pt", device="cpu") as file: - embed = file.get_tensor("embed") - - added = tokenizer.add_multi_tokens(filename.stem, embed.shape[0]) - new_tokens.append(added) - new_embeds.append(embed) - - embeddings.resize(len(tokenizer)) - - for (new_token, embeds) in zip(new_tokens, new_embeds): - embeddings.add_embed(new_token.ids, embeds) - - return new_tokens -- cgit v1.2.3-54-g00ecf