import json
from pathlib import Path

from safetensors import safe_open

from models.clip.embeddings import ManagedCLIPTextEmbeddings
from models.clip.tokenizer import MultiCLIPTokenizer


def load_config(filename):
    """Load a JSON config file, merging in the "base" config it extends, if any."""
    with open(filename, 'rt') as f:
        config = json.load(f)

    args = config["args"]

    # If the config extends a base file, load it recursively and merge;
    # keys in this config take precedence over the base's.
    if "base" in config:
        args = load_config(Path(filename).parent.joinpath(config["base"])) | args

    return args
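
# Illustrative example of "base" chaining (file names hypothetical): given
# base.json {"args": {"lr": 1e-4, "steps": 100}} and
# child.json {"base": "base.json", "args": {"steps": 200}},
# load_config("child.json") returns {"lr": 1e-4, "steps": 200}; the dict
# union operator (|) lets the child's args override the base's.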


def load_embeddings_from_dir(tokenizer: MultiCLIPTokenizer, embeddings: ManagedCLIPTextEmbeddings, embeddings_dir: Path):
    """Register every embedding stored in embeddings_dir with the tokenizer and embedding layer."""
    if not embeddings_dir.exists() or not embeddings_dir.is_dir():
        return [], []

    filenames = [filename for filename in embeddings_dir.iterdir() if filename.is_file()]
    tokens = [filename.stem for filename in filenames]

    new_ids: list[list[int]] = []
    new_embeds = []

    # Each safetensors file stores a single "embed" tensor whose first
    # dimension is the number of sub-tokens the embedding spans.
    for filename in filenames:
        with safe_open(filename, framework="pt", device="cpu") as file:
            embed = file.get_tensor("embed")

            added = tokenizer.add_multi_tokens(filename.stem, embed.shape[0])
            new_ids.append(added)
            new_embeds.append(embed)

    # Grow the embedding matrix to cover the new token ids, then copy each
    # loaded tensor into place.
    embeddings.resize(len(tokenizer))

    for new_id, embeds in zip(new_ids, new_embeds):
        embeddings.add_embed(new_id, embeds)

    return tokens, new_ids
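

# Usage sketch (illustrative; the paths and the tokenizer/embeddings objects
# are assumptions, constructed elsewhere in the training setup):
#
#   args = load_config("configs/train.json")
#   tokens, ids = load_embeddings_from_dir(tokenizer, embeddings, Path("embeddings"))
#   print(f"loaded {len(tokens)} embeddings: {tokens}")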