summaryrefslogtreecommitdiffstats
path: root/util/ti.py
blob: 4cc732ed05493b106a95f587c00c5e5fee755fce (plain) (blame)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
from pathlib import Path

import torch

from models.clip.embeddings import ManagedCLIPTextEmbeddings
from models.clip.tokenizer import MultiCLIPTokenizer


def load_embeddings(
    tokenizer: MultiCLIPTokenizer,
    embeddings: ManagedCLIPTextEmbeddings,
    tokens: list[str],
    token_embeddings: torch.FloatTensor,
):
    """Register ``tokens`` with the tokenizer and store their embeddings.

    Each entry of ``token_embeddings`` is paired positionally with the entry
    of ``tokens`` at the same index. The size of an entry's first dimension
    is passed to the tokenizer as that token's vector count.

    Args:
        tokenizer: Tokenizer that receives the new tokens via
            ``add_multi_tokens``.
        embeddings: Embedding container that is resized to
            ``len(tokenizer)`` and then filled via ``add_embed``.
        tokens: Token strings to register.
        token_embeddings: Iterable of per-token embeddings; every item must
            expose ``.shape``. Presumably one ``(num_vectors, dim)`` tensor
            per token -- TODO confirm against callers.

    Returns:
        A ``(tokens, token_ids)`` tuple, where ``token_ids`` is whatever
        ``tokenizer.add_multi_tokens`` returned for the new tokens.
    """
    # Count how many vectors each token occupies (leading dimension).
    vector_counts = []
    for vectors in token_embeddings:
        vector_counts.append(vectors.shape[0])

    new_ids = tokenizer.add_multi_tokens(tokens, vector_counts)

    # Resize only after the tokenizer has been extended, so the requested
    # size reflects the tokenizer's updated length.
    embeddings.resize(len(tokenizer))

    # zip stops at the shorter input, so ids and embeddings are consumed in
    # lockstep and any surplus on either side is ignored.
    for id_and_vectors in zip(new_ids, token_embeddings):
        embeddings.add_embed(*id_and_vectors)

    return tokens, new_ids