# import open_clip.tokenizer
import torch

from . import sd_hijack_clip, devices
from .shared import opts

# tokenizer = open_clip.tokenizer._tokenizer
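

# These wrapper classes adapt OpenCLIP-based text encoders to the prompt
# machinery in sd_hijack_clip (chunking, emphasis weights, custom embeddings).
# The commented-out lines above preserve the earlier approach of using the
# module-level open_clip tokenizer; this version reads the tokenizer and its
# special-token ids from the wrapped model instead.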
class FrozenOpenCLIPEmbedderWithCustomWords(sd_hijack_clip.FrozenCLIPEmbedderWithCustomWordsBase):
    def __init__(self, wrapped, hijack):
        super().__init__(wrapped, hijack)

        self.tokenizer = tokenizer = self.wrapped.tokenizer
        self.comma_token = [v for k, v in tokenizer.encoder.items() if k == ',</w>'][0]
        # self.id_start = tokenizer.encoder["<start_of_text>"]
        # self.id_end = tokenizer.encoder["<end_of_text>"]
        self.id_start = tokenizer.bos_token_id
        self.id_end = tokenizer.eos_token_id
        self.id_pad = 0
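
    # Convert each prompt into raw token ids; the base class takes care of
    # emphasis parsing and splitting into fixed-length chunks.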
    def tokenize(self, texts):
        assert not opts.use_old_emphasis_implementation, 'Old emphasis implementation not supported for Open Clip'

        tokenized = [self.tokenizer.encode(text) for text in texts]

        return tokenized
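
    # Feed token ids through the wrapped OpenCLIP transformer and return the
    # resulting hidden states as the conditioning tensor.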
    def encode_with_transformers(self, tokens):
        # set self.wrapped.layer_idx here according to opts.CLIP_stop_at_last_layers
        z = self.wrapped.encode_with_transformer(tokens)

        return z
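
    # Used when creating a new (textual-inversion style) embedding: returns the
    # raw token-embedding vectors for init_text. The nvpt argument is accepted
    # for interface compatibility but not used here.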
    def encode_embedding_init_text(self, init_text, nvpt):
        ids = self.tokenizer.encode(init_text)
        ids = torch.asarray([ids], device=devices.device, dtype=torch.int)
        embedded = self.wrapped.model.token_embedding.wrapped(ids).squeeze(0)

        return embedded


class FrozenOpenCLIPEmbedder2WithCustomWords(sd_hijack_clip.FrozenCLIPEmbedderWithCustomWordsBase):
    def __init__(self, wrapped, hijack):
        super().__init__(wrapped, hijack)

        self.tokenizer = tokenizer = self.wrapped.tokenizer
        self.comma_token = [v for k, v in tokenizer.encoder.items() if k == ',</w>'][0]
        # self.id_start = tokenizer.encoder["<start_of_text>"]
        # self.id_end = tokenizer.encoder["<end_of_text>"]
        self.id_start = tokenizer.bos_token_id
        self.id_end = tokenizer.eos_token_id
        self.id_pad = 0
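
    # Same tokenization path as the first wrapper: raw token ids only,
    # with emphasis handling left to the base class.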
    def tokenize(self, texts):
        assert not opts.use_old_emphasis_implementation, 'Old emphasis implementation not supported for Open Clip'

        tokenized = [self.tokenizer.encode(text) for text in texts]

        return tokenized
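
    # The wrapped encoder returns a dict of per-layer outputs; select the
    # configured layer and, if a pooled output is present, attach it as an
    # attribute on the returned tensor so callers get both in one value.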
    def encode_with_transformers(self, tokens):
        d = self.wrapped.encode_with_transformer(tokens)
        z = d[self.wrapped.layer]

        pooled = d.get("pooled")
        if pooled is not None:
            z.pooled = pooled

        return z
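
    # Same as the first wrapper, but moves the ids onto the embedding table's
    # own device before the lookup, in case model parts live on different devices.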
    def encode_embedding_init_text(self, init_text, nvpt):
        ids = self.tokenizer.encode(init_text)
        ids = torch.asarray([ids], device=devices.device, dtype=torch.int)
        embedded = self.wrapped.model.token_embedding.wrapped(ids.to(self.wrapped.model.token_embedding.wrapped.weight.device)).squeeze(0)

        return embedded
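

# Minimal usage sketch (hypothetical names; in practice the sd_hijack module
# constructs these wrappers when it replaces the model's text encoder):
#
#     embedder = FrozenOpenCLIPEmbedderWithCustomWords(open_clip_text_model, hijack)
#     cond = embedder(["a photo of a cat, high detail"])  # forward() from the base class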