File size: 722 Bytes
8866644
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
import os
from transformers import AutoTokenizer
from comfy.sd1_clip import SDTokenizer

class MiaoBiTokenizer(SDTokenizer):
	"""SDTokenizer subclass that swaps in the MiaoBi (Chinese Roberta-style) tokenizer.

	After the base ``SDTokenizer.__init__`` runs, this loads the Hugging Face
	tokenizer bundled in the ``tokenizer`` directory next to this module and
	re-derives the special start/end token ids from it, overriding whatever
	the base class configured.
	"""

	def __init__(self, **kwargs):
		super().__init__(**kwargs)
		# The tokenizer files ship alongside this source file.
		tokenizer_path = os.path.join(
			os.path.dirname(os.path.realpath(__file__)),
			"tokenizer"  # was f"tokenizer": no placeholders, so the f-prefix was a no-op
		)
		# remote code ok, see `clip_tokenizer_roberta.py`, no ckpt vocab
		self.tokenizer = AutoTokenizer.from_pretrained(tokenizer_path, trust_remote_code=True)

		# Encoding the empty string yields only the tokenizer's special tokens
		# (presumably [BOS, EOS] for a Roberta-style tokenizer — derived from
		# the indexing below, not verified against the bundled files).
		empty = self.tokenizer('')["input_ids"]
		if self.tokens_start:
			# Prompt tokens begin after a leading special token: keep both ids.
			self.start_token = empty[0]
			self.end_token = empty[1]
		else:
			# No leading special token; the first id serves as the end token.
			self.start_token = None
			self.end_token = empty[0]

		# Inverse vocabulary: token id -> token string.
		vocab = self.tokenizer.get_vocab()
		self.inv_vocab = {v: k for k, v in vocab.items()}