Update isoformer_tokenizer.py
Browse files
- isoformer_tokenizer.py +6 -1
isoformer_tokenizer.py
CHANGED
@@ -38,11 +38,16 @@ class IsoformerTokenizer(PreTrainedTokenizer):
|
|
38 |
self,
|
39 |
**kwargs
|
40 |
):
|
41 |
-
|
42 |
pretrained_model_path = kwargs.get("pretrained_model_name_or_path", "")
|
|
|
|
|
43 |
dna_vocab_path = os.path.join(pretrained_model_path, "dna_vocab_list.txt")
|
44 |
rna_vocab_path = os.path.join(pretrained_model_path, "rna_vocab_list.txt")
|
45 |
protein_vocab_path = os.path.join(pretrained_model_path, "protein_vocab_list.txt")
|
|
|
|
|
|
|
46 |
|
47 |
dna_hf_tokenizer = EsmTokenizer(dna_vocab_path, model_max_length=196608)
|
48 |
dna_hf_tokenizer.eos_token = None # Stops the tokenizer adding an EOS/SEP token at the end
|
|
|
38 |
self,
|
39 |
**kwargs
|
40 |
):
|
41 |
+
print(f"\n>>> DEBUG: IsoformerTokenizer __init__ received kwargs: {kwargs}")
|
42 |
pretrained_model_path = kwargs.get("pretrained_model_name_or_path", "")
|
43 |
+
print(f">>> DEBUG: Determined pretrained_model_path: '{pretrained_model_path}'")
|
44 |
+
|
45 |
dna_vocab_path = os.path.join(pretrained_model_path, "dna_vocab_list.txt")
|
46 |
rna_vocab_path = os.path.join(pretrained_model_path, "rna_vocab_list.txt")
|
47 |
protein_vocab_path = os.path.join(pretrained_model_path, "protein_vocab_list.txt")
|
48 |
+
print(f">>> DEBUG: dna_vocab_path will be: '{dna_vocab_path}'") # Add this
|
49 |
+
print(f">>> DEBUG: Checking if dna_vocab_path exists: {os.path.exists(dna_vocab_path)}")
|
50 |
+
print(f">>> DEBUG: Checking if dna_vocab_list.txt exists in CWD: {os.path.exists('dna_vocab_list.txt')}")
|
51 |
|
52 |
dna_hf_tokenizer = EsmTokenizer(dna_vocab_path, model_max_length=196608)
|
53 |
dna_hf_tokenizer.eos_token = None # Stops the tokenizer adding an EOS/SEP token at the end
|