pbordesinstadeep committed on
Commit
f3db296
·
verified ·
1 Parent(s): 9378c69

Update isoformer_tokenizer.py

Browse files
Files changed (1) hide show
  1. isoformer_tokenizer.py +6 -1
isoformer_tokenizer.py CHANGED
@@ -38,11 +38,16 @@ class IsoformerTokenizer(PreTrainedTokenizer):
38
  self,
39
  **kwargs
40
  ):
41
-
42
  pretrained_model_path = kwargs.get("pretrained_model_name_or_path", "")
 
 
43
  dna_vocab_path = os.path.join(pretrained_model_path, "dna_vocab_list.txt")
44
  rna_vocab_path = os.path.join(pretrained_model_path, "rna_vocab_list.txt")
45
  protein_vocab_path = os.path.join(pretrained_model_path, "protein_vocab_list.txt")
 
 
 
46
 
47
  dna_hf_tokenizer = EsmTokenizer(dna_vocab_path, model_max_length=196608)
48
  dna_hf_tokenizer.eos_token = None # Stops the tokenizer adding an EOS/SEP token at the end
 
38
  self,
39
  **kwargs
40
  ):
41
+ print(f"\n>>> DEBUG: IsoformerTokenizer __init__ received kwargs: {kwargs}")
42
  pretrained_model_path = kwargs.get("pretrained_model_name_or_path", "")
43
+ print(f">>> DEBUG: Determined pretrained_model_path: '{pretrained_model_path}'")
44
+
45
  dna_vocab_path = os.path.join(pretrained_model_path, "dna_vocab_list.txt")
46
  rna_vocab_path = os.path.join(pretrained_model_path, "rna_vocab_list.txt")
47
  protein_vocab_path = os.path.join(pretrained_model_path, "protein_vocab_list.txt")
48
+ print(f">>> DEBUG: dna_vocab_path will be: '{dna_vocab_path}'") # Add this
49
+ print(f">>> DEBUG: Checking if dna_vocab_path exists: {os.path.exists(dna_vocab_path)}")
50
+ print(f">>> DEBUG: Checking if dna_vocab_list.txt exists in CWD: {os.path.exists('dna_vocab_list.txt')}")
51
 
52
  dna_hf_tokenizer = EsmTokenizer(dna_vocab_path, model_max_length=196608)
53
  dna_hf_tokenizer.eos_token = None # Stops the tokenizer adding an EOS/SEP token at the end