yl4579 committed on
Commit
d6d659c
·
verified ·
1 Parent(s): 24657a7

Update f5_tts/model/utils.py

Browse files
Files changed (1) hide show
  1. f5_tts/model/utils.py +2 -3
f5_tts/model/utils.py CHANGED
@@ -109,8 +109,7 @@ def get_tokenizer(dataset_name, tokenizer: str = "pinyin"):
109
  - if use "byte", set to 256 (unicode byte range)
110
  """
111
  if tokenizer in ["pinyin", "char"]:
112
- tokenizer_path = str(f'../../vocab.txt')
113
-
114
  with open(tokenizer_path, "r", encoding="utf-8") as f:
115
  vocab_char_map = {}
116
  for i, char in enumerate(f):
@@ -132,6 +131,7 @@ def get_tokenizer(dataset_name, tokenizer: str = "pinyin"):
132
  return vocab_char_map, vocab_size
133
 
134
 
 
135
  # convert char to pinyin
136
 
137
  jieba.initialize()
@@ -247,4 +247,3 @@ def sample_from_list(float_list, N):
247
  random_samples = float_tensor[random_indices]
248
 
249
  return random_samples
250
-
 
109
  - if use "byte", set to 256 (unicode byte range)
110
  """
111
  if tokenizer in ["pinyin", "char"]:
112
+ tokenizer_path = os.path.join(files("f5_tts").joinpath("../../data"), f"{dataset_name}_{tokenizer}/vocab.txt")
 
113
  with open(tokenizer_path, "r", encoding="utf-8") as f:
114
  vocab_char_map = {}
115
  for i, char in enumerate(f):
 
131
  return vocab_char_map, vocab_size
132
 
133
 
134
+
135
  # convert char to pinyin
136
 
137
  jieba.initialize()
 
247
  random_samples = float_tensor[random_indices]
248
 
249
  return random_samples