Update f5_tts/model/utils.py
f5_tts/model/utils.py  (+2 -3)  CHANGED
@@ -109,8 +109,7 @@ def get_tokenizer(dataset_name, tokenizer: str = "pinyin"):
                 - if use "byte", set to 256 (unicode byte range)
     """
     if tokenizer in ["pinyin", "char"]:
-        tokenizer_path =
-
+        tokenizer_path = os.path.join(files("f5_tts").joinpath("../../data"), f"{dataset_name}_{tokenizer}/vocab.txt")
         with open(tokenizer_path, "r", encoding="utf-8") as f:
             vocab_char_map = {}
             for i, char in enumerate(f):
@@ -132,6 +131,7 @@ def get_tokenizer(dataset_name, tokenizer: str = "pinyin"):
     return vocab_char_map, vocab_size
 
 
+
 # convert char to pinyin
 
 jieba.initialize()
@@ -247,4 +247,3 @@ def sample_from_list(float_list, N):
     random_samples = float_tensor[random_indices]
 
     return random_samples
-
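For reference, the replacement line collapses the vocab-path construction into a single os.path.join call over importlib.resources, and the unchanged context then reads one vocab entry per line into a char-to-index map. Below is a minimal standalone sketch of that behaviour. The dataset name, the loop body, and the final vocab_size line are illustrative assumptions inferred from the visible context, not part of this diff, and the path only resolves if a data/ directory sits two levels above the installed f5_tts package (as in an editable install of the repo).

import os
from importlib.resources import files  # same helper the changed line relies on (Python 3.9+)

# Hypothetical example values, not taken from this commit.
dataset_name = "Emilia_ZH_EN"
tokenizer = "pinyin"

# Mirrors the added line: resolve data/<dataset>_<tokenizer>/vocab.txt
# relative to the installed f5_tts package.
tokenizer_path = os.path.join(
    files("f5_tts").joinpath("../../data"),
    f"{dataset_name}_{tokenizer}/vocab.txt",
)

# Mirrors the surrounding context lines: one token per line, mapped to its row index.
# Only the `with open(...)` and `for i, char in enumerate(f):` lines appear in the diff;
# the loop body and vocab_size are assumptions consistent with `return vocab_char_map, vocab_size`.
with open(tokenizer_path, "r", encoding="utf-8") as f:
    vocab_char_map = {}
    for i, char in enumerate(f):
        vocab_char_map[char[:-1]] = i  # drop the trailing newline
vocab_size = len(vocab_char_map)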