feat: download XY tokenizer checkpoint via hf_hub_download; pin torch==2.5.1 to match flash-attn wheel
Browse files

Files changed:
- app.py (+5 −4)
- requirements.txt (+2 −2)
app.py
CHANGED
@@ -112,18 +112,18 @@ LANGUAGES = {
|
|
112 |
SYSTEM_PROMPT = "You are a speech synthesizer that generates natural, realistic, and human-like conversational audio from dialogue text."
|
113 |
MODEL_PATH = "fnlp/MOSS-TTSD-v0"
|
114 |
SPT_CONFIG_PATH = "XY_Tokenizer/config/xy_tokenizer_config.yaml"
|
115 |
-
SPT_CHECKPOINT_PATH = "XY_Tokenizer/weights/xy_tokenizer.ckpt"
|
116 |
MAX_CHANNELS = 8
|
117 |
|
118 |
from huggingface_hub import hf_hub_download
|
119 |
|
120 |
-
|
121 |
repo_id="fnlp/XY_Tokenizer_TTSD_V0",
|
122 |
filename="xy_tokenizer.ckpt",
|
123 |
cache_dir="XY_Tokenizer/weights"
|
124 |
)
|
125 |
|
126 |
-
print("Checkpoint downloaded to:",
|
127 |
|
128 |
# Global variables for caching loaded models
|
129 |
tokenizer = None
|
@@ -137,7 +137,8 @@ def initialize_model():
|
|
137 |
|
138 |
if tokenizer is None:
|
139 |
print("Initializing model...")
|
140 |
-
device = "cuda"
|
|
|
141 |
tokenizer, model, spt = load_model(MODEL_PATH, SPT_CONFIG_PATH, SPT_CHECKPOINT_PATH)
|
142 |
spt = spt.to(device)
|
143 |
model = model.to(device)
|
|
|
112 |
SYSTEM_PROMPT = "You are a speech synthesizer that generates natural, realistic, and human-like conversational audio from dialogue text."
|
113 |
MODEL_PATH = "fnlp/MOSS-TTSD-v0"
|
114 |
SPT_CONFIG_PATH = "XY_Tokenizer/config/xy_tokenizer_config.yaml"
|
115 |
+
# SPT_CHECKPOINT_PATH = "XY_Tokenizer/weights/xy_tokenizer.ckpt"
|
116 |
MAX_CHANNELS = 8
|
117 |
|
118 |
from huggingface_hub import hf_hub_download
|
119 |
|
120 |
+
SPT_CHECKPOINT_PATH = hf_hub_download(
|
121 |
repo_id="fnlp/XY_Tokenizer_TTSD_V0",
|
122 |
filename="xy_tokenizer.ckpt",
|
123 |
cache_dir="XY_Tokenizer/weights"
|
124 |
)
|
125 |
|
126 |
+
print("Checkpoint downloaded to:", SPT_CHECKPOINT_PATH)
|
127 |
|
128 |
# Global variables for caching loaded models
|
129 |
tokenizer = None
|
|
|
137 |
|
138 |
if tokenizer is None:
|
139 |
print("Initializing model...")
|
140 |
+
device = "cuda"
|
141 |
+
print(f"Using {device}")
|
142 |
tokenizer, model, spt = load_model(MODEL_PATH, SPT_CONFIG_PATH, SPT_CHECKPOINT_PATH)
|
143 |
spt = spt.to(device)
|
144 |
model = model.to(device)
|
requirements.txt
CHANGED
@@ -1,4 +1,4 @@
|
|
1 |
-
torch
|
2 |
torchaudio>=2.0.0
|
3 |
transformers>=4.30.0
|
4 |
gradio>=4.0.0
|
@@ -14,4 +14,4 @@ openai
|
|
14 |
PyYAML
|
15 |
einops
|
16 |
huggingface_hub
|
17 |
-
https://github.com/Dao-AILab/flash-attention/releases/download/v2.8.0.post2/flash_attn-2.8.0.post2+cu12torch2.
|
|
|
1 |
+
torch==2.5.1
|
2 |
torchaudio>=2.0.0
|
3 |
transformers>=4.30.0
|
4 |
gradio>=4.0.0
|
|
|
14 |
PyYAML
|
15 |
einops
|
16 |
huggingface_hub
|
17 |
+
https://github.com/Dao-AILab/flash-attention/releases/download/v2.8.0.post2/flash_attn-2.8.0.post2+cu12torch2.5cxx11abiTRUE-cp310-cp310-linux_x86_64.whl
|