|
import os
import sys

# The f5_tts package is imported from the "src" directory that sits next to
# this script, so that directory must be on sys.path *before* the f5_tts
# import further down runs.
_SRC_DIR = os.path.join(os.path.dirname(__file__), "src")
sys.path.append(_SRC_DIR)

import gdown
import gradio as gr
import numpy as np
import torch
import yaml

from f5_tts.infer.utils_infer import build_model_from_config, inference
|
|
|
|
|
# Google Drive download links for the model checkpoint and its vocabulary file.
MODEL_URL = "https://drive.google.com/uc?id=1llj4Z3uzKCXL_0EIuXqIFtXYm6lraaIz"
VOCAB_URL = "https://drive.google.com/uc?id=1YNluHbc_bqhj7B1wp9by4U0-LfCPCeLL"

# Where the downloaded files are stored locally, and where the model config
# is read from. These are relative paths, so they resolve against the current
# working directory of the process.
MODEL_PATH = "checkpoints/model_1250000.safetensors"
VOCAB_PATH = "checkpoints/vocab.txt"
CONFIG_PATH = "src/f5_tts/configs/F5TTS_Base.yaml"
|
|
|
|
|
def _ensure_downloaded(url, path, label):
    """Fetch *url* into *path* unless something already exists at *path*.

    Args:
        url: Download link handed to ``gdown.download``.
        path: Local destination; an existing path is left untouched.
        label: Short human-readable name used in the progress and error
            messages (e.g. ``"model"``).

    Raises:
        RuntimeError: If the download call returns without *path* existing.
    """
    if os.path.exists(path):
        return

    print(f"🔽 Downloading {label}...")
    result = gdown.download(url, path, quiet=False)

    # Depending on the gdown version, a failed download may be reported by
    # returning None rather than raising. Stop here with a clear message
    # instead of letting the model loader trip over a missing file later.
    if result is None or not os.path.exists(path):
        raise RuntimeError(f"Failed to download {label} from {url} to {path}")


# Both downloads are written into this directory; create it up front.
os.makedirs("checkpoints", exist_ok=True)

_ensure_downloaded(MODEL_URL, MODEL_PATH, "model")
_ensure_downloaded(VOCAB_URL, VOCAB_PATH, "vocab")
|
|
|
|
|
# Fail early, with the offending path in the message, if the model config is
# not where this script expects it.
if not os.path.exists(CONFIG_PATH):
    raise FileNotFoundError(f"Config file not found: {CONFIG_PATH}")

# Decode the YAML explicitly as UTF-8. Without an encoding argument, open()
# falls back to the platform's locale codec, which can mis-decode or reject
# non-ASCII content on systems whose default is not UTF-8.
with open(CONFIG_PATH, "r", encoding="utf-8") as config_file:
    config = yaml.safe_load(config_file)

# Build the model once at module load; infer_text reuses this instance for
# every request.
model = build_model_from_config(config, MODEL_PATH, VOCAB_PATH)
|
|
|
|
|
def infer_text(text):
    """Synthesize audio for *text* using the module-level model and config.

    Args:
        text: The input string passed through to ``inference``.

    Returns:
        A ``(sample_rate, samples)`` tuple, where ``sample_rate`` is the fixed
        value 22050 and ``samples`` is the ``inference`` result converted with
        ``np.array``.
    """
    waveform = inference(model, text, config)
    samples = np.array(waveform)

    # NOTE(review): the sample rate is hard-coded; confirm that 22050 Hz
    # matches what the model actually produces.
    sample_rate = 22050
    return (sample_rate, samples)
|
|
|
|
|
# Gradio UI wiring: a "text" input is passed to infer_text and the returned
# value is rendered by an "audio" output.
demo = gr.Interface(
    infer_text,
    "text",
    "audio",
    title="F5-TTS Text-to-Speech",
)

# Launch the app only when this file is run as a script, so importing the
# module does not start a server.
if __name__ == "__main__":
    demo.launch()
|
|