import sys
import os

# Make sure Python can find src/f5_tts
sys.path.append(os.path.join(os.path.dirname(__file__), "src"))

import gdown
import gradio as gr
import torch
import yaml
import numpy as np

from f5_tts.infer.utils_infer import build_model_from_config, inference

# Google Drive model + vocab links
MODEL_URL = "https://drive.google.com/uc?id=1llj4Z3uzKCXL_0EIuXqIFtXYm6lraaIz"
VOCAB_URL = "https://drive.google.com/uc?id=1YNluHbc_bqhj7B1wp9by4U0-LfCPCeLL"

# Local storage paths (download at runtime)
MODEL_PATH = "checkpoints/model_1250000.safetensors"
VOCAB_PATH = "checkpoints/vocab.txt"
CONFIG_PATH = "src/f5_tts/configs/F5TTS_Base.yaml"

# Create checkpoints folder
os.makedirs("checkpoints", exist_ok=True)

# Download model + vocab if not already present
if not os.path.exists(MODEL_PATH):
    print("🔽 Downloading model...")
    gdown.download(MODEL_URL, MODEL_PATH, quiet=False)

if not os.path.exists(VOCAB_PATH):
    print("🔽 Downloading vocab...")
    gdown.download(VOCAB_URL, VOCAB_PATH, quiet=False)

# Load model config
if not os.path.exists(CONFIG_PATH):
    raise FileNotFoundError(f"Config file not found: {CONFIG_PATH}")

with open(CONFIG_PATH, "r") as f:
    config = yaml.safe_load(f)

# Build model
model = build_model_from_config(config, MODEL_PATH, VOCAB_PATH)


# Inference function for Gradio
def infer_text(text):
    wav = inference(model, text, config)
    return (22050, np.array(wav))


# Gradio UI
demo = gr.Interface(
    fn=infer_text,
    inputs="text",
    outputs="audio",
    title="F5-TTS Text-to-Speech"
)

if __name__ == "__main__":
    demo.launch()
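
# ---------------------------------------------------------------------------
# Optional: calling the running app from another process (a minimal sketch).
# This assumes the demo above is being served on Gradio's default local port
# 7860 and exposes the default "/predict" endpoint; adjust the URL or endpoint
# name if you launch with different settings. Requires `pip install gradio_client`.
#
#   from gradio_client import Client
#
#   client = Client("http://127.0.0.1:7860")
#   audio_path = client.predict("Hello from F5-TTS!", api_name="/predict")
#   print("Generated audio written to:", audio_path)
# ---------------------------------------------------------------------------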