"""Gradio front end for XTTS v2 voice cloning, running on CPU."""

import os
from typing import Optional

import torch
import gradio as gr
from TTS.tts.configs.xtts_config import XttsConfig
from TTS.api import TTS

# Accept the Coqui license non-interactively.
os.environ["COQUI_TOS_AGREED"] = "1"

# Register `XttsConfig` with PyTorch's safe globals before the model is loaded.
torch.serialization.add_safe_globals([XttsConfig])

# Initialize the XTTS model (CPU-only).
tts = TTS(
    model_name="tts_models/multilingual/multi-dataset/xtts_v2",
    progress_bar=False,
).to("cpu")
print("✅ Model loaded successfully!")
print(f"✅ Supported Languages: {tts.languages}")


def generate_cloned_voice(
    text: Optional[str],
    reference_audio: Optional[str],
    language: str,
) -> str:
    """Synthesize ``text`` in the voice of ``reference_audio``.

    Args:
        text: The text to speak. Must contain non-whitespace characters.
        reference_audio: File path of the reference voice sample (the audio
            input below is configured with ``type="filepath"``).
        language: Language code passed through to ``tts.tts_to_file``.

    Returns:
        The path of the generated WAV file.

    Raises:
        gr.Error: If ``text`` is empty, no reference audio was supplied, or
            synthesis fails. The interface has a single audio output, so
            errors are raised rather than returned as extra values.
    """
    # Shared path: the interface below sets concurrency_limit=1, so calls made
    # through it do not write this file concurrently.
    output_path = "output.wav"

    if not text or not text.strip():
        raise gr.Error("Error: Please enter some text.")

    if not reference_audio:
        raise gr.Error("Error: Please upload a reference audio file.")

    try:
        print("🔄 Processing voice cloning...")
        tts.tts_to_file(
            text=text,
            speaker_wav=reference_audio,
            file_path=output_path,
            language=language,
        )
    except Exception as exc:
        # Log the failure, then raise it as gr.Error so the UI can display it.
        print(f"❌ Error: {str(exc)}")
        raise gr.Error(f"Error: {str(exc)}") from exc

    print("✅ Voice cloning complete!")
    return output_path


# Create the Gradio interface.
interface = gr.Interface(
    fn=generate_cloned_voice,
    inputs=[
        gr.Textbox(label="Enter Translated Text"),
        gr.Audio(label="Upload Reference Audio", type="filepath"),
        gr.Dropdown(tts.languages, label="Select Target Language", value="en"),
    ],
    outputs=gr.Audio(label="Generated Cloned Voice"),
    title="Free Voice Cloning API",
    description="Upload a sample voice and input text. Select a language, and the system will generate the text in the same voice.",
    allow_flagging="never",
    concurrency_limit=1,  # Prevents multiple processes from running at once
)

# Launch the Gradio app.
interface.launch()