"""Gradio front end for voice cloning with the Coqui XTTS v2 model on CPU."""

import os

import torch
import gradio as gr
from TTS.api import TTS
from TTS.config.shared_configs import BaseDatasetConfig
from TTS.tts.configs.xtts_config import XttsConfig
from TTS.tts.models.xtts import XttsArgs, XttsAudioConfig

# Mark the Coqui terms of service as agreed before the model is constructed.
os.environ["COQUI_TOS_AGREED"] = "1"

# Allowlist the XTTS classes for torch's restricted (weights_only) unpickler.
# All four are registered through add_safe_globals: safe_globals() is a
# context manager and registers nothing when it is merely called.
torch.serialization.add_safe_globals(
    [XttsConfig, XttsAudioConfig, BaseDatasetConfig, XttsArgs]
)


def safe_load_checkpoint(model_path):
    """Load a checkpoint onto the CPU with full (unrestricted) unpickling.

    SECURITY: ``weights_only=False`` lets the checkpoint execute arbitrary
    code while it is unpickled, so only pass files from a trusted source.

    NOTE(review): nothing in this script as shown calls this helper; it is
    kept so that any external caller continues to work.

    Args:
        model_path: Path (or file-like object) accepted by ``torch.load``.

    Returns:
        Whatever object was stored in the checkpoint.
    """
    return torch.load(model_path, map_location="cpu", weights_only=False)


# Load the multilingual XTTS v2 model once at start-up and keep it on the CPU.
model_name = "tts_models/multilingual/multi-dataset/xtts_v2"
tts = TTS(model_name=model_name).to("cpu")


def generate_cloned_voice(text, reference_audio, language):
    """Synthesise ``text`` in the voice of ``reference_audio``.

    Args:
        text: The text to speak.
        reference_audio: File path of the reference voice sample (the Audio
            input below is configured with ``type="filepath"``).
        language: Language code forwarded to the model, e.g. ``"en"``.

    Returns:
        Path of the generated WAV file.

    Raises:
        gr.Error: If the text is blank or no reference audio was supplied,
            so the UI shows a readable message instead of a model traceback.
    """
    if not text or not text.strip():
        raise gr.Error("Please enter the text to synthesise.")
    if not reference_audio:
        raise gr.Error("Please upload a reference audio clip.")

    output_path = "output.wav"
    tts.tts_to_file(
        text=text,
        speaker_wav=reference_audio,
        file_path=output_path,
        language=language,
    )
    return output_path


# Log the language codes reported by the loaded model.
print(tts.languages)

# Build the web UI: text + reference clip + language in, cloned audio out.
interface = gr.Interface(
    fn=generate_cloned_voice,
    inputs=[
        gr.Textbox(label="Enter Translated Text"),
        gr.Audio(label="Upload Reference Audio", type="filepath"),
        gr.Dropdown(
            ["en", "fr", "de", "es", "it"],
            label="Select Target Language",
            value="en",
        ),
    ],
    outputs=gr.Audio(label="Generated Cloned Voice"),
    title="Free Voice Cloning API",
    description="Upload a sample voice and input text. Select a language, and the system will generate the text in the same voice.",
)

# Start the Gradio server.
interface.launch()