# Source: Hugging Face Space (status when captured: Sleeping).
import os
import tempfile

import gradio as gr
import torch
from TTS.api import TTS
# BaseDatasetConfig is referenced by the safe-globals allowlist, so it must be imported.
from TTS.config.shared_configs import BaseDatasetConfig
from TTS.tts.configs.xtts_config import XttsConfig
from TTS.tts.models.xtts import XttsAudioConfig, XttsArgs
# Accept the Coqui license automatically by setting the agreement flag.
os.environ["COQUI_TOS_AGREED"] = "1"

# Allow-list the XTTS config classes for torch's restricted (weights_only)
# deserialization. All classes go through add_safe_globals: calling
# torch.serialization.safe_globals(...) outside a `with` statement only
# builds a context manager and registers nothing.
torch.serialization.add_safe_globals(
    [XttsConfig, XttsAudioConfig, BaseDatasetConfig, XttsArgs]
)
def safe_load_checkpoint(model_path):
    """Load a checkpoint onto the CPU with full (pickle) deserialization.

    Args:
        model_path: Path (or file-like object) accepted by ``torch.load``.

    Returns:
        Whatever object was stored in the checkpoint.

    WARNING: despite the name, ``weights_only=False`` disables torch's
    restricted unpickler, so loading can execute arbitrary code embedded in
    the file. Only call this on checkpoints from a trusted source.
    """
    return torch.load(model_path, map_location="cpu", weights_only=False)
# Initialize the TTS model once at module load; request handlers share it.
model_name = "tts_models/multilingual/multi-dataset/xtts_v2"
# Keep the model on the CPU explicitly.
tts = TTS(model_name=model_name).to("cpu")
def generate_cloned_voice(text, reference_audio, language):
    """Synthesize ``text`` in the voice heard in ``reference_audio``.

    Args:
        text: The text to speak.
        reference_audio: Filesystem path of the reference recording (the
            Gradio audio input is configured with ``type="filepath"``).
        language: Language code forwarded to the model, e.g. ``"en"``.

    Returns:
        Path of the generated WAV file.

    Raises:
        gr.Error: If ``text`` is blank or no reference audio was supplied.
    """
    if not text or not text.strip():
        raise gr.Error("Please enter the text to synthesize.")
    if not reference_audio:
        raise gr.Error("Please upload a reference audio sample.")

    # Use a unique file per call: a fixed "output.wav" would be overwritten
    # when two requests overlap, handing one user the other's audio.
    with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as handle:
        output_path = handle.name

    # Generate cloned speech with language specification
    tts.tts_to_file(
        text=text,
        speaker_wav=reference_audio,
        file_path=output_path,
        language=language,
    )
    return output_path
# Startup diagnostic: print the language codes reported by the loaded model.
print(tts.languages)

# Create the Gradio interface
interface = gr.Interface(
    fn=generate_cloned_voice,
    inputs=[
        gr.Textbox(label="Enter Translated Text"),
        # type="filepath" hands the handler a path to the uploaded file.
        gr.Audio(label="Upload Reference Audio", type="filepath"),
        gr.Dropdown(
            ["en", "fr", "de", "es", "it"],
            label="Select Target Language",
            value="en",
        ),
    ],
    outputs=gr.Audio(label="Generated Cloned Voice"),
    title="Free Voice Cloning API",
    description="Upload a sample voice and input text. Select a language, and the system will generate the text in the same voice.",
)

# Launch the Gradio app
interface.launch()