Spaces:
Running
Running
import gradio as gr | |
from TTS.api import TTS | |
import numpy as np | |
from scipy.io import wavfile | |
import tempfile | |
import os | |
# Instantiate the YourTTS model a single time at import so that every
# request handled by this process reuses the same loaded instance.
tts = TTS(
    model_name="tts_models/multilingual/multi-dataset/your_tts",
    progress_bar=False,
)

# Sample rate reported by the synthesizer for its output; it is the rate
# passed along when generated audio is written to a WAV file.
sample_rate = tts.synthesizer.output_sample_rate
def generate_speech(reference_audio, text):
    """
    Generate speech audio mimicking the voice from the reference audio.

    Parameters:
        reference_audio (str): Filepath to the uploaded voice sample.
        text (str): Text to convert to speech.

    Returns:
        str: Path to the generated WAV file. The file is created with
            ``delete=False``, so it outlives this call and is not removed
            by this function on success.

    Raises:
        gr.Error: If no reference audio was provided or the text is
            empty/whitespace-only, so the UI shows a readable message
            instead of an internal model error.
    """
    # Fail fast on missing inputs: Gradio passes None when no audio was
    # uploaded and an empty string when the textbox is left blank.
    if not reference_audio:
        raise gr.Error("Please upload a voice sample first.")
    if not text or not text.strip():
        raise gr.Error("Please enter some text to convert to speech.")

    # Generate speech using the reference audio and text
    wav = tts.tts(text=text, speaker_wav=reference_audio, language="en")

    # Convert the returned samples to a float32 numpy array for the WAV writer
    wav_np = np.asarray(wav, dtype=np.float32)

    # Reserve a unique output path, then close our handle *before* writing:
    # wavfile.write opens the path itself, and keeping a second handle open
    # breaks on platforms that forbid reopening an open temporary file.
    with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as temp_file:
        temp_file_path = temp_file.name

    try:
        wavfile.write(temp_file_path, sample_rate, wav_np)
    except Exception:
        # delete=False means nobody else will clean this up; do not leave
        # an orphaned (possibly partial) file behind when writing fails.
        os.unlink(temp_file_path)
        raise

    return temp_file_path
# Assemble the web UI: two inputs side by side, then a trigger button and
# the player for the synthesized result.
with gr.Blocks(title="Voice Cloning TTS") as app:
    gr.Markdown("## Voice Cloning Text-to-Speech")
    gr.Markdown("Upload a short voice sample in English, then enter text to hear it in your voice!")

    with gr.Row():
        # type="filepath" hands the handler a path on disk rather than raw samples.
        audio_input = gr.Audio(
            type="filepath",
            label="Upload Your Voice Sample (English)",
        )
        text_input = gr.Textbox(
            label="Enter Text to Convert to Speech",
            placeholder="e.g., I love chocolate",
        )

    generate_btn = gr.Button("Generate Speech")
    audio_output = gr.Audio(label="Generated Speech", interactive=False)

    # Wire the button: (voice sample, text) -> generate_speech -> output player.
    generate_btn.click(
        generate_speech,
        [audio_input, text_input],
        audio_output,
    )

# Start serving the interface.
app.launch()