File size: 1,954 Bytes
4760b00 4cc61f6 e25f277 4cc61f6 4ee577e 4cc61f6 e25f277 4999708 4cc61f6 4999708 e25f277 4999708 4cc61f6 4999708 4cc61f6 4999708 4cc61f6 4999708 3220f5e 4999708 4cc61f6 e25f277 4cc61f6 e25f277 4999708 4cc61f6 e25f277 4999708 f74edeb 3220f5e 4999708 4cc61f6 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 |
import gradio as gr
from TTS.api import TTS
import numpy as np
from scipy.io import wavfile
import tempfile
import os
# Identifier of the pretrained YourTTS checkpoint to load.
YOUR_TTS_MODEL_NAME = "tts_models/multilingual/multi-dataset/your_tts"

# Build the model a single time at import so every request shares it.
tts = TTS(model_name=YOUR_TTS_MODEL_NAME, progress_bar=False)

# Output sample rate reported by the model's synthesizer.
sample_rate = tts.synthesizer.output_sample_rate
def generate_speech(reference_audio, text):
    """
    Generate speech audio mimicking the voice from the reference audio.

    Parameters:
        reference_audio (str): Filepath to the uploaded voice sample.
        text (str): Text to convert to speech.

    Returns:
        str: Path to the generated WAV file. The file is created with
            ``delete=False``, so it is not removed automatically.

    Raises:
        gr.Error: If no reference audio was supplied, or if ``text`` is
            empty or contains only whitespace.
    """
    # Fail early with a readable message instead of letting the TTS call
    # blow up on a missing path or empty string.
    # NOTE(review): assumes Gradio passes None/"" for unfilled inputs - confirm.
    if not reference_audio:
        raise gr.Error("Please upload a voice sample before generating speech.")
    if not text or not text.strip():
        raise gr.Error("Please enter some text to convert to speech.")

    # Synthesize the text in English, conditioned on the reference voice.
    wav = tts.tts(text=text, speaker_wav=reference_audio, language="en")

    # Convert the synthesized samples to a float32 numpy array for writing.
    wav_np = np.asarray(wav, dtype=np.float32)

    # Reserve a unique .wav path, then close the handle right away so that
    # wavfile.write is the only writer to that path.
    with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as temp_file:
        temp_file_path = temp_file.name

    try:
        wavfile.write(temp_file_path, sample_rate, wav_np)
    except Exception:
        # The file was created with delete=False; remove it so a failed
        # write does not leave an orphaned temp file behind.
        try:
            os.remove(temp_file_path)
        except OSError:
            pass
        raise

    return temp_file_path
# Build the Gradio interface: a header, a row with the two inputs, a trigger
# button, and a read-only audio player for the result.
with gr.Blocks(title="Voice Cloning TTS") as app:
    gr.Markdown("## Voice Cloning Text-to-Speech")
    gr.Markdown("Upload a short voice sample in English, then enter text to hear it in your voice!")

    with gr.Row():
        # type="filepath" hands the callback a path string, which is what
        # generate_speech forwards as the speaker reference.
        audio_input = gr.Audio(type="filepath", label="Upload Your Voice Sample (English)")
        text_input = gr.Textbox(label="Enter Text to Convert to Speech", placeholder="e.g., I love chocolate")

    generate_btn = gr.Button("Generate Speech")
    audio_output = gr.Audio(label="Generated Speech", interactive=False)

    # Connect the button to the generation function; the returned file path
    # is sent to the output audio component.
    generate_btn.click(
        fn=generate_speech,
        inputs=[audio_input, text_input],
        outputs=audio_output,
    )

# Launch the application
app.launch()