|
import os
import tempfile

import gradio as gr
import numpy as np
from scipy.io import wavfile
from TTS.api import TTS
|
|
|
|
|
# Identifier of the multilingual model that is loaded for synthesis.
MODEL_NAME = "tts_models/multilingual/multi-dataset/your_tts"

# Instantiate the model once at module load so every request reuses it.
tts = TTS(model_name=MODEL_NAME, progress_bar=False)

# Sampling rate reported by the loaded synthesizer; generate_speech passes it
# to the WAV writer so the file header matches the generated samples.
sample_rate = tts.synthesizer.output_sample_rate
|
|
|
def generate_speech(reference_audio, text):
    """
    Generate speech audio mimicking the voice from the reference audio.

    Parameters:
        reference_audio (str): Filepath to the uploaded voice sample.
        text (str): Text to convert to speech.

    Returns:
        str: Path to the generated ``.wav`` file. The file is created with
        ``delete=False`` so it is still on disk after this function returns;
        it is not removed here.

    Raises:
        ValueError: If ``reference_audio`` is missing/empty, or ``text`` is
            missing, empty, or whitespace-only.
    """
    # Fail fast with a clear message rather than handing unusable inputs to
    # the model and surfacing whatever error it happens to raise.
    if not reference_audio:
        raise ValueError("A reference voice sample is required.")
    if not text or not text.strip():
        raise ValueError("Text to synthesize must not be empty.")

    wav = tts.tts(text=text, speaker_wav=reference_audio, language="en")
    wav_np = np.asarray(wav, dtype=np.float32)

    # Reserve a unique path, then close our handle before the WAV writer
    # opens the same path itself.
    with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as temp_file:
        temp_file_path = temp_file.name

    try:
        wavfile.write(temp_file_path, sample_rate, wav_np)
    except Exception:
        # The placeholder was created with delete=False; remove it so a
        # failed write does not leave an orphaned file behind.
        os.unlink(temp_file_path)
        raise

    return temp_file_path
|
|
|
|
|
# Assemble the page: heading text, a row holding the two inputs, then the
# trigger button and the player for the synthesized result.
with gr.Blocks(title="Voice Cloning TTS") as app:
    gr.Markdown("## Voice Cloning Text-to-Speech")
    gr.Markdown("Upload a short voice sample in English, then enter text to hear it in your voice!")

    with gr.Row():
        # type="filepath" makes the callback receive a path string, which is
        # what generate_speech forwards as the speaker reference.
        audio_input = gr.Audio(
            type="filepath",
            label="Upload Your Voice Sample (English)",
        )
        text_input = gr.Textbox(
            label="Enter Text to Convert to Speech",
            placeholder="e.g., I love chocolate",
        )

    generate_btn = gr.Button("Generate Speech")
    audio_output = gr.Audio(label="Generated Speech", interactive=False)

    # On click, run generate_speech on (voice sample, text) and load the
    # returned file path into the output player.
    generate_btn.click(generate_speech, inputs=[audio_input, text_input], outputs=audio_output)

# Start the web server for the interface.
app.launch()