File size: 1,626 Bytes
3220f5e
f74edeb
3220f5e
f8f4a26
f74edeb
 
 
f8f4a26
f74edeb
 
 
f8f4a26
f74edeb
 
 
f8f4a26
f74edeb
 
 
 
 
 
 
 
3220f5e
f74edeb
 
 
 
3220f5e
 
db74fa9
f74edeb
3220f5e
f74edeb
 
3220f5e
f74edeb
 
 
 
 
 
3220f5e
f74edeb
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
import gradio as gr
from TTS.api import TTS
import numpy as np

# Instantiate YourTTS a single time at import so every request reuses the same model
tts = TTS(
    model_name="tts_models/multilingual/multi-dataset/your_tts",
    progress_bar=False,
)
# Output rate reported by the loaded synthesizer; returned alongside generated audio
sample_rate = tts.synthesizer.output_sample_rate

def generate_speech(reference_audio, text):
    """
    Generate speech audio mimicking the voice from the reference audio.

    Parameters:
    reference_audio (str): Filepath to the uploaded voice sample.
    text (str): Text to convert to speech.

    Returns:
    tuple: (sample rate, audio waveform as numpy array) -- the order that
    Gradio's Audio output component expects.

    Raises:
    gr.Error: If no voice sample was provided or the text is blank.
    """
    # An empty Audio input arrives as None; fail early with a readable message
    # instead of an opaque error from inside the model call
    if not reference_audio:
        raise gr.Error("Please upload a voice sample first.")
    if not text or not text.strip():
        raise gr.Error("Please enter some text to convert to speech.")

    # Generate speech using the reference audio and text
    wav = tts.tts(text=text, speaker_wav=reference_audio, language="en")
    # Convert the returned samples to a float32 numpy array for Gradio
    wav_np = np.asarray(wav, dtype=np.float32)
    # Gradio's Audio output takes the sample rate first, then the samples
    return (sample_rate, wav_np)

# Assemble the UI: a voice-sample upload and a text box side by side,
# with a trigger button and a read-only player underneath
with gr.Blocks(title="Voice Cloning TTS") as app:
    gr.Markdown("## Voice Cloning Text-to-Speech")
    gr.Markdown(
        "Upload a short voice sample in English, then enter text to hear it in your voice!"
    )

    with gr.Row():
        audio_input = gr.Audio(
            type="filepath",
            label="Upload Your Voice Sample (English)",
        )
        text_input = gr.Textbox(
            label="Enter Text to Convert to Speech",
            placeholder="e.g., I love chocolate",
        )

    generate_btn = gr.Button("Generate Speech")
    audio_output = gr.Audio(label="Generated Speech", interactive=False)

    # Clicking the button feeds both inputs to generate_speech and plays the result
    generate_btn.click(generate_speech, [audio_input, text_input], audio_output)

# Launch the application only when this file is run as a script, so that
# importing the module (e.g. from tooling) does not start a web server
if __name__ == "__main__":
    app.launch()