File size: 1,954 Bytes
4760b00
4cc61f6
e25f277
4cc61f6
 
 
4ee577e
4cc61f6
 
 
e25f277
4999708
 
4cc61f6
 
4999708
e25f277
4999708
4cc61f6
4999708
4cc61f6
4999708
4cc61f6
 
 
 
 
 
 
 
 
 
 
 
4999708
3220f5e
4999708
4cc61f6
 
 
 
e25f277
4cc61f6
 
 
e25f277
4999708
4cc61f6
e25f277
4999708
f74edeb
 
 
 
3220f5e
4999708
4cc61f6
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
import gradio as gr
from TTS.api import TTS
import numpy as np
from scipy.io import wavfile
import tempfile
import os

# Instantiate the YourTTS model a single time at import, so every request
# handled by the app reuses the same loaded model object.
tts = TTS(
    model_name="tts_models/multilingual/multi-dataset/your_tts",
    progress_bar=False,
)

# Output sample rate reported by the model's synthesizer; used when the
# generated waveform is written out as a WAV file.
sample_rate = tts.synthesizer.output_sample_rate

def generate_speech(reference_audio, text):
    """
    Generate speech audio mimicking the voice from the reference audio.

    Parameters:
    reference_audio (str): Filepath to the uploaded voice sample.
    text (str): Text to convert to speech.

    Returns:
    str: Path to the generated WAV file. The file is created with
        ``delete=False`` and is not removed by this function on success.

    Raises:
    gr.Error: If no reference audio was supplied, or the text is empty or
        whitespace-only.
    """
    # Reject missing inputs up front so the user gets a readable message
    # instead of an opaque failure from inside the TTS library.
    if not reference_audio:
        raise gr.Error("Please upload a voice sample before generating speech.")
    if not text or not text.strip():
        raise gr.Error("Please enter some text to convert to speech.")

    # Generate speech using the reference audio and text
    wav = tts.tts(text=text, speaker_wav=reference_audio, language="en")
    # Convert the returned samples to a float32 numpy array
    wav_np = np.array(wav, dtype=np.float32)

    # Reserve a unique .wav path and close the handle right away: the file
    # is reopened by name below, which is not portable while the original
    # handle is still open.
    with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as temp_file:
        temp_file_path = temp_file.name

    try:
        wavfile.write(temp_file_path, sample_rate, wav_np)
    except Exception:
        # Do not leave an empty or partial file behind when writing fails.
        if os.path.exists(temp_file_path):
            os.remove(temp_file_path)
        raise

    return temp_file_path

# Assemble the Gradio Blocks UI: heading text, a row holding the two inputs,
# a trigger button, and a read-only audio player for the result.
with gr.Blocks(title="Voice Cloning TTS") as app:
    gr.Markdown("## Voice Cloning Text-to-Speech")
    gr.Markdown(
        "Upload a short voice sample in English, then enter text to hear it in your voice!"
    )

    with gr.Row():
        # The reference sample is handed to the callback as a file path.
        audio_input = gr.Audio(
            type="filepath",
            label="Upload Your Voice Sample (English)",
        )
        text_input = gr.Textbox(
            label="Enter Text to Convert to Speech",
            placeholder="e.g., I love chocolate",
        )

    generate_btn = gr.Button("Generate Speech")
    audio_output = gr.Audio(label="Generated Speech", interactive=False)

    # Clicking the button runs generate_speech on (voice sample, text) and
    # routes the returned file path to the output player.
    generate_btn.click(
        fn=generate_speech,
        inputs=[audio_input, text_input],
        outputs=audio_output,
    )

# Start the web server for the app
app.launch()