Spaces:

shukdevdatta123
/

VocalForge-AI

Running

File size: 1,626 Bytes

3220f5e
f74edeb
3220f5e
f8f4a26
f74edeb
 
 
f8f4a26
f74edeb
 
 
f8f4a26
f74edeb
 
 
f8f4a26
f74edeb
 
 
 
 
 
 
 
3220f5e
f74edeb
 
 
 
3220f5e
 
db74fa9
f74edeb
3220f5e
f74edeb
 
3220f5e
f74edeb
 
 
 
 
 
3220f5e
f74edeb

import gradio as gr
from TTS.api import TTS
import numpy as np

# Instantiate the YourTTS model a single time at import so that every call to
# generate_speech reuses it, and remember the sample rate reported by its
# synthesizer for the generated audio.
tts = TTS(
    model_name="tts_models/multilingual/multi-dataset/your_tts",
    progress_bar=False,
)
sample_rate = tts.synthesizer.output_sample_rate

def generate_speech(reference_audio, text):
    """
    Generate speech audio mimicking the voice from the reference audio.

    The synthesis language is fixed to English ("en").

    Parameters:
    reference_audio (str): Filepath to the uploaded voice sample.
    text (str): Text to convert to speech.

    Returns:
    tuple: (sample rate, audio waveform as numpy array) -- the order a
        gr.Audio output component expects for tuple values.

    Raises:
    gr.Error: If no voice sample was provided or the text is blank, so the
        UI shows a readable message instead of a model failure.
    """
    # Reject missing inputs up front; the model cannot clone a voice without
    # a reference clip, and has nothing to say for empty text.
    if not reference_audio:
        raise gr.Error("Please upload a voice sample first.")
    if not text or not text.strip():
        raise gr.Error("Please enter some text to convert to speech.")

    # Generate speech using the reference audio and text
    wav = tts.tts(text=text, speaker_wav=reference_audio, language="en")
    # Convert the returned samples to a numpy array for Gradio
    wav_np = np.asarray(wav)
    # gr.Audio unpacks tuple outputs as (sample_rate, data); returning them
    # in the opposite order makes the component fail when it saves the audio.
    return (sample_rate, wav_np)

# Build the Gradio interface: a voice-sample upload and a text box feed
# generate_speech, and the result is shown in a non-interactive audio player.
with gr.Blocks(title="Voice Cloning TTS") as app:
    gr.Markdown("## Voice Cloning Text-to-Speech")
    gr.Markdown("Upload a short voice sample in English, then enter text to hear it in your voice!")

    with gr.Row():
        # type="filepath" hands generate_speech a path on disk rather than
        # decoded samples, which is what it passes on as speaker_wav.
        audio_input = gr.Audio(type="filepath", label="Upload Your Voice Sample (English)")
        text_input = gr.Textbox(label="Enter Text to Convert to Speech", placeholder="e.g., I love chocolate")

    generate_btn = gr.Button("Generate Speech")
    audio_output = gr.Audio(label="Generated Speech", interactive=False)

    # Connect the button to the generation function
    generate_btn.click(
        fn=generate_speech,
        inputs=[audio_input, text_input],
        outputs=audio_output,
    )

# Launch only when executed as a script, so importing this module (for
# example from a test or another tool) does not start a web server.
if __name__ == "__main__":
    app.launch()