VocalForge-AI / v1.txt
shukdevdatta123's picture
Create v1.txt
af961e5 verified
import gradio as gr
from TTS.api import TTS
import numpy as np
from scipy.io import wavfile
import tempfile
import os
# The YourTTS checkpoint is loaded a single time, when the module is first
# executed, and the resulting object is shared by every request. The model's
# output sample rate is read once here so it can be reused when writing WAVs.
_YOURTTS_MODEL_NAME = "tts_models/multilingual/multi-dataset/your_tts"
tts = TTS(model_name=_YOURTTS_MODEL_NAME, progress_bar=False)
sample_rate = tts.synthesizer.output_sample_rate
def generate_speech(reference_audio, text):
    """
    Generate speech audio mimicking the voice from the reference audio.

    Parameters:
        reference_audio (str): Filepath to the uploaded voice sample.
        text (str): Text to convert to speech.

    Returns:
        str: Path to the generated ".wav" file. The file is created with
        delete=False, so it is not removed automatically.

    Raises:
        gr.Error: If no reference audio was supplied or the text is blank.
    """
    # Fail early with a readable message instead of letting the model raise
    # an opaque internal error on a missing sample or empty prompt.
    if not reference_audio:
        raise gr.Error("Please upload a voice sample first.")
    if not text or not text.strip():
        raise gr.Error("Please enter some text to convert to speech.")

    # Synthesize speech conditioned on the reference voice.
    wav = tts.tts(text=text, speaker_wav=reference_audio, language="en")
    wav_np = np.asarray(wav, dtype=np.float32)

    # Reserve a unique path and close the handle immediately: wavfile.write
    # reopens the file by name, so keeping our own handle open is unnecessary
    # and would leak it if the write raised.
    with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as temp_file:
        temp_file_path = temp_file.name

    try:
        wavfile.write(temp_file_path, sample_rate, wav_np)
    except Exception:
        # delete=False means nobody else cleans this up; do not leave an
        # empty or partial file behind when the write fails.
        try:
            os.remove(temp_file_path)
        except OSError:
            pass
        raise

    return temp_file_path
# Build the Gradio interface: a voice-sample upload and a text box are passed
# to generate_speech, and the file path it returns is shown in audio_output.
with gr.Blocks(title="Voice Cloning TTS") as app:
    gr.Markdown("## Voice Cloning Text-to-Speech")
    gr.Markdown("Upload a short voice sample in English, then enter text to hear it in your voice!")
    # NOTE(review): indentation was lost in this copy of the file; it is
    # assumed that only the two inputs below sit inside the Row — confirm.
    with gr.Row():
        # type="filepath" matches generate_speech, which expects a path string.
        audio_input = gr.Audio(type="filepath", label="Upload Your Voice Sample (English)")
        text_input = gr.Textbox(label="Enter Text to Convert to Speech", placeholder="e.g., I love chocolate")
    generate_btn = gr.Button("Generate Speech")
    # Output-only player for the synthesized result.
    audio_output = gr.Audio(label="Generated Speech", interactive=False)
    # Connect the button to the generation function; the inputs list order
    # must match generate_speech's (reference_audio, text) parameter order.
    generate_btn.click(
        fn=generate_speech,
        inputs=[audio_input, text_input],
        outputs=audio_output
    )
# Launch the application (runs at module level, i.e. whenever this file is executed)
app.launch()