Spaces:
Running
Running
File size: 1,954 Bytes
af961e5 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 |
import os
import tempfile

import gradio as gr
import numpy as np
from scipy.io import wavfile
from TTS.api import TTS

# Load the YourTTS model once at startup so every request reuses the same
# instance instead of reloading it per call.
tts = TTS(model_name="tts_models/multilingual/multi-dataset/your_tts", progress_bar=False)
sample_rate = tts.synthesizer.output_sample_rate


def generate_speech(reference_audio, text, language="en"):
    """
    Generate speech audio mimicking the voice from the reference audio.

    Parameters:
        reference_audio (str): Filepath to the uploaded voice sample.
        text (str): Text to convert to speech.
        language (str): Language code passed to the TTS model. Defaults to
            "en", which is what the interface below relies on.

    Returns:
        str: Path to the generated audio file. The file is created with
            delete=False, so it stays on disk after this function returns.

    Raises:
        gr.Error: If no reference audio was supplied or the text is empty
            or whitespace-only.
    """
    # Fail early with a message the UI can display, rather than letting the
    # TTS backend raise an opaque error on a missing sample or empty text.
    if not reference_audio:
        raise gr.Error("Please upload a voice sample first.")
    if not text or not text.strip():
        raise gr.Error("Please enter some text to convert to speech.")

    # Generate speech using the reference audio and text
    wav = tts.tts(text=text, speaker_wav=reference_audio, language=language)

    # Convert the returned samples to a float32 numpy array
    wav_np = np.asarray(wav, dtype=np.float32)

    # Reserve a unique output path. The context manager closes the handle
    # right away (even on error); delete=False keeps the file for the caller.
    with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as temp_file:
        temp_file_path = temp_file.name

    # Save the audio to the temporary file; do not leave an empty or partial
    # file behind if the write fails.
    try:
        wavfile.write(temp_file_path, sample_rate, wav_np)
    except Exception:
        os.remove(temp_file_path)
        raise

    return temp_file_path


# Build the Gradio interface
with gr.Blocks(title="Voice Cloning TTS") as app:
    gr.Markdown("## Voice Cloning Text-to-Speech")
    gr.Markdown("Upload a short voice sample in English, then enter text to hear it in your voice!")

    with gr.Row():
        audio_input = gr.Audio(type="filepath", label="Upload Your Voice Sample (English)")
        text_input = gr.Textbox(label="Enter Text to Convert to Speech", placeholder="e.g., I love chocolate")

    generate_btn = gr.Button("Generate Speech")
    audio_output = gr.Audio(label="Generated Speech", interactive=False)

    # Connect the button to the generation function
    generate_btn.click(
        fn=generate_speech,
        inputs=[audio_input, text_input],
        outputs=audio_output,
    )

# Launch the application
app.launch()