# --- Residue from the web file viewer; not part of the program ---
# Spaces:
# Running
# Running
# File size: 2,080 Bytes
# 4760b00 4999708 ba92b2d 47d7c50 c4c3acd 8595d15 47d7c50 8595d15 743a58d 8595d15 f8f4a26 4999708 4760b00 4999708 ba92b2d 4999708 ba92b2d 4999708 3220f5e 4999708 3220f5e 4999708 3220f5e 4999708 3220f5e 4999708 f74edeb 3220f5e 4999708 |
# 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 |
import gradio as gr
from bark import SAMPLE_RATE, generate_audio, preload_models
from scipy.io.wavfile import write as write_wav
import tempfile
import torch
from numpy.core.multiarray import scalar
import numpy
# Allowlist the NumPy scalar reconstructor and numpy.dtype for torch's
# restricted unpickler. Per the original note this fixes an UnpicklingError,
# and allowlisting numpy.dtype covers specific instances such as
# numpy.dtype[float64].
#
# add_safe_globals is only present in newer PyTorch releases, so it is looked
# up defensively: on a build that does not provide it, the registration is
# skipped instead of aborting the whole script with an AttributeError.
# NOTE(review): assumes builds without add_safe_globals do not need the
# allowlist to load the checkpoints - confirm against the pinned torch version.
_add_safe_globals = getattr(torch.serialization, "add_safe_globals", None)
if _add_safe_globals is not None:
    _add_safe_globals([scalar, numpy.dtype])
# Preload the models at startup: this is a module-level call, so it runs once
# when the script is loaded, before the interface further down is built.
# NOTE(review): presumably this keeps model loading out of the first
# generation request - confirm.
preload_models()
def generate_speech(reference_audio, text):
    """
    Generate speech audio for ``text`` with Bark and save it as a WAV file.

    Parameters:
        reference_audio (str): Filepath to the uploaded voice sample. It is
            forwarded unchanged to Bark as ``history_prompt``.
        text (str): Text to convert to speech.

    Returns:
        str: Path to the generated audio file. The file is a temporary
        ``.wav`` created with ``delete=False``; it is not removed here.

    Raises:
        gr.Error: If ``text`` is empty or contains only whitespace.
    """
    # Fail fast on blank input with a message Gradio can show to the user,
    # rather than starting a generation run that has nothing to say.
    if not text or not text.strip():
        raise gr.Error("Please enter some text to convert to speech.")

    # NOTE(review): Bark documents ``history_prompt`` as a speaker preset name
    # or a saved ``.npz`` prompt; confirm that a raw audio filepath is actually
    # accepted here - it may need converting into a Bark prompt first.
    audio_array = generate_audio(text, history_prompt=reference_audio)

    # Reserve a unique .wav path and close the handle *before* writing.
    # The ``with`` block guarantees the descriptor is released even if a later
    # step raises, and avoids having the same file open twice while scipy
    # writes to it by name.
    with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as temp_file:
        temp_file_path = temp_file.name

    # Save the audio to the temporary file
    write_wav(temp_file_path, SAMPLE_RATE, audio_array)
    return temp_file_path
# Build the Gradio interface. Components are declared in display order, so
# the statement order below is also the on-page order.
with gr.Blocks(title="Voice Cloning TTS with Bark") as app:
    gr.Markdown("## Voice Cloning Text-to-Speech with Bark")
    gr.Markdown("Upload a short voice sample in English, then enter text to hear it in your voice!")

    # NOTE(review): block nesting was reconstructed; this assumes only the two
    # inputs share the row and the button/output sit below it - confirm.
    with gr.Row():
        # type="filepath" hands the handler a path string, not raw samples.
        audio_input = gr.Audio(type="filepath", label="Upload Your Voice Sample (English)")
        text_input = gr.Textbox(label="Enter Text to Convert to Speech", placeholder="e.g., I love chocolate")

    generate_btn = gr.Button("Generate Speech")
    # Output-only player: users cannot upload or record into it.
    audio_output = gr.Audio(label="Generated Speech", interactive=False)

    # Connect the button to the generation function; the inputs are passed
    # positionally as (reference_audio, text).
    generate_btn.click(
        fn=generate_speech,
        inputs=[audio_input, text_input],
        outputs=audio_output,
    )

# Launch the application
app.launch()