File size: 2,080 Bytes
4760b00
4999708
 
ba92b2d
47d7c50
c4c3acd
8595d15
47d7c50
8595d15
743a58d
8595d15
f8f4a26
4999708
 
4760b00
4999708
 
 
 
 
 
 
ba92b2d
4999708
 
 
 
 
ba92b2d
4999708
 
 
 
 
 
 
 
3220f5e
4999708
 
 
 
3220f5e
 
4999708
 
3220f5e
4999708
 
3220f5e
4999708
 
f74edeb
 
 
 
3220f5e
4999708
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
import os
import tempfile

import gradio as gr
import numpy
import torch
from bark import SAMPLE_RATE, generate_audio, preload_models
from numpy.core.multiarray import scalar
from scipy.io.wavfile import write as write_wav

# Add NumPy scalar and dtype to torch's safe globals to fix the UnpicklingError.
# This allowlists numpy.dtype to cover specific instances like numpy.dtype[float64].
# The registration hook is looked up defensively: a PyTorch build that does not
# provide add_safe_globals would otherwise abort the whole app at import time
# with AttributeError. NOTE(review): such builds presumably do not need the
# allowlist at all -- confirm against the pinned torch version.
_add_safe_globals = getattr(torch.serialization, "add_safe_globals", None)
if _add_safe_globals is not None:
    _add_safe_globals([scalar, numpy.dtype])

# Preload the models at startup
preload_models()

def generate_speech(reference_audio, text):
    """
    Generate speech for the given text with Bark and save it as a WAV file.

    Parameters:
    reference_audio (str): Filepath to the uploaded voice sample. It is passed
        to Bark unchanged as ``history_prompt``.
    text (str): Text to convert to speech.

    Returns:
    str: Path to the generated audio file. The file is created with
        ``delete=False``, so it is not removed automatically.

    Raises:
    ValueError: If ``text`` is empty, ``None``, or only whitespace.
    """
    # Reject blank input up front rather than starting a model run with
    # nothing to synthesize.
    if not text or not text.strip():
        raise ValueError("Please enter some text to convert to speech.")

    # Generate speech using the reference audio and text.
    # NOTE(review): Bark's history_prompt appears to expect a speaker preset
    # name or an .npz prompt, not a raw audio filepath -- confirm that an
    # uploaded sample is actually accepted here.
    audio_array = generate_audio(text, history_prompt=reference_audio)

    # Reserve a unique .wav path and close the handle immediately, so the
    # writer below is the only thing holding the file open and the handle
    # cannot leak if writing fails.
    with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as temp_file:
        temp_file_path = temp_file.name

    try:
        # Save the audio to the temporary file
        write_wav(temp_file_path, SAMPLE_RATE, audio_array)
    except Exception:
        # delete=False means nothing else will clean this up; do not leave an
        # orphaned (possibly partial) file behind when the write fails.
        os.unlink(temp_file_path)
        raise

    return temp_file_path

# Assemble the UI: heading and instructions, an input row (voice sample next to
# the text box), a trigger button, and a non-interactive player for the result.
with gr.Blocks(title="Voice Cloning TTS with Bark") as app:
    gr.Markdown("## Voice Cloning Text-to-Speech with Bark")
    gr.Markdown("Upload a short voice sample in English, then enter text to hear it in your voice!")

    with gr.Row():
        # type="filepath" makes Gradio hand the upload to the callback as a path.
        voice_sample = gr.Audio(
            label="Upload Your Voice Sample (English)",
            type="filepath",
        )
        prompt_text = gr.Textbox(
            placeholder="e.g., I love chocolate",
            label="Enter Text to Convert to Speech",
        )

    run_button = gr.Button("Generate Speech")
    speech_player = gr.Audio(interactive=False, label="Generated Speech")

    # Clicking the button runs generate_speech(voice sample, text) and sends
    # its return value to the output player.
    run_button.click(
        generate_speech,
        inputs=[voice_sample, prompt_text],
        outputs=speech_player,
    )

# Start serving the app
app.launch()