"""Gradio demo: text-to-speech with Bark, optionally conditioned on a voice prompt.

Importing/running this module preloads the Bark models, builds the Gradio
Blocks UI and launches the web application.
"""

import tempfile

import gradio as gr
import torch
from bark import SAMPLE_RATE, generate_audio, preload_models
from scipy.io.wavfile import write as write_wav

# Keep a reference to the real torch.load so it can be restored afterwards.
original_load = torch.load


def custom_load(*args, **kwargs):
    """Call the original ``torch.load`` with ``weights_only`` forced to False.

    Any ``weights_only`` value supplied by the caller is overridden.

    SECURITY NOTE: ``weights_only=False`` allows full unpickling, which can
    execute arbitrary code contained in the checkpoint file. This is only
    acceptable for checkpoints from a trusted source.
    NOTE(review): presumably needed because the Bark checkpoints cannot be
    loaded with ``weights_only=True`` -- confirm against the installed
    PyTorch/Bark versions.
    """
    kwargs["weights_only"] = False
    return original_load(*args, **kwargs)


# Patch torch.load only for the duration of model preloading. The finally
# clause guarantees the original function is restored even if preloading
# fails, so the relaxed loader never leaks into the rest of the process.
torch.load = custom_load
try:
    preload_models()
finally:
    torch.load = original_load


def generate_speech(reference_audio, text):
    """Generate speech for *text* with Bark and save it as a WAV file.

    Parameters:
        reference_audio (str): Filepath of the uploaded voice sample (None
            when nothing was uploaded). It is forwarded unchanged to Bark as
            ``history_prompt``.
        text (str): Text to convert to speech.

    Returns:
        str: Path to the generated audio file. The file is created with
        ``delete=False`` and is not removed by this function.

    Raises:
        gr.Error: If *text* is empty or contains only whitespace.
    """
    # Fail early with a message shown in the UI instead of sending an empty
    # prompt to the model.
    if not text or not text.strip():
        raise gr.Error("Please enter some text to convert to speech.")

    # NOTE(review): Bark documents ``history_prompt`` as a speaker preset
    # name (e.g. "v2/en_speaker_6") or a saved ``.npz`` prompt; a raw audio
    # filepath may be rejected -- confirm that this call works for uploads.
    audio_array = generate_audio(text, history_prompt=reference_audio)

    # Reserve a unique path and close the handle *before* writing, so that
    # write_wav is the only writer holding the file open.
    with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as temp_file:
        temp_file_path = temp_file.name

    write_wav(temp_file_path, SAMPLE_RATE, audio_array)
    return temp_file_path


# Build the Gradio interface
with gr.Blocks(title="Voice Cloning TTS with Bark") as app:
    gr.Markdown("## Voice Cloning Text-to-Speech with Bark")
    gr.Markdown(
        "Upload a short voice sample in English, then enter text to hear it in your voice!"
    )

    with gr.Row():
        audio_input = gr.Audio(
            type="filepath",
            label="Upload Your Voice Sample (English)",
        )
        text_input = gr.Textbox(
            label="Enter Text to Convert to Speech",
            placeholder="e.g., I love chocolate",
        )

    generate_btn = gr.Button("Generate Speech")
    audio_output = gr.Audio(label="Generated Speech", interactive=False)

    # Connect the button to the generation function
    generate_btn.click(
        fn=generate_speech,
        inputs=[audio_input, text_input],
        outputs=audio_output,
    )

# Launch the application
app.launch()