"""Gradio front end that records a reference voice and generates speech with Bark."""

import os
import time
import traceback

import gradio as gr
import numpy as np
from scipy.io import wavfile

# Explicitly import Bark components
from bark import generate_audio, SAMPLE_RATE
from bark.generation import preload_models


class VoiceCloningApp:
    """Callbacks behind the UI: store a reference recording and synthesise speech.

    All audio files are written to a ``working_files`` directory located next
    to this source file.
    """

    def __init__(self):
        """Create the working directory and preload the Bark models.

        Raises:
            RuntimeError: If the Bark models cannot be loaded. The original
                exception is attached as ``__cause__``.
        """
        # Create working directory
        self.base_dir = os.path.dirname(os.path.abspath(__file__))
        self.working_dir = os.path.join(self.base_dir, "working_files")
        os.makedirs(self.working_dir, exist_ok=True)

        # Explicit model loading with error handling
        try:
            print("Attempting to load Bark models...")
            preload_models()
            print("Bark models loaded successfully.")
        except Exception as e:
            print(f"Error loading Bark models: {e}")
            traceback.print_exc()
            # Chain the cause so the real failure stays visible to callers.
            raise RuntimeError(f"Could not load Bark models. Error: {e}") from e

    def _timestamped_path(self, prefix):
        """Return a path in the working directory named ``<prefix>_<ns>.wav``.

        A nanosecond timestamp is used so that two requests arriving within
        the same second do not overwrite each other's file.
        """
        return os.path.join(self.working_dir, f"{prefix}_{time.time_ns()}.wav")

    def process_reference_audio(self, audio_data):
        """Peak-normalise a recording and save it as a WAV file.

        Args:
            audio_data: ``None`` or a ``(sample_rate, samples)`` pair as
                delivered by the ``gr.Audio(type="numpy")`` component.

        Returns:
            A status message for the UI; failures are reported in the message
            rather than raised.
        """
        if audio_data is None:
            return "Please provide an audio input"

        try:
            # Unpack audio data
            sample_rate, audio_array = audio_data

            # Convert to float *before* taking the absolute value: on int16
            # input np.abs(-32768) wraps around to -32768, which would
            # underestimate the peak and push normalised samples past +/-1.
            samples = np.asarray(audio_array, dtype=np.float32)
            if samples.size == 0:
                return "Error processing audio: recording is empty"

            peak = np.max(np.abs(samples))
            if peak == 0:
                # Dividing by zero would fill the file with NaN samples.
                return "Error processing audio: recording is silent"

            # Normalize audio (result stays float32, a widely supported WAV format)
            samples = samples / peak

            # Save reference audio
            wavfile.write(self._timestamped_path("reference"), sample_rate, samples)

            return "✅ Audio captured successfully!"
        except Exception as e:
            # UI boundary: surface the problem as text instead of crashing.
            return f"Error processing audio: {str(e)}"

    def generate_speech(self, text):
        """Generate speech for ``text`` with Bark and save it as a WAV file.

        The call passes ``history_prompt=None``, so the saved reference
        recording is not used for generation here.

        Args:
            text: The text to speak.

        Returns:
            ``(filepath, None)`` on success, or ``(None, message)`` when the
            text is blank or generation fails.
        """
        if not text or not text.strip():
            return None, "Please enter some text to speak"

        try:
            # Generate audio with explicit error handling
            print(f"Generating speech for text: {text}")

            # Simplified audio generation
            audio_array = generate_audio(
                text,
                history_prompt=None,
                temp=0.7,
            )

            # Save generated audio
            filepath = self._timestamped_path("generated_speech")
            wavfile.write(filepath, SAMPLE_RATE, audio_array)

            return filepath, None
        except Exception as e:
            print(f"Speech generation error: {e}")
            traceback.print_exc()
            return None, f"Error generating speech: {str(e)}"


def create_interface():
    """Build the Gradio Blocks UI and wire it to a ``VoiceCloningApp``.

    Returns:
        The constructed (not yet launched) ``gr.Blocks`` interface.

    Raises:
        RuntimeError: Propagated from ``VoiceCloningApp()`` when the Bark
            models cannot be loaded.
    """
    # VoiceCloningApp.__init__ creates the working directory, so it is not
    # created a second time here.
    app = VoiceCloningApp()

    with gr.Blocks() as interface:
        gr.Markdown("# 🎙️ Voice Cloning App")

        with gr.Row():
            with gr.Column():
                gr.Markdown("## 1. Capture Reference Voice")
                reference_audio = gr.Audio(sources=["microphone", "upload"], type="numpy")
                process_btn = gr.Button("Process Reference Voice")
                process_output = gr.Textbox(label="Processing Result")

            with gr.Column():
                gr.Markdown("## 2. Generate Speech")
                text_input = gr.Textbox(label="Enter Text to Speak")
                generate_btn = gr.Button("Generate Speech")
                audio_output = gr.Audio(label="Generated Speech")
                error_output = gr.Textbox(label="Errors", visible=True)

        # Bind functions
        process_btn.click(
            fn=app.process_reference_audio,
            inputs=reference_audio,
            outputs=process_output,
        )

        generate_btn.click(
            fn=app.generate_speech,
            inputs=text_input,
            outputs=[audio_output, error_output],
        )

    return interface


if __name__ == "__main__":
    interface = create_interface()
    # NOTE(review): '0.0.0.0' listens on every network interface while
    # debug=True is set — confirm this exposure is intended for deployment.
    interface.launch(
        share=False,
        debug=True,
        show_error=True,
        server_name='0.0.0.0',
        server_port=7860,
    )