"""Gradio front-end that runs OpenVoice tone-color conversion on an uploaded clip."""

import os

# Set before the remaining imports so the variable is already in the
# environment when any of them (presumably the audio stack behind openvoice)
# first imports numba -- TODO confirm which dependency needs this.
os.environ["NUMBA_DISABLE_CACHE"] = "1"

import sys

import gradio as gr
import torch

# Add openvoice path
sys.path.append("openvoice")

from openvoice.api import ToneColorConverter
from openvoice.inference import voice_conversion

# Set up paths
ckpt_converter = './checkpoints/converter'
device = "cuda" if torch.cuda.is_available() else "cpu"

# The converter is built and its checkpoint loaded once at import time so
# every request reuses the same instance.
converter = ToneColorConverter(f"{ckpt_converter}/config.json", device=device)
converter.load_ckpt(f"{ckpt_converter}/converter.ckpt")


def convert_voice(audio_file, text_prompt):
    """Convert the uploaded audio and return the path of the generated WAV.

    Args:
        audio_file: The input audio. With ``gr.Audio(type="filepath")`` Gradio
            passes a plain string path; objects exposing a ``.name`` attribute
            (e.g. temp-file wrappers) are also accepted.
        text_prompt: Free-form prompt text forwarded to ``voice_conversion``.

    Returns:
        The path of the written output file (``./results/output.wav``).

    Raises:
        gr.Error: If no audio was provided.
    """
    if audio_file is None:
        raise gr.Error("Please provide an input audio file.")

    # type="filepath" yields a str, which has no ``.name``; only fall back to
    # ``.name`` for file-like objects.
    if isinstance(audio_file, (str, os.PathLike)):
        audio_path = os.fspath(audio_file)
    else:
        audio_path = audio_file.name

    # NOTE: every request writes to this same file, so concurrent requests
    # overwrite each other's result.
    output_path = "./results/output.wav"
    # The results directory is not guaranteed to exist on a fresh checkout.
    os.makedirs(os.path.dirname(output_path), exist_ok=True)

    # You must clone reference audio using clone.sh or similar step in Dockerfile
    # NOTE(review): argument order is kept exactly as in the original call;
    # confirm it against the signature of openvoice.inference.voice_conversion.
    voice_conversion(converter, audio_path, text_prompt, output_path, device)
    return output_path


iface = gr.Interface(
    fn=convert_voice,
    inputs=[
        gr.Audio(type="filepath", label="Input Voice (WAV)"),
        gr.Textbox(label="Prompt (e.g., 'Speak in a cheerful tone')"),
    ],
    outputs=gr.Audio(label="Converted Voice"),
)

iface.launch()