"""Gradio app: clone a reference voice and synthesize text with OpenVoice.

The script redirects every cache/config directory to ``/tmp`` (the only
location assumed writable in the hosting environment), builds a single
``ToneColorConverter`` at import time, and serves ``clone_and_speak`` through
a Gradio ``Interface`` bound to the module-level name ``demo``.
"""
import os
import time
import uuid

# Cache/config locations must be exported BEFORE the third-party imports
# below: libraries such as huggingface_hub (pulled in by gradio) resolve
# these variables when they are first imported, so setting them afterwards
# has no effect.
os.environ["TORCH_HOME"] = "/tmp/torch"
os.environ["HF_HOME"] = "/tmp/huggingface"
os.environ["HF_HUB_CACHE"] = "/tmp/huggingface"
os.environ["TRANSFORMERS_CACHE"] = "/tmp/huggingface"
os.environ["MPLCONFIGDIR"] = "/tmp"
os.environ["XDG_CACHE_HOME"] = "/tmp"
os.environ["XDG_CONFIG_HOME"] = "/tmp"
os.environ["NUMBA_DISABLE_CACHE"] = "1"

os.makedirs("/tmp/torch", exist_ok=True)
os.makedirs("/tmp/huggingface", exist_ok=True)
os.makedirs("/tmp/flagged", exist_ok=True)

import torch  # noqa: E402,F401  (kept: imported by the original file)
import gradio as gr  # noqa: E402
from openvoice import se_extractor  # noqa: E402
from openvoice.api import ToneColorConverter  # noqa: E402

# Model configuration and output location.
ckpt_converter = "checkpoints/converter/config.json"
output_dir = "/tmp/outputs"
os.makedirs(output_dir, exist_ok=True)

# Initialize the OpenVoice converter once; it is shared by all requests.
# NOTE(review): upstream OpenVoice examples also call
# `tone_color_converter.load_ckpt(...)` after construction -- confirm whether
# the installed version needs it.
tone_color_converter = ToneColorConverter(ckpt_converter)

# Most recently extracted speaker embedding (overwritten on every request).
ref_speaker_embed = None


def clone_and_speak(text, speaker_wav):
    """Synthesize ``text`` in the voice of the uploaded reference recording.

    Args:
        text: Text entered by the user.
        speaker_wav: Filesystem path of the uploaded reference audio, as
            delivered by ``gr.Audio(type="filepath")``; falsy when nothing
            was uploaded.

    Returns:
        Path of the generated ``.wav`` file inside ``output_dir``.

    Raises:
        gr.Error: If no reference audio was uploaded or ``text`` is blank.
            The output component is ``gr.Audio``, so returning a plain
            message string would be treated as a file path; raising lets
            Gradio display the message instead.
    """
    global ref_speaker_embed

    if not speaker_wav:
        raise gr.Error("Please upload a reference .wav file.")
    if not text or not text.strip():
        raise gr.Error("Please enter some text to synthesize.")

    # Timestamp plus a short random suffix keeps concurrent outputs distinct.
    timestamp = str(int(time.time()))
    base_name = f"output_{timestamp}_{uuid.uuid4().hex[:6]}"
    output_wav = os.path.join(output_dir, f"{base_name}.wav")

    # Extract the style embedding from the uploaded voice. A local variable
    # is passed on to `convert` so a concurrent request overwriting the
    # module-level value cannot swap the embedding mid-call.
    # NOTE(review): upstream OpenVoice's `se_extractor.get_se` appears to
    # return a `(embedding, audio_name)` tuple -- confirm against the
    # installed version before relying on this value.
    speaker_embed = se_extractor.get_se(
        speaker_wav, tone_color_converter, vad=False
    )
    ref_speaker_embed = speaker_embed

    # NOTE(review): these keyword arguments do not match the upstream
    # `ToneColorConverter.convert(audio_src_path, src_se, tgt_se, ...)`
    # signature as documented -- verify against the installed OpenVoice.
    tone_color_converter.convert(
        text=text,
        speaker_id="openvoice",
        language="en",
        ref_speaker=speaker_wav,
        ref_embed=speaker_embed,
        output_path=output_wav,
        top_k=10,
        temperature=0.3,
    )

    return output_wav


# Gradio interface, bound to the global name `demo` so hosting tooling that
# looks the app up by that name can find it.
demo = gr.Interface(
    fn=clone_and_speak,
    inputs=[
        gr.Textbox(label="Enter Text"),
        gr.Audio(type="filepath", label="Upload a Reference Voice (.wav)"),
    ],
    outputs=gr.Audio(label="Synthesized Output"),
    flagging_dir="/tmp/flagged",  # safe temporary dir
    title="Text to Voice using OpenVoice",
    description="Clone any voice (English) and generate speech using OpenVoice on CPU.",
)

demo.launch()