File size: 2,132 Bytes
0c20337 a59b93c e16fd1e a59b93c a647645 e03756e e16fd1e 43e8b5c 5165e58 0c5c249 509a00f 8b02d24 0c5c249 5165e58 a59b93c 0c5c249 a59b93c 0c5c249 0c20337 a59b93c 0c5c249 5165e58 0c5c249 0c20337 a59b93c 0c5c249 0c20337 0c5c249 0c20337 0c5c249 a59b93c 0c5c249 5ad697d |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 |
import os

# Environment fixes for HF Spaces: redirect every cache/config location to
# /tmp.  This has to run BEFORE the third-party imports below, because
# libraries such as huggingface_hub and matplotlib resolve these paths when
# they are first imported; setting the variables afterwards is too late.
os.environ.update(
    {
        "HF_HOME": "/tmp/huggingface",
        "HF_HUB_CACHE": "/tmp/huggingface",
        "TRANSFORMERS_CACHE": "/tmp/huggingface",
        "MPLCONFIGDIR": "/tmp",
        "XDG_CACHE_HOME": "/tmp",
        "XDG_CONFIG_HOME": "/tmp",
        "NUMBA_DISABLE_CACHE": "1",
    }
)
os.makedirs("/tmp/huggingface", exist_ok=True)
os.makedirs("/tmp/flagged", exist_ok=True)

# Imports are deliberately placed after the environment setup (hence E402).
import time  # noqa: E402
import uuid  # noqa: E402

import gradio as gr  # noqa: E402
from openvoice.api import ToneColorConverter  # noqa: E402
from openvoice import se_extractor  # noqa: E402
import torch  # noqa: E402
# Set model paths
ckpt_converter = "checkpoints/converter/config.json"
output_dir = "/tmp/outputs"
os.makedirs(output_dir, exist_ok=True)

# Initialize OpenVoice converter.
# Choose the device explicitly: upstream OpenVoice defaults to "cuda:0" and
# asserts that CUDA is available, which fails on CPU-only hardware.
device = "cuda:0" if torch.cuda.is_available() else "cpu"
tone_color_converter = ToneColorConverter(ckpt_converter, device=device)
# NOTE(review): upstream OpenVoice examples also call
# `tone_color_converter.load_ckpt(".../checkpoint.pth")` after construction;
# confirm where the converter weights get loaded for this deployment.

# Most recently extracted speaker embedding (overwritten on every request).
ref_speaker_embed = None
def clone_and_speak(text, speaker_wav):
    """Synthesize ``text`` in the voice of the uploaded reference recording.

    Args:
        text: Text to speak.
        speaker_wav: Filesystem path of the reference audio (the Gradio input
            is declared with ``type="filepath"``); falsy when nothing was
            uploaded.

    Returns:
        Path of the generated ``.wav`` file inside ``output_dir``.

    Raises:
        gr.Error: If no reference audio was uploaded or ``text`` is blank.
    """
    global ref_speaker_embed

    if not speaker_wav:
        # The output component is gr.Audio, so returning a plain message would
        # be interpreted as an audio file path.  gr.Error surfaces the message
        # in the UI instead.
        raise gr.Error("Please upload a reference .wav file.")
    if not text or not text.strip():
        raise gr.Error("Please enter some text to synthesize.")

    # Generate a unique filename
    timestamp = int(time.time())
    base_name = f"output_{timestamp}_{uuid.uuid4().hex[:6]}"
    output_wav = os.path.join(output_dir, f"{base_name}.wav")

    # Extract style from uploaded speaker voice.
    extracted = se_extractor.get_se(speaker_wav, tone_color_converter)
    # Upstream OpenVoice returns ``(embedding, audio_name)``; also accept a
    # bare embedding in case the installed build returns it directly.
    speaker_embed = extracted[0] if isinstance(extracted, tuple) else extracted

    # Keep the module-level value updated, but hand the *local* to inference so
    # overlapping requests cannot pick up each other's embedding.
    ref_speaker_embed = speaker_embed

    # Generate speech using base model.
    # NOTE(review): upstream ToneColorConverter documents `convert(...)`
    # rather than `infer(...)`; confirm the installed openvoice build provides
    # this method and these keyword arguments.
    tone_color_converter.infer(
        text=text,
        speaker_id="openvoice",
        language="en",
        ref_speaker=speaker_wav,
        ref_embed=speaker_embed,
        output_path=output_wav,
        top_k=10,
        temperature=0.3,
    )
    return output_wav
# Gradio interface (exposed as global `demo` for HF Spaces)
demo = gr.Interface(
    fn=clone_and_speak,
    inputs=[
        gr.Textbox(label="Enter Text"),
        gr.Audio(type="filepath", label="Upload a Reference Voice (.wav)"),
    ],
    outputs=gr.Audio(label="Synthesized Output"),
    flagging_dir="/tmp/flagged",  # safe temporary dir
    title="Text to Voice using OpenVoice",
    description="Clone any voice (English) and generate speech using OpenVoice on CPU.",
)

# Launch as a separate statement: `launch()` does not return the Interface, so
# chaining it onto the constructor would leave `demo` bound to launch()'s
# return value instead of the app object.
demo.launch()
|