import gradio as gr
import os
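# Redirect every cache/config directory to /tmp so the app can run where the
# home directory is read-only (e.g. a hosted Hugging Face Space)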
os.environ["MPLCONFIGDIR"] = "/tmp"
os.environ["XDG_CACHE_HOME"] = "/tmp"
os.environ["NUMBA_DISABLE_CACHE"] = "1"
os.environ["HF_HOME"] = "/tmp/huggingface"
os.environ["HF_HUB_CACHE"] = "/tmp/huggingface"
os.environ["XDG_CONFIG_HOME"] = "/tmp"
os.environ["TRANSFORMERS_CACHE"] = "/tmp/huggingface"
from openvoice.api import BaseSpeakerTTS, ToneColorConverter
from openvoice import se_extractor
import torch
import time
import uuid
# Model paths (assumed to follow the standard OpenVoice V1 checkpoint layout) and CPU inference
ckpt_base = "checkpoints/base_speakers/EN"
ckpt_converter = "checkpoints/converter"
device = "cpu"
output_dir = "/tmp/outputs"
os.makedirs(output_dir, exist_ok=True)
# The base speaker TTS turns text into speech in a neutral voice;
# the tone color converter then re-voices it with the reference speaker's timbre
base_speaker_tts = BaseSpeakerTTS(f"{ckpt_base}/config.json", device=device)
base_speaker_tts.load_ckpt(f"{ckpt_base}/checkpoint.pth")
tone_color_converter = ToneColorConverter(f"{ckpt_converter}/config.json", device=device)
tone_color_converter.load_ckpt(f"{ckpt_converter}/checkpoint.pth")
# Tone color embedding of the default EN base speaker, shipped with the checkpoints
source_se = torch.load(f"{ckpt_base}/en_default_se.pth", map_location=device)
def clone_and_speak(text, speaker_wav):
    if not text:
        raise gr.Error("Please enter some text to synthesize.")
    if not speaker_wav:
        raise gr.Error("Please upload a reference .wav file.")

    # Unique output names so concurrent requests do not overwrite each other
    base_name = f"output_{int(time.time())}_{uuid.uuid4().hex[:6]}"
    src_wav = os.path.join(output_dir, f"{base_name}_base.wav")
    output_wav = os.path.join(output_dir, f"{base_name}.wav")

    # Extract the tone color embedding from the uploaded reference voice
    # (intermediate segment files are kept under /tmp as well)
    target_se, _ = se_extractor.get_se(
        speaker_wav, tone_color_converter, target_dir="/tmp/processed"
    )

    # Step 1: synthesize the text with the neutral base speaker
    base_speaker_tts.tts(text, src_wav, speaker="default", language="English")

    # Step 2: convert the base speech to the reference speaker's tone color
    tone_color_converter.convert(
        audio_src_path=src_wav,
        src_se=source_se,
        tgt_se=target_se,
        output_path=output_wav,
    )
    return output_wav
demo = gr.Interface(
    fn=clone_and_speak,
    inputs=[
        gr.Textbox(label="Enter Text"),
        gr.Audio(type="filepath", label="Upload a Reference Voice (.wav)"),
    ],
    outputs=gr.Audio(label="Synthesized Output"),
    title="Text to Voice using OpenVoice",
    description="Clone any voice (English) and generate speech using OpenVoice on CPU.",
)
if __name__ == "__main__":
    demo.launch()