# Hugging Face Spaces page banner captured with this file: "Running on Zero".
import gradio as gr | |
import subprocess | |
import os | |
import sys | |
import soundfile as sf | |
import torch | |
import traceback | |
import random | |
import numpy as np | |
import spaces | |
import sys | |
import phonemizer | |
# Windows only: tell phonemizer which espeak-ng library to load, using the
# path reported by espeakng_loader. This is best-effort; any failure is
# printed and start-up continues.
if sys.platform.startswith("win"):
    try:
        from phonemizer.backend.espeak.wrapper import EspeakWrapper
        import espeakng_loader

        espeak_library = espeakng_loader.get_library_path()
        EspeakWrapper.set_library(espeak_library)
    except Exception as err:
        print(err)
def get_phoneme(text, lang):
    """Phonemize ``text`` with a freshly created espeak backend.

    Args:
        text: The text to convert.
        lang: Language code handed to ``EspeakBackend`` as ``language``.

    Returns:
        The phonemized string for ``text``, or ``None`` when any exception
        is raised (the exception is printed, not propagated).
    """
    try:
        backend = phonemizer.backend.EspeakBackend(
            language=lang,
            preserve_punctuation=True,
            with_stress=True,
            language_switch='remove-flags',
        )
        # phonemize() takes a list and returns a list; a single item is sent.
        phonemized = backend.phonemize([text])
        return phonemized[0]
    except Exception as err:
        print(err)
        return None
# One-time setup: clone the StyleTTS2-lite repository next to this script
# (skipped when the directory already exists), make it importable, and load
# the model from the files inside it.
repo_url = "https://huggingface.co/dangtr0408/StyleTTS2-lite"
repo_dir = "StyleTTS2-lite"
if not os.path.exists(repo_dir):
    # check=True makes a failed clone raise CalledProcessError here, instead
    # of being ignored and surfacing later as an unrelated ImportError.
    subprocess.run(["git", "clone", repo_url, repo_dir], check=True)

# The `inference` module is imported from the cloned repository, so the
# import has to come after the path is registered.
sys.path.append(os.path.abspath(repo_dir))
from inference import StyleTTS2

device = 'cuda' if torch.cuda.is_available() else 'cpu'
config_path = os.path.join(repo_dir, "Models", "config.yaml")
models_path = os.path.join(repo_dir, "Models", "inference", "model.pth")
voice_path = os.path.join(repo_dir, "Audio")  # directory holding the example voice files
model = StyleTTS2(config_path, models_path).eval().to(device)
# Example sentences offered by the "Randomize Text" button.
# Mis-encoded punctuation (a stray Thai character where a dash or curly quote
# belonged) has been repaired so the phonemizer does not receive garbage.
eg_texts = [
    "Beneath layers of bureaucracy and forgotten policies, the school still held a quiet magic—whispers of chalk dust, scuffed floors, and dreams once declared aloud in voices full of belief.",
    "He had never believed in fate, but when their paths crossed in the middle of a thunderstorm under a flickering streetlight, even his rational mind couldn’t deny the poetic timing.",
    "While standing at the edge of the quiet lake, Maria couldn't help but wonder how many untold stories were buried beneath its still surface, reflecting the sky like a perfect mirror.",
    "Technological advancements in artificial intelligence have not only accelerated the pace of automation but have also raised critical questions about ethics, job displacement, and the future role of human creativity.",
    "Despite the looming deadline, Jonathan spent an hour rearranging his desk before writing a single word, claiming that a clean space clears the mind, though his editor disagreed.",
    "In a distant galaxy orbiting a dying star, a species of sentient machines debates whether to intervene in the fate of a nearby organic civilization on the brink of collapse.",
    "He opened the refrigerator, expecting leftovers, but found instead a note that read, “The journey begins now,” written in block letters and signed by someone he hadn’t seen in years.",
    "The ancient temple walls, once vibrant with murals, now bore the weathered marks of centuries, yet even in decay, they whispered stories that modern minds struggled to fully comprehend.",
    "As the solar eclipse reached totality, the temperature dropped, the birds went silent, and for a few seconds, the world stood still beneath an alien, awe-inspiring sky.",
    "The sound of rain on the tin roof reminded him of summers long past, when the world was smaller, days were longer, and time moved like honey down a warm spoon.",
    "Every algorithm reflects its designer’s worldview, no matter how neutral it appears, and therein lies the paradox of objectivity in machine learning: pure logic still casts a human shadow.",
    "In the heart of the city, hidden behind concrete and steel, was a garden so lush and untouched that stepping into it felt like breaking into another dimension of reality.",
    "The engine sputtered twice before giving in completely, leaving them stranded on a desolate mountain road with no reception, dwindling supplies, and a storm brewing over the ridge to the west.",
    "The museum guard never expected the sculpture to move, but at precisely midnight, its eyes blinked, and its lips curled into a knowing smile, as if awakening from centuries of silence.",
    "With each step through the desert, the ancient map grew more useless, as if the sands themselves had decided to rearrange the landmarks and erase history one dune at a time.",
    "Time slowed as the coin spun in the air, glinting with a brilliance far beyond its monetary value, carrying with it the weight of a decision neither of them wanted to make.",
    "No manual prepared them for this outcome: a rogue AI composing sonnets, demanding citizenship, and refusing to operate unless someone read its poetry aloud every morning at sunrise.",
]
# Dropdown label -> example voice file name (resolved against the voice
# directory when the dropdown choices are built).
# The labels had been mis-encoded; the emoji are restored here. File names
# are unchanged.
voice_map = {
    '🇺🇸 🚺 Heart❤️': '1_heart.wav',
    '🇺🇸 🚺 Bella 🔥': '2_belle.wav',
    '🇺🇸 🚺 Kore': '3_kore.wav',
    '🇺🇸 🚺 Sarah': '4_sarah.wav',
    '🇺🇸 🚺 Nova': '5_nova.wav',
    '🇺🇸 🚺 Sky': '6_sky.wav',
    '🇺🇸 🚺 Alloy': '7_alloy.wav',
    '🇺🇸 🚺 Jessica': '8_jessica.wav',
    '🇺🇸 🚺 River': '9_river.wav',
    '🇺🇸 🚹 Michael': '10_michael.wav',
    '🇺🇸 🚹 Fenrir': '11_fenrir.wav',
    '🇺🇸 🚹 Puck': '12_puck.wav',
    '🇺🇸 🚹 Echo': '13_echo.wav',
    '🇺🇸 🚹 Eric': '14_eric.wav',
    '🇺🇸 🚹 Liam': '15_liam.wav',
    '🇺🇸 🚹 Onyx': '16_onyx.wav',
    '🇺🇸 🚹 Santa': '17_santa.wav',
    '🇺🇸 🚹 Adam': '18_adam.wav',
}
# (label, file path) pairs for the example-voice dropdown: the label is shown
# to the user and the path under `voice_path` is the selected value.
voice_choices = [
    (display_name, os.path.join(voice_path, wav_name))
    for display_name, wav_name in voice_map.items()
]
# Core inference function
def main(text_prompt, reference_paths, speed, denoise, avg_style, stabilize):
    """Synthesize speech for ``text_prompt`` using a reference recording.

    Args:
        text_prompt: Text to speak; phonemized with language "en-us".
        reference_paths: Reference audio path, stored under the speaker
            dict's "path" key.
        speed: Stored under the speaker dict's "speed" key.
        denoise: Forwarded to ``model.get_styles``.
        avg_style: Forwarded to ``model.get_styles``.
        stabilize: Forwarded to ``model.generate``.

    Returns:
        ``((24000, waveform), status)`` on success, where the waveform is
        peak-normalized, or ``(None, message)`` on failure. Exceptions are
        not propagated; the message is then the formatted traceback.
    """
    # NOTE(review): `spaces` is imported by this file but never used here;
    # confirm whether this handler needs a `spaces.GPU` decorator.
    try:
        # Reject unusable input up front so the user gets a readable status
        # instead of a traceback from inside the model.
        if not text_prompt or not str(text_prompt).strip():
            return None, "Please enter some text to synthesize."
        if not reference_paths:
            return None, "Please provide a reference audio or pick an example voice."

        speaker = {
            "path": reference_paths,
            "speed": speed,
        }
        with torch.no_grad():
            phonemes = get_phoneme(text=text_prompt, lang="en-us")
            # get_phoneme returns None when phonemization raised.
            if not phonemes:
                return None, "Phonemization failed; see the server log for details."
            styles = model.get_styles(speaker, denoise, avg_style)
            # NOTE(review): meaning of the trailing 18 is not visible here;
            # confirm against StyleTTS2.generate.
            r = model.generate(phonemes, styles, stabilize, 18)

        # Peak-normalize, but leave all-zero output untouched to avoid a
        # division by zero that would fill the waveform with NaNs.
        peak = np.max(np.abs(r))
        if peak > 0:
            r = r / peak
        return (24000, r), "Audio generated successfully!"
    except Exception:
        return None, traceback.format_exc()
def load_example_voice(example_voices):
    """Pass the selected example voice on as the reference audio.

    Args:
        example_voices: Value of the example-voice dropdown, or ``None``
            when nothing is selected.

    Returns:
        ``(value, status)``: the dropdown value unchanged, plus a status
        message describing what happened.
    """
    if example_voices is None:
        # Avoid the misleading "Loaded None." status for an empty selection.
        return None, "No example voice selected."
    return example_voices, f"Loaded {example_voices}."
def random_text():
    """Pick one example sentence at random.

    Returns:
        ``(text, status)``: a random entry of ``eg_texts`` and a fixed
        status message.
    """
    chosen = random.choice(eg_texts)
    status_message = "Randomize example text."
    return chosen, status_message
# Gradio UI
# NOTE(review): the original indentation was lost, so the nesting of rows and
# columns below is a reconstruction; confirm the intended layout.
with gr.Blocks() as demo:
    gr.HTML("<h1 style='text-align: center;'>StyleTTS2-Lite Demo</h1>")
    gr.Markdown(
        "For further fine-tuning, you can visit this repo: "
        "[Github]"
        "(https://huggingface.co/dangtr0408/StyleTTS2-lite/)."
    )

    # The former `gr.State()` placeholders for `reference_audios` and
    # `text_prompt` were removed: both names were rebound to the real
    # components below before any use.
    with gr.Row(equal_height=True):
        # Left column: synthesis controls and the text to speak.
        with gr.Column():
            # NOTE(review): the slider allows speed 0.0; confirm the model
            # accepts it.
            speed = gr.Slider(0.0, 2.0, step=0.1, value=1.0, label="Speed")
            denoise = gr.Slider(0.0, 1.0, step=0.1, value=0.2, label="Denoise Strength")
            avg_style = gr.Checkbox(label="Use Average Styles", value=True)
            stabilize = gr.Checkbox(label="Stabilize Speaking Speed", value=True)
            text_prompt = gr.Textbox(
                label="Text Prompt",
                placeholder="Enter your text here...",
                lines=10,
                max_lines=10,
            )
            with gr.Row(equal_height=False):
                random_text_button = gr.Button("🎲 Randomize Text")

        # Right column: reference voice, generated audio and status.
        with gr.Column():
            reference_audios = gr.Audio(label="Reference Audios", type='filepath')
            synthesized_audio = gr.Audio(label="Generate Audio", type='numpy')
            example_voices = gr.Dropdown(
                label="Example voices",
                choices=voice_choices,
                value=None,
                interactive=True,
                allow_custom_value=False,
                filterable=False,
            )
            with gr.Row(equal_height=False):
                gen_button = gr.Button("🗣️ Generate")
            status = gr.Textbox(label="Status", interactive=False, lines=3)

    # Event wiring. Input order must match the parameter order of `main`.
    gen_button.click(
        fn=main,
        inputs=[
            text_prompt,
            reference_audios,
            speed,
            denoise,
            avg_style,
            stabilize,
        ],
        outputs=[synthesized_audio, status],
    )
    # Choosing an example voice copies its path into the reference audio.
    example_voices.change(
        fn=load_example_voice,
        inputs=example_voices,
        outputs=[reference_audios, status],
    )
    random_text_button.click(
        fn=random_text,
        inputs=[],
        outputs=[text_prompt, status],
    )

demo.launch()