dangtr0408 committed on
Commit
9c765a4
·
verified ·
1 Parent(s): 3416dca

Upload 5 files

Browse files
Files changed (5) hide show
  1. .gitattributes +35 -35
  2. README.md +13 -14
  3. app.py +175 -0
  4. packages.txt +1 -0
  5. requirements.txt +10 -0
.gitattributes CHANGED
@@ -1,35 +1,35 @@
1
- *.7z filter=lfs diff=lfs merge=lfs -text
2
- *.arrow filter=lfs diff=lfs merge=lfs -text
3
- *.bin filter=lfs diff=lfs merge=lfs -text
4
- *.bz2 filter=lfs diff=lfs merge=lfs -text
5
- *.ckpt filter=lfs diff=lfs merge=lfs -text
6
- *.ftz filter=lfs diff=lfs merge=lfs -text
7
- *.gz filter=lfs diff=lfs merge=lfs -text
8
- *.h5 filter=lfs diff=lfs merge=lfs -text
9
- *.joblib filter=lfs diff=lfs merge=lfs -text
10
- *.lfs.* filter=lfs diff=lfs merge=lfs -text
11
- *.mlmodel filter=lfs diff=lfs merge=lfs -text
12
- *.model filter=lfs diff=lfs merge=lfs -text
13
- *.msgpack filter=lfs diff=lfs merge=lfs -text
14
- *.npy filter=lfs diff=lfs merge=lfs -text
15
- *.npz filter=lfs diff=lfs merge=lfs -text
16
- *.onnx filter=lfs diff=lfs merge=lfs -text
17
- *.ot filter=lfs diff=lfs merge=lfs -text
18
- *.parquet filter=lfs diff=lfs merge=lfs -text
19
- *.pb filter=lfs diff=lfs merge=lfs -text
20
- *.pickle filter=lfs diff=lfs merge=lfs -text
21
- *.pkl filter=lfs diff=lfs merge=lfs -text
22
- *.pt filter=lfs diff=lfs merge=lfs -text
23
- *.pth filter=lfs diff=lfs merge=lfs -text
24
- *.rar filter=lfs diff=lfs merge=lfs -text
25
- *.safetensors filter=lfs diff=lfs merge=lfs -text
26
- saved_model/**/* filter=lfs diff=lfs merge=lfs -text
27
- *.tar.* filter=lfs diff=lfs merge=lfs -text
28
- *.tar filter=lfs diff=lfs merge=lfs -text
29
- *.tflite filter=lfs diff=lfs merge=lfs -text
30
- *.tgz filter=lfs diff=lfs merge=lfs -text
31
- *.wasm filter=lfs diff=lfs merge=lfs -text
32
- *.xz filter=lfs diff=lfs merge=lfs -text
33
- *.zip filter=lfs diff=lfs merge=lfs -text
34
- *.zst filter=lfs diff=lfs merge=lfs -text
35
- *tfevents* filter=lfs diff=lfs merge=lfs -text
 
1
+ *.7z filter=lfs diff=lfs merge=lfs -text
2
+ *.arrow filter=lfs diff=lfs merge=lfs -text
3
+ *.bin filter=lfs diff=lfs merge=lfs -text
4
+ *.bz2 filter=lfs diff=lfs merge=lfs -text
5
+ *.ckpt filter=lfs diff=lfs merge=lfs -text
6
+ *.ftz filter=lfs diff=lfs merge=lfs -text
7
+ *.gz filter=lfs diff=lfs merge=lfs -text
8
+ *.h5 filter=lfs diff=lfs merge=lfs -text
9
+ *.joblib filter=lfs diff=lfs merge=lfs -text
10
+ *.lfs.* filter=lfs diff=lfs merge=lfs -text
11
+ *.mlmodel filter=lfs diff=lfs merge=lfs -text
12
+ *.model filter=lfs diff=lfs merge=lfs -text
13
+ *.msgpack filter=lfs diff=lfs merge=lfs -text
14
+ *.npy filter=lfs diff=lfs merge=lfs -text
15
+ *.npz filter=lfs diff=lfs merge=lfs -text
16
+ *.onnx filter=lfs diff=lfs merge=lfs -text
17
+ *.ot filter=lfs diff=lfs merge=lfs -text
18
+ *.parquet filter=lfs diff=lfs merge=lfs -text
19
+ *.pb filter=lfs diff=lfs merge=lfs -text
20
+ *.pickle filter=lfs diff=lfs merge=lfs -text
21
+ *.pkl filter=lfs diff=lfs merge=lfs -text
22
+ *.pt filter=lfs diff=lfs merge=lfs -text
23
+ *.pth filter=lfs diff=lfs merge=lfs -text
24
+ *.rar filter=lfs diff=lfs merge=lfs -text
25
+ *.safetensors filter=lfs diff=lfs merge=lfs -text
26
+ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
27
+ *.tar.* filter=lfs diff=lfs merge=lfs -text
28
+ *.tar filter=lfs diff=lfs merge=lfs -text
29
+ *.tflite filter=lfs diff=lfs merge=lfs -text
30
+ *.tgz filter=lfs diff=lfs merge=lfs -text
31
+ *.wasm filter=lfs diff=lfs merge=lfs -text
32
+ *.xz filter=lfs diff=lfs merge=lfs -text
33
+ *.zip filter=lfs diff=lfs merge=lfs -text
34
+ *.zst filter=lfs diff=lfs merge=lfs -text
35
+ *tfevents* filter=lfs diff=lfs merge=lfs -text
README.md CHANGED
@@ -1,14 +1,13 @@
1
- ---
2
- title: StyleTTS2 Lite Space
3
- emoji: 📉
4
- colorFrom: green
5
- colorTo: pink
6
- sdk: gradio
7
- sdk_version: 5.26.0
8
- app_file: app.py
9
- pinned: false
10
- license: mit
11
- short_description: StyleTTS2-lite-space
12
- ---
13
-
14
- Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
 
1
+ ---
2
+ title: StyleTTS2 Lite Vi
3
+ emoji: 🦀
4
+ colorFrom: gray
5
+ colorTo: pink
6
+ sdk: gradio
7
+ sdk_version: 5.24.0
8
+ app_file: app.py
9
+ pinned: false
10
+ license: cc-by-nc-sa-4.0
11
+ ---
12
+
13
+ Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
 
app.py ADDED
@@ -0,0 +1,175 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ import subprocess
3
+ import os
4
+ import sys
5
+ import soundfile as sf
6
+ import torch
7
+ import traceback
8
+ import random
9
+ import spaces
10
+
11
+ import sys
12
+ import phonemizer
13
# On Windows, hand phonemizer's espeak wrapper the library path reported by
# espeakng_loader. This is best-effort: any failure is printed and startup
# continues.
running_on_windows = sys.platform.startswith("win")
if running_on_windows:
    try:
        from phonemizer.backend.espeak.wrapper import EspeakWrapper
        import espeakng_loader
        espeak_library_path = espeakng_loader.get_library_path()
        EspeakWrapper.set_library(espeak_library_path)
    except Exception as setup_error:
        print(setup_error)
20
+
21
def get_phoneme(text, lang):
    """Phonemize ``text`` with an espeak backend for language ``lang``.

    A fresh ``EspeakBackend`` is built on every call, keeping punctuation
    and stress marks and removing language-switch flags.

    Args:
        text: The sentence to convert.
        lang: Language code passed to the backend (the caller in this file
            uses ``"en-us"``).

    Returns:
        The first element of the backend's output for ``[text]``, or
        ``None`` when phonemization fails. Failure is best-effort: the
        error is printed instead of raised, so callers must check for
        ``None``.
    """
    try:
        my_phonemizer = phonemizer.backend.EspeakBackend(
            language=lang,
            preserve_punctuation=True,
            with_stress=True,
            language_switch='remove-flags',
        )
        return my_phonemizer.phonemize([text])[0]
    except Exception as e:
        # Keep the original best-effort contract, but say what failed and
        # return None explicitly rather than by falling off the end.
        print(f"Phonemization failed (lang={lang!r}): {e}")
        return None
27
+
28
# Source of the StyleTTS2-lite code and assets, and the local checkout directory.
repo_url = "https://huggingface.co/dangtr0408/StyleTTS2-lite"
repo_dir = "StyleTTS2-lite"
if not os.path.exists(repo_dir):
    # check=True makes a failed clone stop here with git's exit status,
    # instead of surfacing later as an unrelated import error.
    subprocess.run(["git", "clone", repo_url, repo_dir], check=True)
# The checkout is not an installed package, so expose it on the import path.
sys.path.append(os.path.abspath(repo_dir))
from inference import StyleTTS2

# Use the GPU when torch can see one, otherwise the CPU.
device = 'cuda' if torch.cuda.is_available() else 'cpu'
config_path = os.path.join(repo_dir, "Models", "config.yaml")
models_path = os.path.join(repo_dir, "Models", "inference", "model.pth")
voice_path = os.path.join(repo_dir, "Audio")
model = StyleTTS2(config_path, models_path).eval().to(device)
40
+
41
# Example sentences offered by the "Randomize Text" button.
# Punctuation (em dash, curly quotes, apostrophes) is restored from
# mis-decoded UTF-8 so the text reaches the phonemizer intact.
eg_texts = [
    "Beneath layers of bureaucracy and forgotten policies, the school still held a quiet magic—whispers of chalk dust, scuffed floors, and dreams once declared aloud in voices full of belief.",
    "He had never believed in fate, but when their paths crossed in the middle of a thunderstorm under a flickering streetlight, even his rational mind couldn’t deny the poetic timing.",
    "While standing at the edge of the quiet lake, Maria couldn't help but wonder how many untold stories were buried beneath its still surface, reflecting the sky like a perfect mirror.",
    "Technological advancements in artificial intelligence have not only accelerated the pace of automation but have also raised critical questions about ethics, job displacement, and the future role of human creativity.",
    "Despite the looming deadline, Jonathan spent an hour rearranging his desk before writing a single word, claiming that a clean space clears the mind, though his editor disagreed.",
    "In a distant galaxy orbiting a dying star, a species of sentient machines debates whether to intervene in the fate of a nearby organic civilization on the brink of collapse.",
    "He opened the refrigerator, expecting leftovers, but found instead a note that read, “The journey begins now,” written in block letters and signed by someone he hadn’t seen in years.",
    "The ancient temple walls, once vibrant with murals, now bore the weathered marks of centuries, yet even in decay, they whispered stories that modern minds struggled to fully comprehend.",
    "As the solar eclipse reached totality, the temperature dropped, the birds went silent, and for a few seconds, the world stood still beneath an alien, awe-inspiring sky.",
    "The sound of rain on the tin roof reminded him of summers long past, when the world was smaller, days were longer, and time moved like honey down a warm spoon.",
    "Every algorithm reflects its designer’s worldview, no matter how neutral it appears, and therein lies the paradox of objectivity in machine learning: pure logic still casts a human shadow.",
    "In the heart of the city, hidden behind concrete and steel, was a garden so lush and untouched that stepping into it felt like breaking into another dimension of reality.",
    "The engine sputtered twice before giving in completely, leaving them stranded on a desolate mountain road with no reception, dwindling supplies, and a storm brewing over the ridge to the west.",
    "The museum guard never expected the sculpture to move, but at precisely midnight, its eyes blinked, and its lips curled into a knowing smile, as if awakening from centuries of silence.",
    "With each step through the desert, the ancient map grew more useless, as if the sands themselves had decided to rearrange the landmarks and erase history one dune at a time.",
    "Time slowed as the coin spun in the air, glinting with a brilliance far beyond its monetary value, carrying with it the weight of a decision neither of them wanted to make.",
    "No manual prepared them for this outcome: a rogue AI composing sonnets, demanding citizenship, and refusing to operate unless someone read its poetry aloud every morning at sunrise.",
]
60
+
61
# Dropdown label -> reference recording filename (resolved under voice_path).
# The emoji in the labels are restored from mis-decoded UTF-8.
voice_map = {
    '🇺🇸 🚺 Heart❤️': '1_heart.wav',
    '🇺🇸 🚺 Bella 🔥': '2_belle.wav',
    '🇺🇸 🚺 Kore': '3_kore.wav',
    '🇺🇸 🚺 Sarah': '4_sarah.wav',
    '🇺🇸 🚺 Nova': '5_nova.wav',
    '🇺🇸 🚺 Sky': '6_sky.wav',
    '🇺🇸 🚺 Alloy': '7_alloy.wav',
    '🇺🇸 🚺 Jessica': '8_jessica.wav',
    '🇺🇸 🚺 River': '9_river.wav',
    '🇺🇸 🚹 Michael': '10_michael.wav',
    '🇺🇸 🚹 Fenrir': '11_fenrir.wav',
    '🇺🇸 🚹 Puck': '12_puck.wav',
    '🇺🇸 🚹 Echo': '13_echo.wav',
    '🇺🇸 🚹 Eric': '14_eric.wav',
    '🇺🇸 🚹 Liam': '15_liam.wav',
    '🇺🇸 🚹 Onyx': '16_onyx.wav',
    '🇺🇸 🚹 Santa': '17_santa.wav',
    '🇺🇸 🚹 Adam': '18_adam.wav',
}

# (label, file path) pairs in voice_map order, used as the Dropdown choices.
voice_choices = [
    (label, os.path.join(voice_path, filename))
    for label, filename in voice_map.items()
]
86
+
87
# Core inference function
@spaces.GPU
def main(text_prompt, reference_paths, speed, denoise, avg_style, stabilize):
    """Synthesize speech for ``text_prompt`` in the style of a reference voice.

    Args:
        text_prompt: Text to speak; phonemized with language ``"en-us"``.
        reference_paths: Reference audio, stored as the speaker's ``"path"``.
        speed: Stored as the speaker's ``"speed"``.
        denoise: Passed to ``model.get_styles``.
        avg_style: Passed to ``model.get_styles``.
        stabilize: Passed to ``model.generate``.

    Returns:
        ``(wav_path, status_message)`` on success, or ``(None, message)``
        when the input is unusable or an exception occurs. For an exception
        the message is the formatted traceback.
    """
    import tempfile  # local import keeps this block self-contained

    # Reject unusable input up front so the user sees a short message
    # instead of a traceback from deep inside the model.
    if not text_prompt or not str(text_prompt).strip():
        return None, "Please enter a text prompt."
    if not reference_paths:
        return None, "Please provide a reference audio or pick an example voice."

    try:
        speaker = {
            "path": reference_paths,
            "speed": speed
        }
        with torch.no_grad():
            phonemes = get_phoneme(text=text_prompt, lang="en-us")
            if phonemes is None:
                # get_phoneme reports failure by returning None.
                raise RuntimeError("Phonemization failed; see the server log for details.")

            styles = model.get_styles(speaker, denoise, avg_style)
            # NOTE(review): the meaning of the literal 18 is defined by
            # StyleTTS2.generate and is not visible here - confirm.
            r = model.generate(phonemes, styles, stabilize, 18)

        # Each request gets its own file: a single fixed filename would let
        # concurrent requests overwrite each other's audio. The file is
        # deliberately not deleted here because the UI still has to read it.
        with tempfile.NamedTemporaryFile(prefix="output_demo_", suffix=".wav", delete=False) as tmp:
            output_path = tmp.name
        sf.write(output_path, r, samplerate=24000)
        return output_path, "Audio generated successfully!"

    except Exception:
        # Show the full traceback in the status box rather than crashing the handler.
        return None, traceback.format_exc()
107
+
108
def load_example_voice(example_voices):
    """Return the selected example voice unchanged, plus a status line.

    The first element of the result is the value passed in; the second is
    the message ``"Loaded <value>."``.
    """
    status_message = "Loaded {}.".format(example_voices)
    return example_voices, status_message
110
+
111
def random_text():
    """Pick one entry of ``eg_texts`` at random and pair it with a status line."""
    chosen_sentence = random.choice(eg_texts)
    status_message = "Randomize example text."
    return chosen_sentence, status_message
113
+
114
# Gradio UI
# NOTE(review): the nesting of rows and columns below was reconstructed from
# flattened text - confirm it against the intended layout.
with gr.Blocks() as demo:
    gr.HTML("<h1 style='text-align: center;'>StyleTTS2‑Lite Demo</h1>")

    # The trailing space after "repo:" keeps the link from running into the text.
    gr.Markdown(
        "For further fine-tuning, you can visit this repo: "
        "[Github]"
        "(https://huggingface.co/dangtr0408/StyleTTS2-lite/)."
    )

    # The two gr.State() placeholders that used to be created here were
    # rebound to the real components below before any use, so they are dropped.

    with gr.Row(equal_height=True):
        with gr.Column():
            speed = gr.Slider(0.0, 2.0, step=0.1, value=1.0, label="Speed")
            denoise = gr.Slider(0.0, 1.0, step=0.1, value=0.0, label="Denoise Strength")
            avg_style = gr.Checkbox(label="Use Average Styles", value=False)
            stabilize = gr.Checkbox(label="Stabilize Speaking Speed", value=False)

            text_prompt = gr.Textbox(label="Text Prompt", placeholder="Enter your text here...", lines=10, max_lines=10)

            with gr.Row(equal_height=False):
                random_text_button = gr.Button("🎲 Randomize Text")

        with gr.Column():
            reference_audios = gr.Audio(label="Reference Audios", type='filepath')
            synthesized_audio = gr.Audio(label="Generate Audio", type="filepath")

            # choices are (label, value) pairs, so the initial value must be
            # a choice *value* (the file path), not its label.
            example_voices = gr.Dropdown(
                label="Example voices",
                choices=voice_choices,
                value=voice_choices[0][1],
                interactive=True,
            )

            with gr.Row(equal_height=False):
                gen_button = gr.Button("🗣️ Generate")

    status = gr.Textbox(label="Status", interactive=False, lines=3)

    # Generate: run inference and show the audio plus a status message.
    gen_button.click(
        fn=main,
        inputs=[
            text_prompt,
            reference_audios,
            speed,
            denoise,
            avg_style,
            stabilize
        ],
        outputs=[synthesized_audio, status]
    )

    # Selecting an example voice copies its file path into the reference audio.
    example_voices.change(fn=load_example_voice, inputs=example_voices, outputs=[reference_audios, status])
    random_text_button.click(
        fn=random_text,
        inputs=[],
        outputs=[text_prompt, status]
    )

demo.launch()
packages.txt ADDED
@@ -0,0 +1 @@
 
 
1
+ espeak-ng
requirements.txt ADDED
@@ -0,0 +1,10 @@
 
 
 
 
 
 
 
 
 
 
 
1
+ torch
2
+ torchaudio
3
+ numpy
4
+ PyYAML
5
+ munch
6
+ nltk
7
+ librosa
8
+ noisereduce
9
+ phonemizer
10
+ espeakng-loader