Spaces:
Running
on
Zero
Running
on
Zero
Refactor text validation to a character counter; streamline speech generation process and improve UI feedback
Browse files
app.py
CHANGED
@@ -162,13 +162,11 @@ def redistribute_codes(code_list, snac_model):
|
|
162 |
audio_hat = snac_model.decode(codes)
|
163 |
return audio_hat.detach().squeeze().cpu().numpy()
|
164 |
|
165 |
-
#
|
166 |
-
def
|
167 |
-
"""
|
168 |
-
|
169 |
-
|
170 |
-
return text[:MAX_LENGTH], True # Return truncated text and truncation flag
|
171 |
-
return text, False
|
172 |
|
173 |
# Main generation function with rate limiting
|
174 |
@rate_limit
|
@@ -178,16 +176,16 @@ def generate_speech(text, temperature=0.6, top_p=0.95, repetition_penalty=1.1, m
|
|
178 |
gr.Warning("Please enter some text to generate speech.")
|
179 |
return None
|
180 |
|
181 |
-
#
|
182 |
-
|
183 |
-
|
184 |
-
gr.Warning(
|
185 |
|
186 |
try:
|
187 |
progress(0.1, "Processing text...")
|
188 |
-
print(f"Generating speech for text: {
|
189 |
|
190 |
-
input_ids, attention_mask = process_prompt(
|
191 |
|
192 |
progress(0.3, "Generating speech tokens...")
|
193 |
with torch.no_grad():
|
@@ -229,21 +227,16 @@ def generate_speech(text, temperature=0.6, top_p=0.95, repetition_penalty=1.1, m
|
|
229 |
|
230 |
# Examples - reduced for quota management
|
231 |
examples = [
|
232 |
-
["ជំរាបសួរ ខ្ញុំឈ្មោះ Kiri ហើយខ្ញុំជា AI ដែលអាចបម្លែងអត្ថបទទៅជាសំលេង។"],
|
233 |
["ខ្ញុំអាចបង្កើតសំលេងនិយាយផ្សេងៗ ដូចជា <laugh> សើច។"],
|
234 |
["ម្សិលមិញ ខ្ញុំឃើញឆ្មាមួយក្បាលដេញចាប់កន្ទុយខ្លួនឯង។ <laugh> វាគួរឲ្យអស់សំណើចណាស់។"],
|
235 |
["ខ្ញុំរៀបចំម្ហូប ស្រាប់តែធ្វើជ្រុះគ្រឿងទេសពេញឥដ្ឋ។ <chuckle> វាប្រឡាក់អស់ហើយ។"],
|
236 |
["ថ្ងៃនេះហត់ណាស់ ធ្វើការពេញមួយថ្ងៃ។ <sigh> ចង់ទៅផ្ទះសម្រាកហើយ។"],
|
237 |
-
["អាកាសធាតុត្រជាក់ ធ្វើឲ្យខ្ញុំផ្តាសាយតិចៗ។ <sniffle> ខ្ញុំក៏ក្អកដែរ។ <cough>"],
|
238 |
-
["ការប្រឡងមិនបានល្អដូចការរំពឹងទុកទេ។ <groan> ខ្ញុំត្រូវរៀនឲ្យខ្លាំងជាងនេះ។"],
|
239 |
-
["កិច្ចប្រជុំនេះវែងអន្លាយពេកហើយ។ <yawn> ខ្ញុំចាប់ផ្តើមងងុយគេងហើយ។"],
|
240 |
-
["ខ្ញុំដើរទៅទិញអីញ៉ាំ ស្រាប់តែឃើញឆ្កែធំមួយរត់មករកខ្ញុំ។ <gasp> ខ្ញុំភ័យណាស់!"],
|
241 |
-
["អរគុណច្រើនសម្រាប់ជំនួយ។ <chuckle> បើគ្មានអ្នកទេ ខ្ញុំមិនដឹងធ្វើយ៉ាងម៉េចទេ។"],
|
242 |
]
|
243 |
|
244 |
EMOTIVE_TAGS = ["`<laugh>`", "`<chuckle>`", "`<sigh>`", "`<cough>`", "`<sniffle>`", "`<groan>`", "`<yawn>`", "`<gasp>`"]
|
245 |
|
246 |
-
# Create custom CSS
|
247 |
css = """
|
248 |
.gradio-container {
|
249 |
max-width: 1200px;
|
@@ -296,11 +289,17 @@ with gr.Blocks(title="Khmer Text-to-Speech", css=css, theme=gr.themes.Soft()) as
|
|
296 |
lines=4,
|
297 |
max_lines=6,
|
298 |
interactive=True,
|
299 |
-
|
300 |
)
|
301 |
|
302 |
-
#
|
303 |
-
char_info = gr.
|
|
|
|
|
|
|
|
|
|
|
|
|
304 |
|
305 |
# Advanced Settings
|
306 |
with gr.Accordion("🔧 Advanced Settings", open=False):
|
@@ -347,51 +346,38 @@ with gr.Blocks(title="Khmer Text-to-Speech", css=css, theme=gr.themes.Soft()) as
|
|
347 |
label="📝 Example Texts (អត្ថបទគំរូ) - Click example then press Generate"
|
348 |
)
|
349 |
|
350 |
-
#
|
351 |
-
|
352 |
-
|
353 |
-
|
354 |
-
|
355 |
-
|
356 |
-
if (textInput && charCounter) {
|
357 |
-
function updateCounter() {
|
358 |
-
const length = textInput.value.length;
|
359 |
-
const maxLength = 150;
|
360 |
-
charCounter.textContent = `Characters: ${length}/${maxLength}`;
|
361 |
-
|
362 |
-
if (length > maxLength) {
|
363 |
-
charCounter.style.color = '#ff6b6b';
|
364 |
-
} else {
|
365 |
-
charCounter.style.color = '#666';
|
366 |
-
}
|
367 |
-
}
|
368 |
-
|
369 |
-
textInput.addEventListener('input', updateCounter);
|
370 |
-
updateCounter(); // Initial count
|
371 |
-
}
|
372 |
-
}
|
373 |
-
""")
|
374 |
|
375 |
-
# Set up event handlers
|
376 |
submit_btn.click(
|
377 |
-
fn=
|
|
|
|
|
|
|
378 |
inputs=[text_input, temperature, top_p, repetition_penalty, max_new_tokens],
|
379 |
-
outputs=audio_output,
|
380 |
show_progress=True
|
381 |
)
|
382 |
|
383 |
clear_btn.click(
|
384 |
-
fn=lambda: ("", None),
|
385 |
inputs=[],
|
386 |
-
outputs=[text_input, audio_output]
|
387 |
-
js="() => { document.querySelector('#char-counter').textContent = 'Characters: 0/150'; }"
|
388 |
)
|
389 |
|
390 |
# Add keyboard shortcut
|
391 |
text_input.submit(
|
392 |
-
fn=
|
|
|
|
|
|
|
393 |
inputs=[text_input, temperature, top_p, repetition_penalty, max_new_tokens],
|
394 |
-
outputs=audio_output,
|
395 |
show_progress=True
|
396 |
)
|
397 |
|
|
|
162 |
audio_hat = snac_model.decode(codes)
|
163 |
return audio_hat.detach().squeeze().cpu().numpy()
|
164 |
|
165 |
+
# Simple character counter function (only called when needed)
|
166 |
+
def update_char_count(text):
|
167 |
+
"""Simple character counter - no text modification"""
|
168 |
+
count = len(text) if text else 0
|
169 |
+
return f"Characters: {count}/150"
|
|
|
|
|
170 |
|
171 |
# Main generation function with rate limiting
|
172 |
@rate_limit
|
|
|
176 |
gr.Warning("Please enter some text to generate speech.")
|
177 |
return None
|
178 |
|
179 |
+
# Check length and truncate if needed
|
180 |
+
if len(text) > 150:
|
181 |
+
text = text[:150]
|
182 |
+
gr.Warning("Text was truncated to 150 characters.")
|
183 |
|
184 |
try:
|
185 |
progress(0.1, "Processing text...")
|
186 |
+
print(f"Generating speech for text: {text[:50]}...")
|
187 |
|
188 |
+
input_ids, attention_mask = process_prompt(text, voice, tokenizer, device)
|
189 |
|
190 |
progress(0.3, "Generating speech tokens...")
|
191 |
with torch.no_grad():
|
|
|
227 |
|
228 |
# Examples - reduced for quota management
|
229 |
examples = [
|
230 |
+
["ជំរាបសួរ <laugh> ខ្ញុំឈ្មោះ Kiri ហើយខ្ញុំជា AI ដែលអាចបម្លែងអត្ថបទទៅជាសំលេង។"],
|
231 |
["ខ្ញុំអាចបង្កើតសំលេងនិយាយផ្សេងៗ ដូចជា <laugh> សើច។"],
|
232 |
["ម្សិលមិញ ខ្ញុំឃើញឆ្មាមួយក្បាលដេញចាប់កន្ទុយខ្លួនឯង។ <laugh> វាគួរឲ្យអស់សំណើចណាស់។"],
|
233 |
["ខ្ញុំរៀបចំម្ហូប ស្រាប់តែធ្វើជ្រុះគ្រឿងទេសពេញឥដ្ឋ។ <chuckle> វាប្រឡាក់អស់ហើយ។"],
|
234 |
["ថ្ងៃនេះហត់ណាស់ ធ្វើការពេញមួយថ្ងៃ។ <sigh> ចង់ទៅផ្ទះសម្រាកហើយ។"],
|
|
|
|
|
|
|
|
|
|
|
235 |
]
|
236 |
|
237 |
EMOTIVE_TAGS = ["`<laugh>`", "`<chuckle>`", "`<sigh>`", "`<cough>`", "`<sniffle>`", "`<groan>`", "`<yawn>`", "`<gasp>`"]
|
238 |
|
239 |
+
# Create custom CSS
|
240 |
css = """
|
241 |
.gradio-container {
|
242 |
max-width: 1200px;
|
|
|
289 |
lines=4,
|
290 |
max_lines=6,
|
291 |
interactive=True,
|
292 |
+
max_length=150 # Built-in Gradio character limit
|
293 |
)
|
294 |
|
295 |
+
# Simple character counter
|
296 |
+
char_info = gr.Textbox(
|
297 |
+
value="Characters: 0/150",
|
298 |
+
interactive=False,
|
299 |
+
show_label=False,
|
300 |
+
container=False,
|
301 |
+
elem_classes=["char-counter"]
|
302 |
+
)
|
303 |
|
304 |
# Advanced Settings
|
305 |
with gr.Accordion("🔧 Advanced Settings", open=False):
|
|
|
346 |
label="📝 Example Texts (អត្ថបទគំរូ) - Click example then press Generate"
|
347 |
)
|
348 |
|
349 |
+
# Character counter - only updates when focus lost or generation clicked
|
350 |
+
text_input.blur(
|
351 |
+
fn=update_char_count,
|
352 |
+
inputs=[text_input],
|
353 |
+
outputs=[char_info]
|
354 |
+
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
355 |
|
356 |
+
# Set up event handlers
|
357 |
submit_btn.click(
|
358 |
+
fn=lambda text, temp, top_p, rep_pen, max_tok: [
|
359 |
+
generate_speech(text, temp, top_p, rep_pen, max_tok),
|
360 |
+
update_char_count(text)
|
361 |
+
],
|
362 |
inputs=[text_input, temperature, top_p, repetition_penalty, max_new_tokens],
|
363 |
+
outputs=[audio_output, char_info],
|
364 |
show_progress=True
|
365 |
)
|
366 |
|
367 |
clear_btn.click(
|
368 |
+
fn=lambda: ("", None, "Characters: 0/150"),
|
369 |
inputs=[],
|
370 |
+
outputs=[text_input, audio_output, char_info]
|
|
|
371 |
)
|
372 |
|
373 |
# Add keyboard shortcut
|
374 |
text_input.submit(
|
375 |
+
fn=lambda text, temp, top_p, rep_pen, max_tok: [
|
376 |
+
generate_speech(text, temp, top_p, rep_pen, max_tok),
|
377 |
+
update_char_count(text)
|
378 |
+
],
|
379 |
inputs=[text_input, temperature, top_p, repetition_penalty, max_new_tokens],
|
380 |
+
outputs=[audio_output, char_info],
|
381 |
show_progress=True
|
382 |
)
|
383 |
|