mrrtmob committed on
Commit
54096db
·
1 Parent(s): c6ae943

Refactor text validation to a character counter; streamline speech generation process and improve UI feedback

Browse files
Files changed (1) hide show
  1. app.py +41 -55
app.py CHANGED
@@ -162,13 +162,11 @@ def redistribute_codes(code_list, snac_model):
162
  audio_hat = snac_model.decode(codes)
163
  return audio_hat.detach().squeeze().cpu().numpy()
164
 
165
- # Text validation function - truncates over-long text and reports whether truncation occurred
166
- def validate_and_truncate_text(text):
167
- """Validate and truncate text length"""
168
- MAX_LENGTH = 150
169
- if len(text) > MAX_LENGTH:
170
- return text[:MAX_LENGTH], True # Return truncated text and truncation flag
171
- return text, False
172
 
173
  # Main generation function with rate limiting
174
  @rate_limit
@@ -178,16 +176,16 @@ def generate_speech(text, temperature=0.6, top_p=0.95, repetition_penalty=1.1, m
178
  gr.Warning("Please enter some text to generate speech.")
179
  return None
180
 
181
- # Validate and truncate text length
182
- validated_text, was_truncated = validate_and_truncate_text(text)
183
- if was_truncated:
184
- gr.Warning(f"Text was truncated to 150 characters for processing.")
185
 
186
  try:
187
  progress(0.1, "Processing text...")
188
- print(f"Generating speech for text: {validated_text[:50]}...")
189
 
190
- input_ids, attention_mask = process_prompt(validated_text, voice, tokenizer, device)
191
 
192
  progress(0.3, "Generating speech tokens...")
193
  with torch.no_grad():
@@ -229,21 +227,16 @@ def generate_speech(text, temperature=0.6, top_p=0.95, repetition_penalty=1.1, m
229
 
230
  # Examples - reduced for quota management
231
  examples = [
232
- ["ជំរាបសួរ ខ្ញុំឈ្មោះ Kiri ហើយខ្ញុំជា AI ដែលអាចបម្លែងអត្ថបទទៅជាសំលេង។"],
233
  ["ខ្ញុំអាចបង្កើតសំលេងនិយាយផ្សេងៗ ដូចជា <laugh> សើច។"],
234
  ["ម្សិលមិញ ខ្ញុំឃើញឆ្មាមួយក្បាលដេញចាប់កន្ទុយខ្លួនឯង។ <laugh> វាគួរឲ្យអស់សំណើចណាស់។"],
235
  ["ខ្ញុំរៀបចំម្ហូប ស្រាប់តែធ្វើជ្រុះគ្រឿងទេសពេញឥដ្ឋ។ <chuckle> វាប្រឡាក់អស់ហើយ។"],
236
  ["ថ្ងៃនេះហត់ណាស់ ធ្វើការពេញមួយថ្ងៃ។ <sigh> ចង់ទៅផ្ទះសម្រាកហើយ។"],
237
- ["អាកាសធាតុត្រជាក់ ធ្វើឲ្យខ្ញុំផ្តាសាយតិចៗ។ <sniffle> ខ្ញុំក៏ក្អកដែរ។ <cough>"],
238
- ["ការប្រឡងមិនបានល្អដូចការរំពឹងទុកទេ។ <groan> ខ្ញុំត្រូវរៀនឲ្យខ្លាំងជាងនេះ។"],
239
- ["កិច្ចប្រជុំនេះវែងអន្លាយពេកហើយ។ <yawn> ខ្ញុំចាប់ផ្តើមងងុយគេងហើយ។"],
240
- ["ខ្ញុំដើរទៅទិញអីញ៉ាំ ស្រាប់តែឃើញឆ្កែធំមួយរត់មករកខ្ញុំ។ <gasp> ខ្ញុំភ័យណាស់!"],
241
- ["អរគុណច្រើនសម្រាប់ជំនួយ។ <chuckle> បើគ្មានអ្នកទេ ខ្ញុំមិនដឹងធ្វើយ៉ាងម៉េចទេ។"],
242
  ]
243
 
244
  EMOTIVE_TAGS = ["`<laugh>`", "`<chuckle>`", "`<sigh>`", "`<cough>`", "`<sniffle>`", "`<groan>`", "`<yawn>`", "`<gasp>`"]
245
 
246
- # Create custom CSS with character counter using JavaScript
247
  css = """
248
  .gradio-container {
249
  max-width: 1200px;
@@ -296,11 +289,17 @@ with gr.Blocks(title="Khmer Text-to-Speech", css=css, theme=gr.themes.Soft()) as
296
  lines=4,
297
  max_lines=6,
298
  interactive=True,
299
- elem_id="text_input"
300
  )
301
 
302
- # Static character counter - will be updated by JavaScript
303
- char_info = gr.HTML('<div class="char-counter" id="char-counter">Characters: 0/150</div>')
 
 
 
 
 
 
304
 
305
  # Advanced Settings
306
  with gr.Accordion("🔧 Advanced Settings", open=False):
@@ -347,51 +346,38 @@ with gr.Blocks(title="Khmer Text-to-Speech", css=css, theme=gr.themes.Soft()) as
347
  label="📝 Example Texts (អត្ថបទគំរូ) - Click example then press Generate"
348
  )
349
 
350
- # Add JavaScript for real-time character counting without server calls
351
- demo.load(js="""
352
- function() {
353
- const textInput = document.querySelector('#text_input textarea');
354
- const charCounter = document.querySelector('#char-counter');
355
-
356
- if (textInput && charCounter) {
357
- function updateCounter() {
358
- const length = textInput.value.length;
359
- const maxLength = 150;
360
- charCounter.textContent = `Characters: ${length}/${maxLength}`;
361
-
362
- if (length > maxLength) {
363
- charCounter.style.color = '#ff6b6b';
364
- } else {
365
- charCounter.style.color = '#666';
366
- }
367
- }
368
-
369
- textInput.addEventListener('input', updateCounter);
370
- updateCounter(); // Initial count
371
- }
372
- }
373
- """)
374
 
375
- # Set up event handlers - NO text change event
376
  submit_btn.click(
377
- fn=generate_speech,
 
 
 
378
  inputs=[text_input, temperature, top_p, repetition_penalty, max_new_tokens],
379
- outputs=audio_output,
380
  show_progress=True
381
  )
382
 
383
  clear_btn.click(
384
- fn=lambda: ("", None),
385
  inputs=[],
386
- outputs=[text_input, audio_output],
387
- js="() => { document.querySelector('#char-counter').textContent = 'Characters: 0/150'; }"
388
  )
389
 
390
  # Add keyboard shortcut
391
  text_input.submit(
392
- fn=generate_speech,
 
 
 
393
  inputs=[text_input, temperature, top_p, repetition_penalty, max_new_tokens],
394
- outputs=audio_output,
395
  show_progress=True
396
  )
397
 
 
162
  audio_hat = snac_model.decode(codes)
163
  return audio_hat.detach().squeeze().cpu().numpy()
164
 
165
+ # Simple character counter function (only called when needed)
166
+ def update_char_count(text):
167
+ """Simple character counter - no text modification"""
168
+ count = len(text) if text else 0
169
+ return f"Characters: {count}/150"
 
 
170
 
171
  # Main generation function with rate limiting
172
  @rate_limit
 
176
  gr.Warning("Please enter some text to generate speech.")
177
  return None
178
 
179
+ # Check length and truncate if needed
180
+ if len(text) > 150:
181
+ text = text[:150]
182
+ gr.Warning("Text was truncated to 150 characters.")
183
 
184
  try:
185
  progress(0.1, "Processing text...")
186
+ print(f"Generating speech for text: {text[:50]}...")
187
 
188
+ input_ids, attention_mask = process_prompt(text, voice, tokenizer, device)
189
 
190
  progress(0.3, "Generating speech tokens...")
191
  with torch.no_grad():
 
227
 
228
  # Examples - reduced for quota management
229
  examples = [
230
+ ["ជំរាបសួរ <laugh> ខ្ញុំឈ្មោះ Kiri ហើយខ្ញុំជា AI ដែលអាចបម្លែងអត្ថបទទៅជាសំលេង។"],
231
  ["ខ្ញុំអាចបង្កើតសំលេងនិយាយផ្សេងៗ ដូចជា <laugh> សើច។"],
232
  ["ម្សិលមិញ ខ្ញុំឃើញឆ្មាមួយក្បាលដេញចាប់កន្ទុយខ្លួនឯង។ <laugh> វាគួរឲ្យអស់សំណើចណាស់។"],
233
  ["ខ្ញុំរៀបចំម្ហូប ស្រាប់តែធ្វើជ្រុះគ្រឿងទេសពេញឥដ្ឋ។ <chuckle> វាប្រឡាក់អស់ហើយ។"],
234
  ["ថ្ងៃនេះហត់ណាស់ ធ្វើការពេញមួយថ្ងៃ។ <sigh> ចង់ទៅផ្ទះសម្រាកហើយ។"],
 
 
 
 
 
235
  ]
236
 
237
  EMOTIVE_TAGS = ["`<laugh>`", "`<chuckle>`", "`<sigh>`", "`<cough>`", "`<sniffle>`", "`<groan>`", "`<yawn>`", "`<gasp>`"]
238
 
239
+ # Create custom CSS
240
  css = """
241
  .gradio-container {
242
  max-width: 1200px;
 
289
  lines=4,
290
  max_lines=6,
291
  interactive=True,
292
+ max_length=150 # Built-in Gradio character limit
293
  )
294
 
295
+ # Simple character counter
296
+ char_info = gr.Textbox(
297
+ value="Characters: 0/150",
298
+ interactive=False,
299
+ show_label=False,
300
+ container=False,
301
+ elem_classes=["char-counter"]
302
+ )
303
 
304
  # Advanced Settings
305
  with gr.Accordion("🔧 Advanced Settings", open=False):
 
346
  label="📝 Example Texts (អត្ថបទគំរូ) - Click example then press Generate"
347
  )
348
 
349
+ # Character counter - only updates when the text box loses focus or generation is triggered
350
+ text_input.blur(
351
+ fn=update_char_count,
352
+ inputs=[text_input],
353
+ outputs=[char_info]
354
+ )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
355
 
356
+ # Set up event handlers
357
  submit_btn.click(
358
+ fn=lambda text, temp, top_p, rep_pen, max_tok: [
359
+ generate_speech(text, temp, top_p, rep_pen, max_tok),
360
+ update_char_count(text)
361
+ ],
362
  inputs=[text_input, temperature, top_p, repetition_penalty, max_new_tokens],
363
+ outputs=[audio_output, char_info],
364
  show_progress=True
365
  )
366
 
367
  clear_btn.click(
368
+ fn=lambda: ("", None, "Characters: 0/150"),
369
  inputs=[],
370
+ outputs=[text_input, audio_output, char_info]
 
371
  )
372
 
373
  # Add keyboard shortcut
374
  text_input.submit(
375
+ fn=lambda text, temp, top_p, rep_pen, max_tok: [
376
+ generate_speech(text, temp, top_p, rep_pen, max_tok),
377
+ update_char_count(text)
378
+ ],
379
  inputs=[text_input, temperature, top_p, repetition_penalty, max_new_tokens],
380
+ outputs=[audio_output, char_info],
381
  show_progress=True
382
  )
383