Spaces:

mrrtmob
/

khmer-tts

Running on Zero

App Files Files Community

mrrtmob committed on 25 days ago

Commit

54096db

1 Parent(s): c6ae943

Refactor text validation to a character counter; streamline speech generation process and improve UI feedback

Browse files

Files changed (1) hide show

app.py +41 -55

app.py CHANGED Viewed

@@ -162,13 +162,11 @@ def redistribute_codes(code_list, snac_model):
     audio_hat = snac_model.decode(codes)
     return audio_hat.detach().squeeze().cpu().numpy()
-# Text validation function - checks length and truncates to MAX_LENGTH, returning a truncation flag
-def validate_and_truncate_text(text):
-    """Validate and truncate text length"""
-    MAX_LENGTH = 150
-    if len(text) > MAX_LENGTH:
-        return text[:MAX_LENGTH], True  # Return truncated text and truncation flag
-    return text, False
 # Main generation function with rate limiting
 @rate_limit
@@ -178,16 +176,16 @@ def generate_speech(text, temperature=0.6, top_p=0.95, repetition_penalty=1.1, m
         gr.Warning("Please enter some text to generate speech.")
         return None
-    # Validate and truncate text length
-    validated_text, was_truncated = validate_and_truncate_text(text)
-    if was_truncated:
-        gr.Warning(f"Text was truncated to 150 characters for processing.")
     try:
         progress(0.1, "Processing text...")
-        print(f"Generating speech for text: {validated_text[:50]}...")
-        input_ids, attention_mask = process_prompt(validated_text, voice, tokenizer, device)
         progress(0.3, "Generating speech tokens...")
         with torch.no_grad():
@@ -229,21 +227,16 @@ def generate_speech(text, temperature=0.6, top_p=0.95, repetition_penalty=1.1, m
 # Examples - reduced for quota management
 examples = [
-    ["ជំរាបសួរ ខ្ញុំឈ្មោះ Kiri ហើយខ្ញុំជា AI ដែលអាចបម្លែងអត្ថបទទៅជាសំលេង។"],
     ["ខ្ញុំអាចបង្កើតសំលេងនិយាយផ្សេងៗ ដូចជា <laugh> សើច។"],
     ["ម្សិលមិញ ខ្ញុំឃើញឆ្មាមួយក្បាលដេញចាប់កន្ទុយខ្លួនឯង។ <laugh> វាគួរឲ្យអស់សំណើចណាស់។"],
     ["ខ្ញុំរៀបចំម្ហូប ស្រាប់តែធ្វើជ្រុះគ្រឿងទេសពេញឥដ្ឋ។ <chuckle> វាប្រឡាក់អស់ហើយ។"],
     ["ថ្ងៃនេះហត់ណាស់ ធ្វើការពេញមួយថ្ងៃ។ <sigh> ចង់ទៅផ្ទះសម្រាកហើយ។"],
-    ["អាកាសធាតុត្រជាក់ ធ្វើឲ្យខ្ញុំផ្តាសាយតិចៗ។ <sniffle> ខ្ញុំក៏ក្អកដែរ។ <cough>"],
-    ["ការប្រឡងមិនបានល្អដូចការរំពឹងទុកទេ។ <groan> ខ្ញុំត្រូវរៀនឲ្យខ្លាំងជាងនេះ។"],
-    ["កិច្ចប្រជុំនេះវែងអន្លាយពេកហើយ។ <yawn> ខ្ញុំចាប់ផ្តើមងងុយគេងហើយ។"],
-    ["ខ្ញុំដើរទៅទិញអីញ៉ាំ ស្រាប់តែឃើញឆ្កែធំមួយរត់មករកខ្ញុំ។ <gasp> ខ្ញុំភ័យណាស់!"],
-    ["អរគុណច្រើនសម្រាប់ជំនួយ។ <chuckle> បើគ្មានអ្នកទេ ខ្ញុំមិនដឹងធ្វើយ៉ាងម៉េចទេ។"],
 ]
 EMOTIVE_TAGS = ["`<laugh>`", "`<chuckle>`", "`<sigh>`", "`<cough>`", "`<sniffle>`", "`<groan>`", "`<yawn>`", "`<gasp>`"]
-# Create custom CSS with character counter using JavaScript
 css = """
 .gradio-container {
     max-width: 1200px;
@@ -296,11 +289,17 @@ with gr.Blocks(title="Khmer Text-to-Speech", css=css, theme=gr.themes.Soft()) as
                 lines=4,
                 max_lines=6,
                 interactive=True,
-                elem_id="text_input"
             )
-            # Static character counter - will be updated by JavaScript
-            char_info = gr.HTML('<div class="char-counter" id="char-counter">Characters: 0/150</div>')
             # Advanced Settings
             with gr.Accordion("🔧 Advanced Settings", open=False):
@@ -347,51 +346,38 @@ with gr.Blocks(title="Khmer Text-to-Speech", css=css, theme=gr.themes.Soft()) as
         label="📝 Example Texts (អត្ថបទគំរូ) - Click example then press Generate"
     )
-    # Add JavaScript for real-time character counting without server calls
-    demo.load(js="""
-    function() {
-        const textInput = document.querySelector('#text_input textarea');
-        const charCounter = document.querySelector('#char-counter');
-        if (textInput && charCounter) {
-            function updateCounter() {
-                const length = textInput.value.length;
-                const maxLength = 150;
-                charCounter.textContent = `Characters: ${length}/${maxLength}`;
-                if (length > maxLength) {
-                    charCounter.style.color = '#ff6b6b';
-                } else {
-                    charCounter.style.color = '#666';
-                }
-            }
-            textInput.addEventListener('input', updateCounter);
-            updateCounter(); // Initial count
-        }
-    }
-    """)
-    # Set up event handlers - NO text change event
     submit_btn.click(
-        fn=generate_speech,
         inputs=[text_input, temperature, top_p, repetition_penalty, max_new_tokens],
-        outputs=audio_output,
         show_progress=True
     )
     clear_btn.click(
-        fn=lambda: ("", None),
         inputs=[],
-        outputs=[text_input, audio_output],
-        js="() => { document.querySelector('#char-counter').textContent = 'Characters: 0/150'; }"
     )
     # Add keyboard shortcut
     text_input.submit(
-        fn=generate_speech,
         inputs=[text_input, temperature, top_p, repetition_penalty, max_new_tokens],
-        outputs=audio_output,
         show_progress=True
     )

     audio_hat = snac_model.decode(codes)
     return audio_hat.detach().squeeze().cpu().numpy()
+# Simple character counter function (only called when needed)
+def update_char_count(text):
+    """Simple character counter - no text modification"""
+    count = len(text) if text else 0
+    return f"Characters: {count}/150"
 # Main generation function with rate limiting
 @rate_limit
         gr.Warning("Please enter some text to generate speech.")
         return None
+    # Check length and truncate if needed
+    if len(text) > 150:
+        text = text[:150]
+        gr.Warning("Text was truncated to 150 characters.")
     try:
         progress(0.1, "Processing text...")
+        print(f"Generating speech for text: {text[:50]}...")
+        input_ids, attention_mask = process_prompt(text, voice, tokenizer, device)
         progress(0.3, "Generating speech tokens...")
         with torch.no_grad():
 # Examples - reduced for quota management
 examples = [
+    ["ជំរាបសួរ <laugh> ខ្ញុំឈ្មោះ Kiri ហើយខ្ញុំជា AI ដែលអាចបម្លែងអត្ថបទទៅជាសំលេង។"],
     ["ខ្ញុំអាចបង្កើតសំលេងនិយាយផ្សេងៗ ដូចជា <laugh> សើច។"],
     ["ម្សិលមិញ ខ្ញុំឃើញឆ្មាមួយក្បាលដេញចាប់កន្ទុយខ្លួនឯង។ <laugh> វាគួរឲ្យអស់សំណើចណាស់។"],
     ["ខ្ញុំរៀបចំម្ហូប ស្រាប់តែធ្វើជ្រុះគ្រឿងទេសពេញឥដ្ឋ។ <chuckle> វាប្រឡាក់អស់ហើយ។"],
     ["ថ្ងៃនេះហត់ណាស់ ធ្វើការពេញមួយថ្ងៃ។ <sigh> ចង់ទៅផ្ទះសម្រាកហើយ។"],
 ]
 EMOTIVE_TAGS = ["`<laugh>`", "`<chuckle>`", "`<sigh>`", "`<cough>`", "`<sniffle>`", "`<groan>`", "`<yawn>`", "`<gasp>`"]
+# Create custom CSS
 css = """
 .gradio-container {
     max-width: 1200px;
                 lines=4,
                 max_lines=6,
                 interactive=True,
+                max_length=150  # Built-in Gradio character limit
             )
+            # Simple character counter
+            char_info = gr.Textbox(
+                value="Characters: 0/150",
+                interactive=False,
+                show_label=False,
+                container=False,
+                elem_classes=["char-counter"]
+            )
             # Advanced Settings
             with gr.Accordion("🔧 Advanced Settings", open=False):
         label="📝 Example Texts (អត្ថបទគំរូ) - Click example then press Generate"
     )
+    # Character counter - updates when the textbox loses focus, on generate (button or Enter), and on clear
+    text_input.blur(
+        fn=update_char_count,
+        inputs=[text_input],
+        outputs=[char_info]
+    )
+    # Set up event handlers
     submit_btn.click(
+        fn=lambda text, temp, top_p, rep_pen, max_tok: [
+            generate_speech(text, temp, top_p, rep_pen, max_tok),
+            update_char_count(text)
+        ],
         inputs=[text_input, temperature, top_p, repetition_penalty, max_new_tokens],
+        outputs=[audio_output, char_info],
         show_progress=True
     )
     clear_btn.click(
+        fn=lambda: ("", None, "Characters: 0/150"),
         inputs=[],
+        outputs=[text_input, audio_output, char_info]
     )
     # Add keyboard shortcut
     text_input.submit(
+        fn=lambda text, temp, top_p, rep_pen, max_tok: [
+            generate_speech(text, temp, top_p, rep_pen, max_tok),
+            update_char_count(text)
+        ],
         inputs=[text_input, temperature, top_p, repetition_penalty, max_new_tokens],
+        outputs=[audio_output, char_info],
         show_progress=True
     )