Spaces:

Hassan-16
/

TTS

Running

App Files Files Community

Hassan-16 committed on Jun 28

Commit

21977f5

verified ·

1 Parent(s): 5dda1a8

Update app.py

Browse files

Files changed (1) hide show

app.py +122 -50

app.py CHANGED Viewed

@@ -57,19 +57,19 @@ def load_voice_choices():
     for voice_file in voice_files:
         prefix = voice_file[:2]
         if prefix == 'af':
-            label = f"🇺🇸 🚺 {voice_file[3:-3].capitalize()}"
         elif prefix == 'am':
-            label = f"🇺🇸 🚹 {voice_file[3:-3].capitalize()}"
         elif prefix == 'bf':
-            label = f"🇬🇧 🚺 {voice_file[3:-3].capitalize()}"
         elif prefix == 'bm':
-            label = f"🇬🇧 🚹 {voice_file[3:-3].capitalize()}"
         else:
-            label = f"Unknown {voice_file[:-3]}"
         choices[label] = voice_file
     if not choices:
         logger.warning("No voice files found in VOICE_DIR. Adding a placeholder.")
-        choices = {"🇺🇸 🚺 Bella 🔥": "af_bella.pt"}
     VOICE_CHOICES = choices
     return choices
@@ -87,10 +87,10 @@ def generate_first(text, voice="af_bella.pt", speed=1, use_gpu=CUDA_AVAILABLE):
     start_time = time.time()
     if len(text) > 510:
         text = text[:510]
-        gr.Warning("Text truncated to 510 characters for performance.")
     voice_path = os.path.join(VOICE_DIR, voice)
     if not os.path.exists(voice_path):
-        raise FileNotFoundError(f"Voice file not found: {voice_path}")
     pipeline = pipelines[voice[0]]
     use_gpu = use_gpu and CUDA_AVAILABLE
@@ -117,10 +117,10 @@ def generate_first(text, voice="af_bella.pt", speed=1, use_gpu=CUDA_AVAILABLE):
 def tokenize_first(text, voice="af_bella.pt"):
     if len(text) > 510:
         text = text[:510]
-        gr.Warning("Text truncated to 510 characters for performance.")
     voice_path = os.path.join(VOICE_DIR, voice)
     if not os.path.exists(voice_path):
-        raise FileNotFoundError(f"Voice file not found: {voice_path}")
     pipeline = pipelines[voice[0]]
     generator = pipeline(text, voice=voice_path)
@@ -132,10 +132,10 @@ def generate_all(text, voice="af_bella.pt", speed=1, use_gpu=CUDA_AVAILABLE):
     start_time = time.time()
     if len(text) > 510:
         text = text[:510]
-        gr.Warning("Text truncated to 510 characters for performance.")
     voice_path = os.path.join(VOICE_DIR, voice)
     if not os.path.exists(voice_path):
-        raise FileNotFoundError(f"Voice file not found: {voice_path}")
     pipeline = pipelines[voice[0]]
     use_gpu = use_gpu and CUDA_AVAILABLE
@@ -151,52 +151,124 @@ def generate_all(text, voice="af_bella.pt", speed=1, use_gpu=CUDA_AVAILABLE):
     logger.info(f"Streaming generation time: {time.time() - start_time} seconds")
 TOKEN_NOTE = '''
-💡 Customize pronunciation with Markdown link syntax and /slashes/ like [Kokoro](/kˈOkəɹO/)
-💬 To adjust intonation, try punctuation ;:,.!?—…"()“” or stress ˈ and ˌ
-⬇️ Lower stress [1 level](-1) or [2 levels](-2)
-⬆️ Raise stress 1 level [or](+2) 2 levels (only works on less stressed, usually short words)
 '''
-with gr.Blocks(theme="soft") as app:
-    with gr.Row():
-        with gr.Column():
-            text = gr.Textbox(label="Input Text", value=TEXT, info="Arbitrarily many characters supported (max 510)")
-            with gr.Row():
-                voice = gr.Dropdown(list(CHOICES.items()), value="af_bella.pt" if "af_bella.pt" in CHOICES.values() else list(CHOICES.values())[0], label="Voice",
-                                    info="Quality and availability vary by language")
-                use_gpu = gr.Dropdown(
-                    [("GPU 🚀", True), ("CPU 🐌", False)],
-                    value=CUDA_AVAILABLE,
-                    label="Hardware",
-                    info="GPU is faster but requires CUDA support",
-                    interactive=CUDA_AVAILABLE
                 )
-            speed = gr.Slider(minimum=0.5, maximum=2, value=1, step=0.1, label="Speed")
-        with gr.Column():
-            with gr.Tab(label="Generate"):
-                out_audio = gr.Audio(label="Output Audio", interactive=False, streaming=False, autoplay=True)
-                generate_btn = gr.Button("Generate", variant="primary")
-                with gr.Accordion("Output Tokens", open=True):
-                    out_ps = gr.Textbox(interactive=False, show_label=False,
-                                        info="Tokens used to generate the audio, up to 510 context length.")
-                    tokenize_btn = gr.Button("Tokenize", variant="secondary")
                     gr.Markdown(TOKEN_NOTE)
-            with gr.Tab(label="Stream"):
-                out_stream = gr.Audio(label="Output Audio Stream", interactive=False, streaming=True, autoplay=True)
                 with gr.Row():
-                    stream_btn = gr.Button("Stream", variant="primary")
-                    stop_btn = gr.Button("Stop", variant="stop")
-                gr.Markdown("⚠️ Streaming may have initial delays due to processing.")
-    generate_btn.click(fn=generate_first, inputs=[text, voice, speed, use_gpu], outputs=[out_audio, out_ps])
-    tokenize_btn.click(fn=tokenize_first, inputs=[text, voice], outputs=[out_ps])
-    stream_event = stream_btn.click(fn=generate_all, inputs=[text, voice, speed, use_gpu], outputs=[out_stream])
     stop_btn.click(fn=None, cancels=[stream_event])
 if __name__ == "__main__":
     logger.info("Starting Gradio app...")
-    app.launch()
     logger.info("Gradio app started.")

     for voice_file in voice_files:
         prefix = voice_file[:2]
         if prefix == 'af':
+            label = f"🇺🇸 Female: {voice_file[3:-3].capitalize()}"
         elif prefix == 'am':
+            label = f"🇺🇸 Male: {voice_file[3:-3].capitalize()}"
         elif prefix == 'bf':
+            label = f"🇬🇧 Female: {voice_file[3:-3].capitalize()}"
         elif prefix == 'bm':
+            label = f"🇬🇧 Male: {voice_file[3:-3].capitalize()}"
         else:
+            label = f"Unknown: {voice_file[:-3]}"
         choices[label] = voice_file
     if not choices:
         logger.warning("No voice files found in VOICE_DIR. Adding a placeholder.")
+        choices = {"🇺🇸 Female: Bella": "af_bella.pt"}
     VOICE_CHOICES = choices
     return choices
     start_time = time.time()
     if len(text) > 510:
         text = text[:510]
+        gr.Warning("Text truncated to 510 characters for faster processing.")
     voice_path = os.path.join(VOICE_DIR, voice)
     if not os.path.exists(voice_path):
+        raise gr.Error(f"Voice file not found: {voice_path}")
     pipeline = pipelines[voice[0]]
     use_gpu = use_gpu and CUDA_AVAILABLE
 def tokenize_first(text, voice="af_bella.pt"):
     if len(text) > 510:
         text = text[:510]
+        gr.Warning("Text truncated to 510 characters for faster processing.")
     voice_path = os.path.join(VOICE_DIR, voice)
     if not os.path.exists(voice_path):
+        raise gr.Error(f"Voice file not found: {voice_path}")
     pipeline = pipelines[voice[0]]
     generator = pipeline(text, voice=voice_path)
     start_time = time.time()
     if len(text) > 510:
         text = text[:510]
+        gr.Warning("Text truncated to 510 characters for faster processing.")
     voice_path = os.path.join(VOICE_DIR, voice)
     if not os.path.exists(voice_path):
+        raise gr.Error(f"Voice file not found: {voice_path}")
     pipeline = pipelines[voice[0]]
     use_gpu = use_gpu and CUDA_AVAILABLE
     logger.info(f"Streaming generation time: {time.time() - start_time} seconds")
 TOKEN_NOTE = '''
+**How to Customize Pronunciation**
+- Use Markdown link syntax, e.g., `[Kokoro](/kˈOkəɹO/)` for custom pronunciation.
+- Adjust intonation with punctuation: `;:,.!?—…"()“”`.
+- Control stress: `[word](-1)` or `[word](-2)` to lower, `[word](+1)` or `[word](+2)` to raise stress.
 '''
+with gr.Blocks(theme="huggingface", css=".gr-button-primary {background-color: #1e88e5 !important; color: white !important;}") as app:
+    gr.Markdown("# Kokoro TTS: Text-to-Speech Generator")
+    gr.Markdown("Enter text and select a voice to generate high-quality audio. Adjust speed for faster or slower speech.")
+    with gr.Column():
+        text = gr.Textbox(
+            label="Input Text",
+            value=TEXT,
+            placeholder="Type your text here (max 510 characters)",
+            lines=3,
+            max_lines=5,
+            info="Enter text to convert to speech."
+        )
+        with gr.Row():
+            voice = gr.Dropdown(
+                choices=list(CHOICES.items()),
+                value="af_bella.pt" if "af_bella.pt" in CHOICES.values() else list(CHOICES.values())[0],
+                label="Voice",
+                info="Choose a voice for the audio output."
+            )
+            use_gpu = gr.Dropdown(
+                choices=[("GPU 🚀 (Faster)", True), ("CPU 🐌 (Slower)", False)],
+                value=CUDA_AVAILABLE,
+                label="Hardware",
+                info="GPU is faster but requires CUDA support.",
+                interactive=CUDA_AVAILABLE
+            )
+        speed = gr.Slider(
+            minimum=0.5,
+            maximum=2,
+            value=1,
+            step=0.1,
+            label="Speech Speed",
+            info="Adjust the speed of the generated audio (0.5 = slower, 2 = faster)."
+        )
+        with gr.Tabs():
+            with gr.Tab(label="Generate Audio"):
+                out_audio = gr.Audio(
+                    label="Generated Audio",
+                    interactive=False,
+                    streaming=False,
+                    autoplay=True
+                )
+                status = gr.Textbox(
+                    value="Ready to generate audio.",
+                    label="Status",
+                    interactive=False
                 )
+                generate_btn = gr.Button("Generate Audio", variant="primary")
+                with gr.Accordion("Pronunciation Tokens", open=False):
+                    out_ps = gr.Textbox(
+                        interactive=False,
+                        show_label=False,
+                        info="Tokens used to generate the audio."
+                    )
+                    tokenize_btn = gr.Button("Show Tokens", variant="secondary")
                     gr.Markdown(TOKEN_NOTE)
+            with gr.Tab(label="Stream Audio"):
+                out_stream = gr.Audio(
+                    label="Streaming Audio",
+                    interactive=False,
+                    streaming=True,
+                    autoplay=True
+                )
+                status_stream = gr.Textbox(
+                    value="Ready to stream audio.",
+                    label="Status",
+                    interactive=False
+                )
                 with gr.Row():
+                    stream_btn = gr.Button("Start Streaming", variant="primary")
+                    stop_btn = gr.Button("Stop Streaming", variant="stop")
+                gr.Markdown("⚠️ Streaming may have slight delays due to processing.")
+    def update_status_generate(text, voice, speed, use_gpu):
+        status.value = "Generating audio..."
+        result, ps = generate_first(text, voice, speed, use_gpu)
+        status.value = "Audio generated successfully!" if result else "Failed to generate audio."
+        return result, ps
+    def update_status_tokenize(text, voice):
+        status.value = "Tokenizing text..."
+        result = tokenize_first(text, voice)
+        status.value = "Tokenization complete!" if result else "Failed to tokenize."
+        return result
+    def update_status_stream(text, voice, speed, use_gpu):
+        status_stream.value = "Starting audio stream..."
+        for audio in generate_all(text, voice, speed, use_gpu):
+            yield audio
+        status_stream.value = "Streaming complete!"
+    generate_btn.click(
+        fn=update_status_generate,
+        inputs=[text, voice, speed, use_gpu],
+        outputs=[out_audio, out_ps, status]
+    )
+    tokenize_btn.click(
+        fn=update_status_tokenize,
+        inputs=[text, voice],
+        outputs=[out_ps, status]
+    )
+    stream_event = stream_btn.click(
+        fn=update_status_stream,
+        inputs=[text, voice, speed, use_gpu],
+        outputs=[out_stream, status_stream]
+    )
     stop_btn.click(fn=None, cancels=[stream_event])
 if __name__ == "__main__":
     logger.info("Starting Gradio app...")
+    app.launch(queue=False)
     logger.info("Gradio app started.")