Spaces:

re-skill
/

tajik-tts

Running

App Files Files Community

muhtasham committed on Jun 25

Commit

0bb0b13

verified ·

1 Parent(s): 21f182e

Upload 2 files

Browse files

Files changed (2) hide show

app.py +203 -0
requirements.txt +3 -0

app.py ADDED Viewed

	@@ -0,0 +1,203 @@

+import gradio as gr
+import httpx
+import os
+import atexit
+from loguru import logger
+# FastAPI endpoint URL - adjust this to match your actual endpoint
+API_URL = os.getenv("API_URL").rstrip('/')
+# Configure httpx client with retries and timeouts
+client = httpx.Client(
+    timeout=httpx.Timeout(
+        connect=10.0,    # connection timeout
+        read=120.0,      # read timeout
+        write=10.0,      # write timeout
+        pool=None,       # pool timeout
+    ),
+    limits=httpx.Limits(
+        max_keepalive_connections=5,
+        max_connections=10,
+        keepalive_expiry=30.0
+    ),
+    transport=httpx.HTTPTransport(
+        retries=3,  # Number of retries
+    )
+)
+def check_api_health():
+    """Check if the API is healthy before making requests"""
+    try:
+        response = client.get(f"{API_URL}/")
+        response.raise_for_status()
+        logger.info("API health check passed")
+        return True
+    except httpx.TimeoutException as e:
+        logger.error(f"API health check timed out: {str(e)}")
+        return False
+    except httpx.HTTPError as e:
+        logger.error(f"API health check failed: {str(e)}")
+        return False
+def generate_speech(text, temperature, top_p, repetition_penalty, max_new_tokens, progress=gr.Progress()):
+    if not text.strip():
+        logger.warning("Empty text input received")
+        return None
+    try:
+        # Check API health first
+        if not check_api_health():
+            logger.error("API is not healthy, aborting request")
+            raise gr.Error("The API service is currently unavailable. Please try again later.")
+        # Log input parameters
+        logger.info(f"Generating speech for text: {text[:50]}... with params: temp={temperature}, top_p={top_p}, rep_penalty={repetition_penalty}, max_tokens={max_new_tokens}")
+        # Prepare the request payload
+        payload = {
+            "text": text.strip(),
+            "return_type": "wav",  # Request WAV format directly
+            "temperature": temperature,
+            "top_p": top_p,
+            "repetition_penalty": repetition_penalty,
+            "max_new_tokens": max_new_tokens
+        }
+        # Update progress
+        progress(0.3, "Sending request to server ...")
+        # Make request to FastAPI endpoint
+        response = client.post(
+            f"{API_URL}/tts",
+            json=payload,
+            headers={"Content-Type": "application/json"}
+        )
+        # Log response status
+        logger.debug(f"Received response with status {response.status_code} and content-type {response.headers.get('content-type')}")
+        # Return the WAV bytes directly
+        if response.status_code == 200:
+            logger.info("Successfully generated speech in WAV format")
+            return response.content
+        else:
+            error_msg = f"API returned error status {response.status_code}"
+            logger.error(error_msg)
+            raise gr.Error(error_msg)
+    except httpx.TimeoutException as e:
+        error_msg = "Request timed out. The server took too long to respond."
+        logger.error(f"{error_msg}: {str(e)}")
+        raise gr.Error(error_msg)
+    except httpx.HTTPError as e:
+        error_msg = f"Network error while generating speech: {str(e)}"
+        logger.error(error_msg)
+        raise gr.Error(error_msg)
+    except Exception as e:
+        error_msg = f"Error generating speech: {str(e)}"
+        logger.error(error_msg, exc_info=True)
+        raise gr.Error(error_msg)
+# Clean up client on exit
+atexit.register(client.close)
+# Examples for the UI
+examples = [
+    [
+        "Салом, номи ман Али аст ва ман имрӯз мехоҳам ба шумо дар бораи забони тоҷикӣ ва аҳамияти он дар фарҳанги мо нақл кунам.",
+        0.6, 0.95, 1.1, 1800
+    ],
+    [
+        "Имрӯз ҳаво хеле хуб аст ва ман қарор додам, ки бо дӯстонам ба боғ равам ва якҷоя вақт гузаронем.",
+        0.6, 0.95, 1.1, 1200
+    ],
+    [
+        "Ман забони тоҷикӣ меомӯзам, зеро мехоҳам бо мардумони гуногун сӯҳбат кунам ва фарҳанги онҳоро беҳтар фаҳмам.",
+        0.6, 0.95, 1.1, 1200
+    ],
+    [
+        "Лутфан як пиёла чой диҳед, зеро ман имрӯз хеле хаста шудам ва мехоҳам каме истироҳат кунам.",
+        0.6, 0.95, 1.1, 1200
+    ],
+    [
+        "Шумо аз куҷо ҳастед ва чӣ гуна ба омӯзиши забони тоҷикӣ шурӯъ кардед?",
+        0.6, 0.95, 1.1, 1200
+    ],
+]
+# Build the Gradio UI: inputs and sampling settings in the left column,
+# generated audio in the right column, example prompts underneath.
+with gr.Blocks(title="Orpheus Text-to-Speech") as demo:
+    gr.Markdown("""
+    # 🎵 [Tajik Orpheus Text-to-Speech](https://github.com/canopyai/Orpheus-TTS)
+    Enter your text below and hear it converted to natural-sounding speech with the Orpheus TTS model.
+    ## Tips for better prompts:
+    - Short text prompts generally work better than very long phrases
+    - Increasing `repetition_penalty` and `temperature` makes the model speak faster.
+    """)
+    with gr.Row():
+        with gr.Column(scale=3):
+            text_input = gr.Textbox(
+                label="Text to speak",
+                placeholder="Enter your text here...",
+                lines=5
+            )
+            # Sampling parameters, collapsed by default; the slider defaults
+            # match the values used in most of the example rows.
+            with gr.Accordion("Advanced Settings", open=False):
+                temperature = gr.Slider(
+                    minimum=0.1, maximum=1.5, value=0.6, step=0.05,
+                    label="Temperature",
+                    info="Higher values (0.7-1.0) create more expressive but less stable speech"
+                )
+                top_p = gr.Slider(
+                    minimum=0.1, maximum=1.0, value=0.95, step=0.05,
+                    label="Top P",
+                    info="Nucleus sampling threshold"
+                )
+                repetition_penalty = gr.Slider(
+                    minimum=1.0, maximum=2.0, value=1.1, step=0.05,
+                    label="Repetition Penalty",
+                    info="Higher values discourage repetitive patterns"
+                )
+                max_new_tokens = gr.Slider(
+                    minimum=100, maximum=2000, value=1200, step=100,
+                    label="Max Length",
+                    info="Maximum length of generated audio (in tokens)"
+                )
+            with gr.Row():
+                submit_btn = gr.Button("Generate Speech", variant="primary")
+                clear_btn = gr.Button("Clear")
+        with gr.Column(scale=2):
+            # Output player for the audio returned by generate_speech
+            # (the raw WAV response body).
+            audio_output = gr.Audio(
+                label="Generated Speech",
+                type="filepath"  # NOTE(review): per the Gradio docs `type` governs how audio is handed to a function when the component is an input; this component is only used as an output here - confirm it is needed
+            )
+    # Set up examples; cache_examples=False means the example outputs are not
+    # pre-computed, so the API is only called on demand.
+    gr.Examples(
+        examples=examples,
+        inputs=[text_input, temperature, top_p, repetition_penalty, max_new_tokens],
+        outputs=audio_output,
+        fn=generate_speech,
+        cache_examples=False,
+    )
+    # Set up event handlers
+    submit_btn.click(
+        fn=generate_speech,
+        inputs=[text_input, temperature, top_p, repetition_penalty, max_new_tokens],
+        outputs=audio_output
+    )
+    # Clear resets both the text box and the audio player to None
+    clear_btn.click(
+        fn=lambda: (None, None),
+        inputs=[],
+        outputs=[text_input, audio_output]
+    )
+# Launch the app with request queuing enabled when run as a script
+if __name__ == "__main__":
+    demo.queue().launch(share=False, ssr_mode=False)

requirements.txt ADDED Viewed

	@@ -0,0 +1,3 @@

+gradio
+httpx
+loguru