Spaces:

Nymbo
/

Tools

Running

App Files Files Community

Nymbo committed 7 days ago

Commit

f4462c5

verified ·

1 Parent(s): b59c00d

Update app.py

Browse files

Files changed (1) hide show

app.py +160 -16

app.py CHANGED Viewed

@@ -1,8 +1,9 @@
 # File: main/app.py
-# Purpose: One Space that offers three tools/tabs:
 #   1) Fetch — extract relevant page content (title, metadata, clean text, hyperlinks)
 #   2) DuckDuckGo Search — compact JSONL search output (short keys to minimize tokens)
 #   3) Python Code Executor — run Python code and capture stdout/errors
 from __future__ import annotations
@@ -19,6 +20,18 @@ from readability import Document
 from urllib.parse import urljoin, urldefrag, urlparse
 from duckduckgo_search import DDGS
 # ==============================
 # Fetch: HTTP + extraction utils
@@ -431,8 +444,114 @@ def Execute_Python(code: Annotated[str, "Python source code to run; stdout is ca
         sys.stdout = old_stdout
 # ======================
-# UI: three-tab interface
 # ======================
 # --- Fetch tab (compact controllable extraction) ---
@@ -510,23 +629,13 @@ code_interface = gr.Interface(
     theme="Nymbo/Nymbo_Theme",
 )
-# --- Combine all into a single app with tabs ---
-demo = gr.TabbedInterface(
-    interface_list=[fetch_interface, concise_interface, code_interface],
-    tab_names=[
-        "Fetch Webpage",
-        "DuckDuckGo Search",
-        "Python Code Executor",
-    ],
-    title="Tools MCP",
-    theme="Nymbo/Nymbo_Theme",
-    css="""
     .gradio-container h1 {
         text-align: center;
     }
     /* Default: add subtitle under titles */
     .gradio-container h1::after {
-        content: "Fetch Webpage | Search DuckDuckGo | Code Interpreter";
         display: block;
         font-size: 1rem;
         font-weight: 500;
@@ -535,10 +644,45 @@ demo = gr.TabbedInterface(
     }
     /* But remove it inside tab panels so it doesn't duplicate under each tool title */
-    .gradio-container [role="tabpanel"] h1::after {
         content: none !important;
     }
-    """,
 )
 # Launch the UI and expose all functions as MCP tools in one server

 # File: main/app.py
+# Purpose: One Space that offers four tools/tabs (all exposed as MCP tools):
 #   1) Fetch — extract relevant page content (title, metadata, clean text, hyperlinks)
 #   2) DuckDuckGo Search — compact JSONL search output (short keys to minimize tokens)
 #   3) Python Code Executor — run Python code and capture stdout/errors
+#   4) Kokoro TTS — synthesize speech from text using Kokoro-82M
 from __future__ import annotations
 from urllib.parse import urljoin, urldefrag, urlparse
 from duckduckgo_search import DDGS
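+# Imports for Kokoro TTS: numpy is required; torch and kokoro are optional and
+# fall back to None when missing (the model itself is built lazily on first use)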
+import numpy as np
+try:
+    import torch  # type: ignore
+except Exception:  # pragma: no cover - optional dependency
+    torch = None  # type: ignore
+try:
+    from kokoro import KModel, KPipeline  # type: ignore
+except Exception:  # pragma: no cover - optional dependency
+    KModel = None  # type: ignore
+    KPipeline = None  # type: ignore
 # ==============================
 # Fetch: HTTP + extraction utils
         sys.stdout = old_stdout
+# ==========================
+# Kokoro TTS (MCP tool #4)
+# ==========================
+# Module-level cache for the Kokoro model, filled in by _init_kokoro() the first
+# time speech synthesis is requested so later calls can reuse the loaded objects.
+_KOKORO_STATE = {
+    # True once _init_kokoro() has completed; later calls return immediately.
+    "initialized": False,
+    # Device the model was moved to: "cuda" when torch reports it available, else "cpu".
+    "device": "cpu",
+    # The KModel instance (None until initialized).
+    "model": None,
+    # KPipeline objects keyed by Kokoro language code; only "a" is created.
+    "pipelines": {},
+}
+def _init_kokoro() -> None:
+    """Build the Kokoro model and English pipeline once and cache them.
+    The first call creates a KModel on CUDA (when torch is importable and
+    reports a usable GPU) or on CPU, creates the "a" KPipeline, tries to
+    register a custom pronunciation for the word "kokoro", and stores the
+    results in _KOKORO_STATE. Subsequent calls are no-ops.
+    Raises:
+        RuntimeError: If the optional 'kokoro' package could not be imported.
+    """
+    if _KOKORO_STATE["initialized"]:
+        return
+    if KModel is None or KPipeline is None:
+        raise RuntimeError(
+            "Kokoro is not installed. Please install the 'kokoro' package (>=0.9.4)."
+        )
+    # Probe for CUDA defensively: any failure while asking torch means CPU.
+    has_cuda = False
+    if torch is not None:
+        try:
+            has_cuda = bool(torch.cuda.is_available())  # type: ignore[attr-defined]
+        except Exception:
+            has_cuda = False
+    target_device = "cuda" if has_cuda else "cpu"
+    tts_model = KModel().to(target_device).eval()
+    # model=False: the pipeline is created without its own model; the shared
+    # KModel above is invoked explicitly by the caller.
+    english_pipeline = KPipeline(lang_code="a", model=False)
+    # Best-effort pronunciation override; ignored if the lexicon is not exposed.
+    try:
+        english_pipeline.g2p.lexicon.golds["kokoro"] = "kˈOkəɹO"
+    except Exception:
+        pass
+    _KOKORO_STATE.update(
+        initialized=True,
+        device=target_device,
+        model=tts_model,
+        pipelines={"a": english_pipeline},
+    )
+def Kokoro_TextToAudio(  # <-- MCP tool #4 (Kokoro TTS)
+    text: Annotated[str, "The text to synthesize (English)."],
+    speed: Annotated[float, "Speech speed multiplier in 0.5–2.0; 1.0 = normal speed."] = 1.0,
+    voice: Annotated[str, "Voice identifier. Example: 'af_heart' (US English, female, Heart)."] = "af_heart",
+) -> Tuple[int, np.ndarray]:
+    """
+    Synthesize speech from text using the Kokoro-82M model.
+    The English pipeline may split the input into several segments. Every
+    segment is synthesized and the pieces are joined in order, so the returned
+    waveform covers the whole input rather than only its first segment.
+    The result is suitable for a Gradio Audio component (type="numpy") and the
+    function is wired into the app as an MCP tool; its type hints and this
+    docstring describe the tool.
+    Args:
+        text: The text to synthesize (English).
+        speed: Speech speed multiplier in 0.5–2.0; 1.0 = normal speed.
+        voice: Voice identifier. Example: 'af_heart' (US English, female, Heart).
+    Returns:
+        A tuple of (sample_rate_hz, audio_waveform) where:
+        - sample_rate_hz: int sample rate in Hz (24_000)
+        - audio_waveform: numpy.ndarray mono waveform
+    Raises:
+        gr.Error: If the text is empty, the English pipeline is missing, the
+            model fails on a segment, or no audio is produced at all.
+        RuntimeError: If the 'kokoro' package is not installed (raised by
+            _init_kokoro with installation guidance).
+    Notes:
+        - Runs on CUDA if available; otherwise CPU.
+    """
+    if not text or not text.strip():
+        raise gr.Error("Please provide non-empty text to synthesize.")
+    _init_kokoro()
+    model = _KOKORO_STATE["model"]
+    pipeline = _KOKORO_STATE["pipelines"].get("a")
+    if pipeline is None:
+        raise gr.Error("Kokoro English pipeline not initialized.")
+    # Gradio may deliver the slider value as an int; normalize once up front.
+    speed = float(speed)
+    pack = pipeline.load_voice(voice)
+    segments = []
+    for _, ps, _ in pipeline(text, voice, speed):
+        # The voice pack is indexed by phoneme-sequence length to pick the
+        # reference style vector for this segment.
+        ref_s = pack[len(ps) - 1]
+        try:
+            audio = model(ps, ref_s, speed)
+        except Exception as e:  # propagate as UI-friendly error, keep the cause
+            raise gr.Error(f"Error generating audio: {str(e)}") from e
+        segments.append(audio.detach().cpu().numpy())
+    # If pipeline produced no segments
+    if not segments:
+        raise gr.Error("No audio was generated (empty synthesis result).")
+    # Join all segments so long inputs are not truncated to the first one.
+    waveform = segments[0] if len(segments) == 1 else np.concatenate(segments)
+    # Return 24 kHz mono waveform
+    return 24_000, waveform
 # ======================
+# UI: four-tab interface
 # ======================
 # --- Fetch tab (compact controllable extraction) ---
     theme="Nymbo/Nymbo_Theme",
 )
+# Shared stylesheet passed to the tabbed app: centers page titles, appends a
+# one-line tool summary under h1 headings, and suppresses that summary inside
+# tab panels so it is not repeated under every tool's own title.
+CSS_STYLES = """
     .gradio-container h1 {
         text-align: center;
     }
     /* Default: add subtitle under titles */
     .gradio-container h1::after {
+        content: "Fetch Webpage | Search DuckDuckGo | Code Interpreter | Kokoro TTS";
         display: block;
         font-size: 1rem;
         font-weight: 500;
     }
     /* But remove it inside tab panels so it doesn't duplicate under each tool title */
+    .gradio-container [role=\"tabpanel\"] h1::after {
         content: none !important;
     }
+"""
+# --- Kokoro TTS tab (text to speech) ---
+# Gradio interface for the Kokoro TTS tab. The three inputs are listed in the
+# same order as Kokoro_TextToAudio's parameters (text, speed, voice), and the
+# Audio output uses type="numpy" to accept the (sample_rate, waveform) tuple
+# that the function returns.
+kokoro_interface = gr.Interface(
+    fn=Kokoro_TextToAudio,
+    inputs=[
+        gr.Textbox(label="Text", placeholder="Type text to synthesize…", lines=4),
+        # Slider bounds mirror the 0.5–2.0 speed range documented on the function.
+        gr.Slider(minimum=0.5, maximum=2.0, value=1.0, step=0.1, label="Speed"),
+        gr.Textbox(label="Voice", value="af_heart", placeholder="e.g., af_heart"),
+    ],
+    outputs=gr.Audio(label="Audio", type="numpy"),
+    title="Kokoro TTS",
+    description=(
+        "<div style=\"text-align:center\">Synthesize English speech with Kokoro-82M. Requires the 'kokoro' package."
+        " Exposed as an MCP tool with clear type hints and docstrings per the latest HF/Gradio MCP guidance.</div>"
+    ),
+    api_description=(
+        "Synthesize speech from text using Kokoro-82M. Returns (sample_rate, waveform) suitable for playback."
+        " Parameters: text (str), speed (float 0.5–2.0), voice (str)."
+    ),
+    allow_flagging="never",
+    theme="Nymbo/Nymbo_Theme",
+)
+# Build tabbed app including Kokoro
+# Top-level app: one tab per tool interface. tab_names is listed in the same
+# order as interface_list, and the shared CSS_STYLES stylesheet is applied to
+# the whole app.
+demo = gr.TabbedInterface(
+    interface_list=[fetch_interface, concise_interface, code_interface, kokoro_interface],
+    tab_names=[
+        "Fetch Webpage",
+        "DuckDuckGo Search",
+        "Python Code Executor",
+        "Kokoro TTS",
+    ],
+    title="Tools MCP",
+    theme="Nymbo/Nymbo_Theme",
+    css=CSS_STYLES,
 )
 # Launch the UI and expose all functions as MCP tools in one server