Nymbo committed on
Commit
f4462c5
·
verified ·
1 Parent(s): b59c00d

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +160 -16
app.py CHANGED
@@ -1,8 +1,9 @@
1
  # File: main/app.py
2
- # Purpose: One Space that offers three tools/tabs:
3
  # 1) Fetch — extract relevant page content (title, metadata, clean text, hyperlinks)
4
  # 2) DuckDuckGo Search — compact JSONL search output (short keys to minimize tokens)
5
  # 3) Python Code Executor — run Python code and capture stdout/errors
 
6
 
7
  from __future__ import annotations
8
 
@@ -19,6 +20,18 @@ from readability import Document
19
  from urllib.parse import urljoin, urldefrag, urlparse
20
  from duckduckgo_search import DDGS
21
 
 
 
 
 
 
 
 
 
 
 
 
 
22
 
23
  # ==============================
24
  # Fetch: HTTP + extraction utils
@@ -431,8 +444,114 @@ def Execute_Python(code: Annotated[str, "Python source code to run; stdout is ca
431
  sys.stdout = old_stdout
432
 
433
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
434
  # ======================
435
- # UI: three-tab interface
436
  # ======================
437
 
438
  # --- Fetch tab (compact controllable extraction) ---
@@ -510,23 +629,13 @@ code_interface = gr.Interface(
510
  theme="Nymbo/Nymbo_Theme",
511
  )
512
 
513
- # --- Combine all into a single app with tabs ---
514
- demo = gr.TabbedInterface(
515
- interface_list=[fetch_interface, concise_interface, code_interface],
516
- tab_names=[
517
- "Fetch Webpage",
518
- "DuckDuckGo Search",
519
- "Python Code Executor",
520
- ],
521
- title="Tools MCP",
522
- theme="Nymbo/Nymbo_Theme",
523
- css="""
524
  .gradio-container h1 {
525
  text-align: center;
526
  }
527
  /* Default: add subtitle under titles */
528
  .gradio-container h1::after {
529
- content: "Fetch Webpage | Search DuckDuckGo | Code Interpreter";
530
  display: block;
531
  font-size: 1rem;
532
  font-weight: 500;
@@ -535,10 +644,45 @@ demo = gr.TabbedInterface(
535
  }
536
 
537
  /* But remove it inside tab panels so it doesn't duplicate under each tool title */
538
- .gradio-container [role="tabpanel"] h1::after {
539
  content: none !important;
540
  }
541
- """,
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
542
  )
543
 
544
  # Launch the UI and expose all functions as MCP tools in one server
 
1
  # File: main/app.py
2
+ # Purpose: One Space that offers four tools/tabs (all exposed as MCP tools):
3
  # 1) Fetch — extract relevant page content (title, metadata, clean text, hyperlinks)
4
  # 2) DuckDuckGo Search — compact JSONL search output (short keys to minimize tokens)
5
  # 3) Python Code Executor — run Python code and capture stdout/errors
6
+ # 4) Kokoro TTS — synthesize speech from text using Kokoro-82M
7
 
8
  from __future__ import annotations
9
 
 
20
  from urllib.parse import urljoin, urldefrag, urlparse
21
  from duckduckgo_search import DDGS
22
 
23
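+ # Imports for Kokoro TTS: numpy is required; torch and kokoro are optional and fall back to None when unavailable (the model itself is initialized lazily on first use)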
24
+ import numpy as np
25
+ try:
26
+ import torch # type: ignore
27
+ except Exception: # pragma: no cover - optional dependency
28
+ torch = None # type: ignore
29
+ try:
30
+ from kokoro import KModel, KPipeline # type: ignore
31
+ except Exception: # pragma: no cover - optional dependency
32
+ KModel = None # type: ignore
33
+ KPipeline = None # type: ignore
34
+
35
 
36
  # ==============================
37
  # Fetch: HTTP + extraction utils
 
444
  sys.stdout = old_stdout
445
 
446
 
447
+ # ==========================
448
+ # Kokoro TTS (MCP tool #4)
449
+ # ==========================
450
+
451
+ _KOKORO_STATE = {
452
+ "initialized": False,
453
+ "device": "cpu",
454
+ "model": None,
455
+ "pipelines": {},
456
+ }
457
+
458
+
459
def _init_kokoro() -> None:
    """Create the Kokoro model and English pipeline once, on first call.

    Subsequent calls return immediately. The device is "cuda" when torch was
    imported and reports CUDA as available, otherwise "cpu". The results are
    stored in ``_KOKORO_STATE``.

    Raises:
        RuntimeError: If the 'kokoro' package could not be imported.
    """
    if _KOKORO_STATE["initialized"]:
        return

    kokoro_available = KModel is not None and KPipeline is not None
    if not kokoro_available:
        raise RuntimeError(
            "Kokoro is not installed. Please install the 'kokoro' package (>=0.9.4)."
        )

    # Probe for CUDA; any failure while probing simply means we stay on CPU.
    use_cuda = False
    if torch is not None:
        try:
            use_cuda = bool(torch.cuda.is_available())  # type: ignore[attr-defined]
        except Exception:
            use_cuda = False
    target_device = "cuda" if use_cuda else "cpu"

    tts_model = KModel().to(target_device).eval()

    # The pipeline is built with model=False; synthesis goes through the
    # shared tts_model above.
    english = KPipeline(lang_code="a", model=False)
    try:
        # Best-effort custom pronunciation for the word "kokoro".
        english.g2p.lexicon.golds["kokoro"] = "kˈOkəɹO"
    except Exception:
        pass

    _KOKORO_STATE.update(
        initialized=True,
        device=target_device,
        model=tts_model,
        pipelines={"a": english},
    )
497
+
498
+
499
def Kokoro_TextToAudio(  # <-- MCP tool #4 (Kokoro TTS)
    text: Annotated[str, "The text to synthesize (English)."],
    speed: Annotated[float, "Speech speed multiplier in 0.5–2.0; 1.0 = normal speed."] = 1.0,
    voice: Annotated[str, "Voice identifier. Example: 'af_heart' (US English, female, Heart)."] = "af_heart",
) -> Tuple[int, np.ndarray]:
    """
    Synthesize speech from text using the Kokoro-82M model.

    The pipeline may split the input into several segments. Every segment is
    synthesized and the resulting waveforms are concatenated in order, so the
    full text is spoken rather than only its first segment.

    Args:
        text: The text to synthesize (English).
        speed: Speech speed multiplier in 0.5–2.0; 1.0 = normal speed.
        voice: Voice identifier. Example: 'af_heart' (US English, female, Heart).

    Returns:
        A tuple of (sample_rate_hz, audio_waveform) where:
        - sample_rate_hz: int sample rate in Hz (24_000)
        - audio_waveform: numpy.ndarray waveform (expected to be float32 mono
          in range [-1, 1], as produced by the Kokoro model)

    Raises:
        gr.Error: If the text is empty, the English pipeline is missing,
            voice loading or synthesis fails, or no audio is produced.
        RuntimeError: If the 'kokoro' package is not installed (raised by
            the lazy initializer, with installation guidance).

    Notes:
        - Runs on CUDA if available; otherwise CPU.
    """
    if not text or not text.strip():
        raise gr.Error("Please provide non-empty text to synthesize.")

    _init_kokoro()
    model = _KOKORO_STATE["model"]
    pipeline = _KOKORO_STATE["pipelines"].get("a")
    if pipeline is None:
        raise gr.Error("Kokoro English pipeline not initialized.")

    segments = []
    try:
        # Voice loading and pipeline iteration are inside the try so that an
        # unknown voice id or a text-processing failure also surfaces as a
        # UI-friendly error instead of a raw exception.
        pack = pipeline.load_voice(voice)
        for _, ps, _ in pipeline(text, voice, speed):
            if not ps:
                # Nothing to synthesize for an empty phoneme sequence.
                continue
            # Pick the reference style entry matching this phoneme length.
            ref_s = pack[len(ps) - 1]
            audio = model(ps, ref_s, float(speed))
            segments.append(audio.detach().cpu().numpy())
    except Exception as e:  # propagate as UI-friendly error, keeping the cause
        raise gr.Error(f"Error generating audio: {str(e)}") from e

    if not segments:
        # The pipeline produced no usable segments.
        raise gr.Error("No audio was generated (empty synthesis result).")

    # assumes each segment is a 1-D mono waveform, so joining along axis 0
    # yields one continuous clip — TODO confirm against the Kokoro model output
    waveform = segments[0] if len(segments) == 1 else np.concatenate(segments)

    # Return 24 kHz mono waveform
    return 24_000, waveform
551
+
552
+
553
  # ======================
554
+ # UI: four-tab interface
555
  # ======================
556
 
557
  # --- Fetch tab (compact controllable extraction) ---
 
629
  theme="Nymbo/Nymbo_Theme",
630
  )
631
 
632
+ CSS_STYLES = """
 
 
 
 
 
 
 
 
 
 
633
  .gradio-container h1 {
634
  text-align: center;
635
  }
636
  /* Default: add subtitle under titles */
637
  .gradio-container h1::after {
638
+ content: "Fetch Webpage | Search DuckDuckGo | Code Interpreter | Kokoro TTS";
639
  display: block;
640
  font-size: 1rem;
641
  font-weight: 500;
 
644
  }
645
 
646
  /* But remove it inside tab panels so it doesn't duplicate under each tool title */
647
+ .gradio-container [role=\"tabpanel\"] h1::after {
648
  content: none !important;
649
  }
650
+ """
651
+
652
+ # --- Kokoro TTS tab (text to speech) ---
653
kokoro_interface = gr.Interface(
    title="Kokoro TTS",
    fn=Kokoro_TextToAudio,
    # Inputs are listed in the same order as the function's parameters:
    # text, speed, voice.
    inputs=[
        gr.Textbox(label="Text", placeholder="Type text to synthesize…", lines=4),
        gr.Slider(minimum=0.5, maximum=2.0, value=1.0, step=0.1, label="Speed"),
        gr.Textbox(label="Voice", value="af_heart", placeholder="e.g., af_heart"),
    ],
    # The function returns (sample_rate, waveform), hence type="numpy".
    outputs=gr.Audio(label="Audio", type="numpy"),
    description=(
        "<div style=\"text-align:center\">Synthesize English speech with Kokoro-82M. Requires the 'kokoro' package."
        " Exposed as an MCP tool with clear type hints and docstrings per the latest HF/Gradio MCP guidance.</div>"
    ),
    api_description=(
        "Synthesize speech from text using Kokoro-82M. Returns (sample_rate, waveform) suitable for playback."
        " Parameters: text (str), speed (float 0.5–2.0), voice (str)."
    ),
    theme="Nymbo/Nymbo_Theme",
    allow_flagging="never",
)
673
+
674
+ # Build tabbed app including Kokoro
675
demo = gr.TabbedInterface(
    title="Tools MCP",
    theme="Nymbo/Nymbo_Theme",
    css=CSS_STYLES,
    # interface_list and tab_names are parallel: the i-th name labels the
    # i-th interface.
    interface_list=[fetch_interface, concise_interface, code_interface, kokoro_interface],
    tab_names=["Fetch Webpage", "DuckDuckGo Search", "Python Code Executor", "Kokoro TTS"],
)
687
 
688
  # Launch the UI and expose all functions as MCP tools in one server