Nymbo committed
Commit dbd129c · verified · 1 parent: 42a9365

Update app.py

Files changed (1): app.py (+174 -16)
app.py CHANGED
@@ -25,6 +25,7 @@ from duckduckgo_search import DDGS
  from PIL import Image
  from huggingface_hub import InferenceClient
  import time
+ import wave

  # Optional imports for Kokoro TTS (loaded lazily)
  import numpy as np
@@ -501,14 +502,15 @@ def Generate_Speech( # <-- MCP tool #4 (Generate Speech)
  text: Annotated[str, "The text to synthesize (English)."],
  speed: Annotated[float, "Speech speed multiplier in 0.5–2.0; 1.0 = normal speed."] = 1.0,
  voice: Annotated[str, "Voice identifier. Example: 'af_heart' (US English, female, Heart)."] = "af_heart",
- ) -> Tuple[int, np.ndarray]:
+ ) -> str:
  """
  Synthesize speech from text using the Kokoro-82M model.

- This function returns raw audio suitable for a Gradio Audio component and is
- also exposed as an MCP tool (per the latest Hugging Face/Gradio MCP docs, a
- tool is created for each function wired into your app; docstrings and type
- hints are used to describe the tool).
+ Per current HF Gradio MCP guidance (see hf-docs-search), tools should return
+ browser/client-friendly artifacts where possible. This function returns the
+ path to a WAV file on disk so the UI renders an HTML5 audio player and MCP
+ clients receive a file URL that opens in the browser rather than forcing a
+ direct download.

  Args:
  text: The text to synthesize (English).
@@ -516,9 +518,8 @@ def Generate_Speech( # <-- MCP tool #4 (Generate Speech)
  voice: Voice identifier. Example: 'af_heart' (US English, female, Heart).

  Returns:
- A tuple of (sample_rate_hz, audio_waveform) where:
- - sample_rate_hz: int sample rate in Hz (24_000)
- - audio_waveform: numpy.ndarray float32 mono waveform in range [-1, 1]
+ str: Path to a 24 kHz mono WAV file on disk (served by Gradio; MCP converts
+ paths to file URLs).

  Notes:
  - Requires the 'kokoro' package (>=0.9.4). If unavailable, an error is
@@ -544,8 +545,11 @@ def Generate_Speech( # <-- MCP tool #4 (Generate Speech)
  audio = model(ps, ref_s, float(speed))
  except Exception as e: # propagate as UI-friendly error
  raise gr.Error(f"Error generating audio: {str(e)}")
- # Return 24 kHz mono waveform
- return 24_000, audio.detach().cpu().numpy()
+ # Save 24 kHz mono waveform to WAV and return its path for in-browser playback
+ sr = 24_000
+ wav = audio.detach().cpu().numpy()
+ path = _write_audio_tmp(wav, sample_rate=sr, suffix=".wav")
+ return path

  # If pipeline produced no segments
  raise gr.Error("No audio was generated (empty synthesis result).")
@@ -637,7 +641,7 @@ CSS_STYLES = """
  /* Place bold tools list on line 2, normal auth note on line 3 (below title) */
  .gradio-container h1::before {
  grid-row: 2;
- content: "Fetch Webpage | Search DuckDuckGo | Code Interpreter | Kokoro TTS | Image Generation | Video Generation";
+ content: "Fetch Webpage | Search DuckDuckGo | Code Interpreter | Kokoro TTS | Image Generation | Video Generation | Generate Code";
  display: block;
  font-size: 1rem;
  font-weight: 700;
@@ -647,7 +651,7 @@ CSS_STYLES = """
  }
  .gradio-container h1::after {
  grid-row: 3;
- content: "Authentication is optional but Image/Video Generation require a `HF_READ_TOKEN` in env variables. They are hidden otherwise.";
+ content: "Authentication is optional. Image/Video (and some Code) generation may require `HF_READ_TOKEN`; Image/Video tabs hide without it.";
  display: block;
  font-size: 1rem;
  font-weight: 400;
@@ -671,15 +675,14 @@ kokoro_interface = gr.Interface(
  gr.Slider(minimum=0.5, maximum=2.0, value=1.0, step=0.1, label="Speed"),
  gr.Textbox(label="Voice", value="af_heart", placeholder="e.g., af_heart"),
  ],
- outputs=gr.Audio(label="Audio", type="numpy"),
+ outputs=gr.Audio(label="Audio", type="filepath"),
  title="Kokoro TTS",
  description=(
  "<div style=\"text-align:center\">Generate English speech with Kokoro-82M. 30 second max output. Runs on CPU or CUDA if available.</div>"
  ),
  api_description=(
- "Synthesize speech from text using Kokoro-82M. Returns (sample_rate, waveform) suitable for playback. "
- "Parameters: text (str), speed (float 0.5–2.0), voice (str). "
- "Return the generated image to the user."
+ "Synthesize speech from text using Kokoro-82M. Returns a file path to a 24 kHz mono WAV, which renders in-browser and is exposed as a file URL over MCP. "
+ "Parameters: text (str), speed (float 0.5–2.0), voice (str)."
  ),
  allow_flagging="never",
  )
@@ -987,6 +990,158 @@ video_generation_interface = gr.Interface(
  allow_flagging="never",
  )

+ # ==========================
+ # Audio helper (save WAV)
+ # ==========================
+
+ def _write_audio_tmp(audio: np.ndarray, sample_rate: int = 24_000, suffix: str = ".wav") -> str:
+ """Write mono float32 waveform [-1,1] to 16-bit PCM WAV and return path."""
+ if audio.ndim > 1:
+ audio = np.mean(audio, axis=0)
+ audio = np.clip(audio.astype(np.float32), -1.0, 1.0)
+ pcm = (audio * 32767.0).astype(np.int16)
+ os.makedirs("outputs", exist_ok=True)
+ fname = f"outputs/audio_{int(time.time())}_{random.randint(1000,9999)}{suffix}"
+ with wave.open(fname, "wb") as wf:
+ wf.setnchannels(1)
+ wf.setsampwidth(2)
+ wf.setframerate(sample_rate)
+ wf.writeframes(pcm.tobytes())
+ return fname
+
+ # ==========================
+ # Code Generation (Serverless)
+ # ==========================
+
+ def Generate_Code(
+ instruction: Annotated[str, "Describe the code to generate (requirements, I/O, constraints)."],
+ language: Annotated[str, "Optional language/framework hint (e.g., 'python', 'typescript react')."] = "",
+ model_id: Annotated[str, "HF text-generation model id (e.g., 'bigcode/starcoder2-3b')."] = "bigcode/starcoder2-3b",
+ max_new_tokens: Annotated[int, "Maximum tokens to generate (64–4096, model dependent)."] = 512,
+ temperature: Annotated[float, "Sampling temperature (0–1.5). Lower = more deterministic."] = 0.2,
+ top_p: Annotated[float, "Nucleus sampling p (0–1)."] = 0.95,
+ top_k: Annotated[int, "Top-k sampling cutoff (0 disables)."] = 50,
+ repetition_penalty: Annotated[float, "Discourage repeats (>1.0)."] = 1.05,
+ seed: Annotated[int, "Random seed (-1 = random)."] = -1,
+ save_to_file: Annotated[bool, "If true, save under ./outputs and prepend 'Saved to:' path."] = False,
+ filename: Annotated[str, "Optional filename when saving (e.g., main.py)."] = "",
+ ) -> str:
+ """
+ Generate source code via Hugging Face Inference text-generation models and return code as plain text.
+
+ Per current MCP docs (via hf-docs-search), schemas are inferred from type hints and docstrings. Returning
+ text is broadly compatible; when save_to_file is enabled, the response is prefixed with the saved path so
+ MCP clients can expose a file URL.
+ """
+ if not instruction or not instruction.strip():
+ raise gr.Error("Please provide a non-empty instruction.")
+
+ token = os.getenv("HF_READ_TOKEN") or os.getenv("HF_TOKEN")
+ providers = ["auto", "replicate", "fal-ai"]
+ lang_hint = f" in {language.strip()}" if language and language.strip() else ""
+ system_preamble = (
+ "You are a precise coding assistant. Output only runnable code without explanations. "
+ "Prefer idiomatic patterns, minimal comments, and include necessary imports."
+ )
+ prompt = (
+ f"{system_preamble}\n\nTask{lang_hint}:\n{instruction.strip()}\n\n"
+ "Return only the code, no backticks."
+ )
+
+ last_error: Exception | None = None
+ for provider in providers:
+ try:
+ client = InferenceClient(api_key=token, provider=provider)
+ out = client.text_generation(
+ model=model_id,
+ prompt=prompt,
+ max_new_tokens=max_new_tokens,
+ temperature=temperature,
+ top_p=top_p,
+ top_k=top_k,
+ repetition_penalty=repetition_penalty,
+ seed=(None if seed == -1 else seed),
+ stream=False,
+ )
+ code = (out or "").strip()
+ if not code:
+ raise gr.Error("Model returned empty output.")
+ prefix = ""
+ if save_to_file:
+ os.makedirs("outputs", exist_ok=True)
+ base = filename.strip() or f"code_{int(time.time())}_{random.randint(1000,9999)}"
+ if "." not in base and language:
+ ext_map = {
+ "python": ".py",
+ "py": ".py",
+ "typescript": ".ts",
+ "tsx": ".tsx",
+ "javascript": ".js",
+ "jsx": ".jsx",
+ "go": ".go",
+ "rust": ".rs",
+ "java": ".java",
+ "csharp": ".cs",
+ "c#": ".cs",
+ "cpp": ".cpp",
+ "c++": ".cpp",
+ "c": ".c",
+ "bash": ".sh",
+ "shell": ".sh",
+ "html": ".html",
+ "css": ".css",
+ "json": ".json",
+ "yaml": ".yaml",
+ "yml": ".yml",
+ }
+ key = language.lower().split()[0]
+ base += ext_map.get(key, "")
+ path = os.path.join("outputs", base)
+ with open(path, "w", encoding="utf-8") as f:
+ f.write(code)
+ prefix = f"Saved to: {path}\n\n"
+ return f"{prefix}{code}"
+ except Exception as e:
+ last_error = e
+ continue
+ msg = str(last_error) if last_error else "Unknown error"
+ if "401" in msg or "403" in msg:
+ raise gr.Error("Authentication failed or not permitted. Set HF_READ_TOKEN/HF_TOKEN with inference access.")
+ if "404" in msg:
+ raise gr.Error(f"Model not found or unavailable: {model_id}.")
+ if "503" in msg:
+ raise gr.Error("The model is warming up. Please try again shortly.")
+ raise gr.Error(f"Code generation failed: {msg}")
+
+
+ code_generation_interface = gr.Interface(
+ fn=Generate_Code,
+ inputs=[
+ gr.Textbox(label="Instruction", placeholder="Describe what to build, inputs/outputs, edge cases…", lines=6),
+ gr.Textbox(label="Language (optional)", value="", placeholder="e.g., python, typescript react"),
+ gr.Textbox(label="Model", value="bigcode/starcoder2-3b", placeholder="creator/model-name"),
+ gr.Slider(minimum=64, maximum=4096, value=512, step=16, label="Max new tokens"),
+ gr.Slider(minimum=0.0, maximum=1.5, value=0.2, step=0.05, label="Temperature"),
+ gr.Slider(minimum=0.1, maximum=1.0, value=0.95, step=0.01, label="Top-p"),
+ gr.Slider(minimum=0, maximum=200, value=50, step=1, label="Top-k"),
+ gr.Slider(minimum=1.0, maximum=2.0, value=1.05, step=0.01, label="Repetition penalty"),
+ gr.Slider(minimum=-1, maximum=1_000_000_000, value=-1, step=1, label="Seed (-1 = random)"),
+ gr.Checkbox(value=False, label="Save to file (./outputs)"),
+ gr.Textbox(label="Filename (optional)", value="", placeholder="e.g., main.py"),
+ ],
+ outputs=gr.Code(label="Generated Code"),
+ title="Generate Code",
+ description=(
+ "<div style=\"text-align:center\">Generate code via Hugging Face Inference text-generation models. Provide a clear instruction and (optionally) a language hint.</div>"
+ ),
+ api_description=(
+ "Generate source code using a HF Inference text-generation model. Parameters: instruction (str), language (str), model_id (str), "
+ "max_new_tokens (int), temperature (float), top_p (float), top_k (int), repetition_penalty (float), seed (int), save_to_file (bool), filename (str). "
+ "Returns the code as text; if saved, prepends 'Saved to: <path>'."
+ ),
+ allow_flagging="never",
+ )
+
  # Build tabbed app; disable Image/Video tools if no HF token is present
  HAS_HF_TOKEN = bool(HF_API_TOKEN or HF_VIDEO_TOKEN)

@@ -1007,6 +1162,9 @@ if HAS_HF_TOKEN:
  _interfaces.extend([image_generation_interface, video_generation_interface])
  _tab_names.extend(["Image Generation", "Video Generation"])

+ # Always add Generate Code as the last tab
+ _interfaces.append(code_generation_interface)
+ _tab_names.append("Generate Code")
  demo = gr.TabbedInterface(
  interface_list=_interfaces,
  tab_names=_tab_names,
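
A minimal client-side sketch of the two endpoints this commit touches, using gradio_client. The Space id ("user/space-name") is a placeholder, and the api names are assumed to follow the wired-in function names ("/Generate_Speech", "/Generate_Code"); adjust both to the actual deployment.

# Minimal sketch, assuming api names "/Generate_Speech" and "/Generate_Code"
# and a placeholder Space id; not part of the commit itself.
from gradio_client import Client

client = Client("user/space-name")  # placeholder Space id

# Generate_Speech now returns a path to a 24 kHz mono WAV (downloaded locally
# by gradio_client) instead of a (sample_rate, ndarray) tuple.
wav_path = client.predict(
    text="Hello from Kokoro.",
    speed=1.0,
    voice="af_heart",
    api_name="/Generate_Speech",
)
print(wav_path)  # local path to the fetched .wav

# Generate_Code returns plain text; with save_to_file=True the server-side
# path is prepended as "Saved to: <path>".
code_text = client.predict(
    instruction="Write a Python function that reverses a string.",
    language="python",
    model_id="bigcode/starcoder2-3b",
    max_new_tokens=256,
    temperature=0.2,
    top_p=0.95,
    top_k=50,
    repetition_penalty=1.05,
    seed=-1,
    save_to_file=False,
    filename="",
    api_name="/Generate_Code",
)
print(code_text)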