Nymbo committed
Commit dbd129c · verified · 1 parent: 42a9365

Update app.py

Files changed (1): app.py (+174 -16)
app.py CHANGED
@@ -25,6 +25,7 @@ from duckduckgo_search import DDGS
  from PIL import Image
  from huggingface_hub import InferenceClient
  import time
+ import wave

  # Optional imports for Kokoro TTS (loaded lazily)
  import numpy as np
@@ -501,14 +502,15 @@ def Generate_Speech( # <-- MCP tool #4 (Generate Speech)
  text: Annotated[str, "The text to synthesize (English)."],
  speed: Annotated[float, "Speech speed multiplier in 0.5–2.0; 1.0 = normal speed."] = 1.0,
  voice: Annotated[str, "Voice identifier. Example: 'af_heart' (US English, female, Heart)."] = "af_heart",
- ) -> Tuple[int, np.ndarray]:
+ ) -> str:
  """
  Synthesize speech from text using the Kokoro-82M model.

- This function returns raw audio suitable for a Gradio Audio component and is
- also exposed as an MCP tool (per the latest Hugging Face/Gradio MCP docs, a
- tool is created for each function wired into your app; docstrings and type
- hints are used to describe the tool).
+ Per current HF Gradio MCP guidance (see hf-docs-search), tools should return
+ browser/client-friendly artifacts where possible. This function returns the
+ path to a WAV file on disk so the UI renders an HTML5 audio player and MCP
+ clients receive a file URL that opens in the browser rather than forcing a
+ direct download.

  Args:
  text: The text to synthesize (English).
@@ -516,9 +518,8 @@ def Generate_Speech( # <-- MCP tool #4 (Generate Speech)
  voice: Voice identifier. Example: 'af_heart' (US English, female, Heart).

  Returns:
- A tuple of (sample_rate_hz, audio_waveform) where:
- - sample_rate_hz: int sample rate in Hz (24_000)
- - audio_waveform: numpy.ndarray float32 mono waveform in range [-1, 1]
+ str: Path to a 24 kHz mono WAV file on disk (served by Gradio; MCP converts
+ paths to file URLs).

  Notes:
  - Requires the 'kokoro' package (>=0.9.4). If unavailable, an error is
@@ -544,8 +545,11 @@ def Generate_Speech( # <-- MCP tool #4 (Generate Speech)
  audio = model(ps, ref_s, float(speed))
  except Exception as e: # propagate as UI-friendly error
  raise gr.Error(f"Error generating audio: {str(e)}")
- # Return 24 kHz mono waveform
- return 24_000, audio.detach().cpu().numpy()
+ # Save 24 kHz mono waveform to WAV and return its path for in-browser playback
+ sr = 24_000
+ wav = audio.detach().cpu().numpy()
+ path = _write_audio_tmp(wav, sample_rate=sr, suffix=".wav")
+ return path

  # If pipeline produced no segments
  raise gr.Error("No audio was generated (empty synthesis result).")
@@ -637,7 +641,7 @@ CSS_STYLES = """
  /* Place bold tools list on line 2, normal auth note on line 3 (below title) */
  .gradio-container h1::before {
  grid-row: 2;
- content: "Fetch Webpage | Search DuckDuckGo | Code Interpreter | Kokoro TTS | Image Generation | Video Generation";
+ content: "Fetch Webpage | Search DuckDuckGo | Code Interpreter | Kokoro TTS | Image Generation | Video Generation | Generate Code";
  display: block;
  font-size: 1rem;
  font-weight: 700;
@@ -647,7 +651,7 @@ CSS_STYLES = """
  }
  .gradio-container h1::after {
  grid-row: 3;
- content: "Authentication is optional but Image/Video Generation require a `HF_READ_TOKEN` in env variables. They are hidden otherwise.";
+ content: "Authentication is optional. Image/Video (and some Code) generation may require `HF_READ_TOKEN`; Image/Video tabs hide without it.";
  display: block;
  font-size: 1rem;
  font-weight: 400;
@@ -671,15 +675,14 @@ kokoro_interface = gr.Interface(
  gr.Slider(minimum=0.5, maximum=2.0, value=1.0, step=0.1, label="Speed"),
  gr.Textbox(label="Voice", value="af_heart", placeholder="e.g., af_heart"),
  ],
- outputs=gr.Audio(label="Audio", type="numpy"),
+ outputs=gr.Audio(label="Audio", type="filepath"),
  title="Kokoro TTS",
  description=(
  "<div style=\"text-align:center\">Generate English speech with Kokoro-82M. 30 second max output. Runs on CPU or CUDA if available.</div>"
  ),
  api_description=(
- "Synthesize speech from text using Kokoro-82M. Returns (sample_rate, waveform) suitable for playback. "
- "Parameters: text (str), speed (float 0.5–2.0), voice (str). "
- "Return the generated image to the user."
+ "Synthesize speech from text using Kokoro-82M. Returns a file path to a 24 kHz mono WAV, which renders in-browser and is exposed as a file URL over MCP. "
+ "Parameters: text (str), speed (float 0.5–2.0), voice (str)."
  ),
  allow_flagging="never",
  )
@@ -987,6 +990,158 @@ video_generation_interface = gr.Interface(
  allow_flagging="never",
  )

+ # ==========================
+ # Audio helper (save WAV)
+ # ==========================
+
+ def _write_audio_tmp(audio: np.ndarray, sample_rate: int = 24_000, suffix: str = ".wav") -> str:
+ """Write mono float32 waveform [-1,1] to 16-bit PCM WAV and return path."""
+ if audio.ndim > 1:
+ audio = np.mean(audio, axis=0)
+ audio = np.clip(audio.astype(np.float32), -1.0, 1.0)
+ pcm = (audio * 32767.0).astype(np.int16)
+ os.makedirs("outputs", exist_ok=True)
+ fname = f"outputs/audio_{int(time.time())}_{random.randint(1000,9999)}{suffix}"
+ with wave.open(fname, "wb") as wf:
+ wf.setnchannels(1)
+ wf.setsampwidth(2)
+ wf.setframerate(sample_rate)
+ wf.writeframes(pcm.tobytes())
+ return fname
+
+ # ==========================
+ # Code Generation (Serverless)
+ # ==========================
+
+ def Generate_Code(
+ instruction: Annotated[str, "Describe the code to generate (requirements, I/O, constraints)."],
+ language: Annotated[str, "Optional language/framework hint (e.g., 'python', 'typescript react')."] = "",
+ model_id: Annotated[str, "HF text-generation model id (e.g., 'bigcode/starcoder2-3b')."] = "bigcode/starcoder2-3b",
+ max_new_tokens: Annotated[int, "Maximum tokens to generate (64–4096, model dependent)."] = 512,
+ temperature: Annotated[float, "Sampling temperature (0–1.5). Lower = more deterministic."] = 0.2,
+ top_p: Annotated[float, "Nucleus sampling p (0–1)."] = 0.95,
+ top_k: Annotated[int, "Top-k sampling cutoff (0 disables)."] = 50,
+ repetition_penalty: Annotated[float, "Discourage repeats (>1.0)."] = 1.05,
+ seed: Annotated[int, "Random seed (-1 = random)."] = -1,
+ save_to_file: Annotated[bool, "If true, save under ./outputs and prepend 'Saved to:' path."] = False,
+ filename: Annotated[str, "Optional filename when saving (e.g., main.py)."] = "",
+ ) -> str:
+ """
+ Generate source code via Hugging Face Inference text-generation models and return code as plain text.
+
+ Per current MCP docs (via hf-docs-search), schemas are inferred from type hints and docstrings. Returning
+ text is broadly compatible; when save_to_file is enabled, the response is prefixed with the saved path so
+ MCP clients can expose a file URL.
+ """
+ if not instruction or not instruction.strip():
+ raise gr.Error("Please provide a non-empty instruction.")
+
+ token = os.getenv("HF_READ_TOKEN") or os.getenv("HF_TOKEN")
+ providers = ["auto", "replicate", "fal-ai"]
+ lang_hint = f" in {language.strip()}" if language and language.strip() else ""
+ system_preamble = (
+ "You are a precise coding assistant. Output only runnable code without explanations. "
+ "Prefer idiomatic patterns, minimal comments, and include necessary imports."
+ )
+ prompt = (
+ f"{system_preamble}\n\nTask{lang_hint}:\n{instruction.strip()}\n\n"
+ "Return only the code, no backticks."
+ )
+
+ last_error: Exception | None = None
+ for provider in providers:
+ try:
+ client = InferenceClient(api_key=token, provider=provider)
+ out = client.text_generation(
+ model=model_id,
+ prompt=prompt,
+ max_new_tokens=max_new_tokens,
+ temperature=temperature,
+ top_p=top_p,
+ top_k=top_k,
+ repetition_penalty=repetition_penalty,
+ seed=(None if seed == -1 else seed),
+ stream=False,
+ )
+ code = (out or "").strip()
+ if not code:
+ raise gr.Error("Model returned empty output.")
+ prefix = ""
+ if save_to_file:
+ os.makedirs("outputs", exist_ok=True)
+ base = filename.strip() or f"code_{int(time.time())}_{random.randint(1000,9999)}"
+ if "." not in base and language:
+ ext_map = {
+ "python": ".py",
+ "py": ".py",
+ "typescript": ".ts",
+ "tsx": ".tsx",
+ "javascript": ".js",
+ "jsx": ".jsx",
+ "go": ".go",
+ "rust": ".rs",
+ "java": ".java",
+ "csharp": ".cs",
+ "c#": ".cs",
+ "cpp": ".cpp",
+ "c++": ".cpp",
+ "c": ".c",
+ "bash": ".sh",
+ "shell": ".sh",
+ "html": ".html",
+ "css": ".css",
+ "json": ".json",
+ "yaml": ".yaml",
+ "yml": ".yml",
+ }
+ key = language.lower().split()[0]
+ base += ext_map.get(key, "")
+ path = os.path.join("outputs", base)
+ with open(path, "w", encoding="utf-8") as f:
+ f.write(code)
+ prefix = f"Saved to: {path}\n\n"
+ return f"{prefix}{code}"
+ except Exception as e:
+ last_error = e
+ continue
+ msg = str(last_error) if last_error else "Unknown error"
+ if "401" in msg or "403" in msg:
+ raise gr.Error("Authentication failed or not permitted. Set HF_READ_TOKEN/HF_TOKEN with inference access.")
+ if "404" in msg:
+ raise gr.Error(f"Model not found or unavailable: {model_id}.")
+ if "503" in msg:
+ raise gr.Error("The model is warming up. Please try again shortly.")
+ raise gr.Error(f"Code generation failed: {msg}")
+
+
+ code_generation_interface = gr.Interface(
+ fn=Generate_Code,
+ inputs=[
+ gr.Textbox(label="Instruction", placeholder="Describe what to build, inputs/outputs, edge cases…", lines=6),
+ gr.Textbox(label="Language (optional)", value="", placeholder="e.g., python, typescript react"),
+ gr.Textbox(label="Model", value="bigcode/starcoder2-3b", placeholder="creator/model-name"),
+ gr.Slider(minimum=64, maximum=4096, value=512, step=16, label="Max new tokens"),
+ gr.Slider(minimum=0.0, maximum=1.5, value=0.2, step=0.05, label="Temperature"),
+ gr.Slider(minimum=0.1, maximum=1.0, value=0.95, step=0.01, label="Top-p"),
+ gr.Slider(minimum=0, maximum=200, value=50, step=1, label="Top-k"),
+ gr.Slider(minimum=1.0, maximum=2.0, value=1.05, step=0.01, label="Repetition penalty"),
+ gr.Slider(minimum=-1, maximum=1_000_000_000, value=-1, step=1, label="Seed (-1 = random)"),
+ gr.Checkbox(value=False, label="Save to file (./outputs)"),
+ gr.Textbox(label="Filename (optional)", value="", placeholder="e.g., main.py"),
+ ],
+ outputs=gr.Code(label="Generated Code"),
+ title="Generate Code",
+ description=(
+ "<div style=\"text-align:center\">Generate code via Hugging Face Inference text-generation models. Provide a clear instruction and (optionally) a language hint.</div>"
+ ),
+ api_description=(
+ "Generate source code using a HF Inference text-generation model. Parameters: instruction (str), language (str), model_id (str), "
+ "max_new_tokens (int), temperature (float), top_p (float), top_k (int), repetition_penalty (float), seed (int), save_to_file (bool), filename (str). "
+ "Returns the code as text; if saved, prepends 'Saved to: <path>'."
+ ),
+ allow_flagging="never",
+ )
+
  # Build tabbed app; disable Image/Video tools if no HF token is present
  HAS_HF_TOKEN = bool(HF_API_TOKEN or HF_VIDEO_TOKEN)

@@ -1007,6 +1162,9 @@ if HAS_HF_TOKEN:
  _interfaces.extend([image_generation_interface, video_generation_interface])
  _tab_names.extend(["Image Generation", "Video Generation"])

+ # Always add Generate Code as the last tab
+ _interfaces.append(code_generation_interface)
+ _tab_names.append("Generate Code")
  demo = gr.TabbedInterface(
  interface_list=_interfaces,
  tab_names=_tab_names,
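
A minimal client-side sketch of the two endpoints this commit touches, using gradio_client. The Space id ("user/space-name") is a placeholder, and the api names are assumed to follow the wired-in function names ("/Generate_Speech", "/Generate_Code"); adjust both to the actual deployment.

# Minimal sketch, assuming api names "/Generate_Speech" and "/Generate_Code"
# and a placeholder Space id; not part of the commit itself.
from gradio_client import Client

client = Client("user/space-name")  # placeholder Space id

# Generate_Speech now returns a path to a 24 kHz mono WAV (downloaded locally
# by gradio_client) instead of a (sample_rate, ndarray) tuple.
wav_path = client.predict(
    text="Hello from Kokoro.",
    speed=1.0,
    voice="af_heart",
    api_name="/Generate_Speech",
)
print(wav_path)  # local path to the fetched .wav

# Generate_Code returns plain text; with save_to_file=True the server-side
# path is prepended as "Saved to: <path>".
code_text = client.predict(
    instruction="Write a Python function that reverses a string.",
    language="python",
    model_id="bigcode/starcoder2-3b",
    max_new_tokens=256,
    temperature=0.2,
    top_p=0.95,
    top_k=50,
    repetition_penalty=1.05,
    seed=-1,
    save_to_file=False,
    filename="",
    api_name="/Generate_Code",
)
print(code_text)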