Spaces:

mgbam
/

MCP_Res

Runtime error

App Files Files Community

mgbam committed 7 days ago

Commit

0e47704

verified ·

1 Parent(s): 3333933

Update mcp/gemini.py

Browse files

Files changed (1) hide show

mcp/gemini.py +51 -37

mcp/gemini.py CHANGED Viewed

@@ -1,54 +1,68 @@
-"""
-Gemini helper · resilient version
-• Tries the newest "gemini-1.5-flash" first (fast, 128k ctx)
-• Falls back to "gemini-pro" (32k ctx)
-• If the SDK returns 404 / PERMISSION_DENIED the call degrades
-  gracefully to an empty string so orchestrator can switch to OpenAI.
 """
-import os, asyncio, google.generativeai as genai
 from google.api_core import exceptions as gexc
-GEN_KEY = os.getenv("GEMINI_KEY")  # set in HF “Secrets”
-if GEN_KEY:
-    genai.configure(api_key=GEN_KEY)
-# cache models to avoid re-instantiation per request
-_MODELS = {}
 def _get_model(name: str):
-    if name not in _MODELS:
-        _MODELS[name] = genai.GenerativeModel(name)
-    return _MODELS[name]
-async def _generate(prompt: str, model_name: str, temp: float = 0.3) -> str:
-    try:
-        rsp = await asyncio.to_thread(
-            _get_model(model_name).generate_content,
-            prompt,
-            generation_config={"temperature": temp},
-        )
-        return rsp.text.strip()
-    except (gexc.NotFound, gexc.PermissionDenied):
-        # propagate None so orchestrator can decide to fall back
-        return ""
-# ---------- public wrappers ----------
-async def gemini_summarize(text: str, words: int = 150) -> str:
-    prompt = f"Summarize in ≤{words} words:\n{text[:12000]}"
-    # try Flash first → Pro second
     out = await _generate(prompt, "gemini-1.5-flash")
     if not out:
         out = await _generate(prompt, "gemini-pro")
     return out
-async def gemini_qa(question: str, context: str = "") -> str:
     prompt = (
-        "Use the context to answer concisely.\n\n"
         f"Context:\n{context[:10000]}\n\nQ: {question}\nA:"
     )
     out = await _generate(prompt, "gemini-1.5-flash")

+#!/usr/bin/env python3
+"""MedGenesis – **Gemini** (Google Generative AI) async helper.
+Key behaviours
+~~~~~~~~~~~~~~
+* Tries the fast **`gemini-1.5-flash`** model first → falls back to
+  **`gemini-pro`** when flash unavailable or quota‑exceeded.
+* Exponential back-off retry (delay starts at 2 s and doubles) for transient 429/503 errors.
+* Singleton model cache to avoid re‑instantiation cost.
+* Returns **empty string** on irrecoverable errors so orchestrator can
+  gracefully pivot to OpenAI.
 """
+from __future__ import annotations
+import os, asyncio, functools
+from typing import Dict
+import google.generativeai as genai
 from google.api_core import exceptions as gexc
+_API_KEY = os.getenv("GEMINI_KEY")
+if not _API_KEY:
+    raise RuntimeError("GEMINI_KEY env variable missing – set it in HF Secrets")
+genai.configure(api_key=_API_KEY)
+# ---------------------------------------------------------------------
+# Model cache
+# ---------------------------------------------------------------------
+@functools.lru_cache(maxsize=4)
 def _get_model(name: str):
+    return genai.GenerativeModel(name)
+async def _generate(prompt: str, model_name: str, *, temperature: float = 0.3, retries: int = 3) -> str:
+    """Run generation inside a ThreadPool – Gemini SDK is blocking."""
+    delay = 2
+    for _ in range(retries):
+        try:
+            resp = await asyncio.to_thread(
+                _get_model(model_name).generate_content,
+                prompt,
+                generation_config={"temperature": temperature},
+            )
+            return resp.text.strip()
+        except (gexc.ResourceExhausted, gexc.ServiceUnavailable):
+            await asyncio.sleep(delay)
+            delay *= 2
+        except (gexc.NotFound, gexc.PermissionDenied):
+            return ""  # unrecoverable – model/key unavailable
+    return ""  # after retries
+# ---------------------------------------------------------------------
+# Public wrappers
+# ---------------------------------------------------------------------
+async def gemini_summarize(text: str, *, words: int = 150) -> str:
+    prompt = f"Summarize in ≤{words} words:\n\n{text[:12000]}"
     out = await _generate(prompt, "gemini-1.5-flash")
     if not out:
         out = await _generate(prompt, "gemini-pro")
     return out
+async def gemini_qa(question: str, *, context: str = "") -> str:
     prompt = (
+        "You are an advanced biomedical research agent. Use the context to answer concisely.\n\n"
         f"Context:\n{context[:10000]}\n\nQ: {question}\nA:"
     )
     out = await _generate(prompt, "gemini-1.5-flash")