Update mcp/gemini.py
mcp/gemini.py · CHANGED · +45 -17
@@ -1,29 +1,57 @@

Before:

# mcp/gemini.py
"""
…
"""

import os, asyncio, google.generativeai as genai

GEN_KEY = os.getenv("GEMINI_KEY")
if GEN_KEY:
    genai.configure(api_key=GEN_KEY)

…

async def gemini_summarize(text: str, words: int = 150) -> str:
    prompt = f"Summarize in ≤{words} words:\n{text[:12000]}"
    …

async def gemini_qa(question: str, context: str = "") -> str:
    prompt =
    …
After:

"""
Gemini helper · resilient version

• Tries the newest "gemini-1.5-flash" first (fast, 128k ctx)
• Falls back to "gemini-pro" (32k ctx)
• If the SDK returns 404 / PERMISSION_DENIED the call degrades
  gracefully to an empty string so the orchestrator can switch to OpenAI.
"""

import os, asyncio, google.generativeai as genai
from google.api_core import exceptions as gexc

GEN_KEY = os.getenv("GEMINI_KEY")  # set in HF "Secrets"
if GEN_KEY:
    genai.configure(api_key=GEN_KEY)

# cache models to avoid re-instantiation per request
_MODELS = {}

def _get_model(name: str):
    if name not in _MODELS:
        _MODELS[name] = genai.GenerativeModel(name)
    return _MODELS[name]


async def _generate(prompt: str, model_name: str, temp: float = 0.3) -> str:
    try:
        rsp = await asyncio.to_thread(
            _get_model(model_name).generate_content,
            prompt,
            generation_config={"temperature": temp},
        )
        return rsp.text.strip()
    except (gexc.NotFound, gexc.PermissionDenied):
        # return "" so the orchestrator can decide to fall back
        return ""


# ---------- public wrappers ----------
async def gemini_summarize(text: str, words: int = 150) -> str:
    prompt = f"Summarize in ≤{words} words:\n{text[:12000]}"
    # try Flash first → Pro second
    out = await _generate(prompt, "gemini-1.5-flash")
    if not out:
        out = await _generate(prompt, "gemini-pro")
    return out


async def gemini_qa(question: str, context: str = "") -> str:
    prompt = (
        "Use the context to answer concisely.\n\n"
        f"Context:\n{context[:10000]}\n\nQ: {question}\nA:"
    )
    out = await _generate(prompt, "gemini-1.5-flash")
    if not out:
        out = await _generate(prompt, "gemini-pro")
    return out or "Gemini could not answer (model/key unavailable)."
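The degradation contract in the docstring, seen from the caller's side: an empty string from the Gemini helper means "model/key unavailable", and the orchestrator should switch providers. A minimal sketch, assuming mcp/ is importable as a package; openai_summarize below is a hypothetical stand-in for the project's real OpenAI helper, not part of this commit:

# usage sketch — not part of the diff above
import asyncio

from mcp.gemini import gemini_summarize

async def openai_summarize(text: str) -> str:
    # hypothetical stand-in for the project's OpenAI-backed helper
    return "summary from OpenAI"

async def summarize(text: str) -> str:
    out = await gemini_summarize(text)
    if not out:  # "" signals Gemini was unavailable (404 / permission denied)
        out = await openai_summarize(text)
    return out

# asyncio.run(summarize(open("report.txt").read()))

Note that _generate runs the synchronous SDK call in a worker thread via asyncio.to_thread, so both public wrappers stay awaitable without blocking the event loop.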