#!/usr/bin/env python3
"""MedGenesis – **Gemini** (Google Generative AI) async helper.
Key behaviours
~~~~~~~~~~~~~~
* Tries the fast **`gemini-1.5-flash`** model first → falls back to
**`gemini-pro`** when flash unavailable or quota‑exceeded.
* Exponential back‑off retry (2×, 4×) for transient 5xx/429.
* Singleton model cache to avoid re‑instantiation cost.
* Returns **empty string** on irrecoverable errors so orchestrator can
gracefully pivot to OpenAI.
"""
from __future__ import annotations

import asyncio
import functools
import os

import google.generativeai as genai
from google.api_core import exceptions as gexc
_API_KEY = os.getenv("GEMINI_KEY")
if not _API_KEY:
    raise RuntimeError("GEMINI_KEY env variable missing – set it in HF Secrets")

genai.configure(api_key=_API_KEY)
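
# Example key setup (shell) – an illustrative sketch, not part of this module;
# the variable name matches the os.getenv lookup above, the value is a placeholder:
#
#   export GEMINI_KEY="<your-google-generative-ai-api-key>"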
# ---------------------------------------------------------------------
# Model cache
# ---------------------------------------------------------------------
@functools.lru_cache(maxsize=4)
def _get_model(name: str):
    return genai.GenerativeModel(name)
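
# Cache behaviour (illustrative sketch only): repeated look-ups return the
# *same* GenerativeModel instance, so construction cost is paid once per
# model name.
#
#   assert _get_model("gemini-pro") is _get_model("gemini-pro")
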
async def _generate(
    prompt: str,
    model_name: str,
    *,
    temperature: float = 0.3,
    retries: int = 3,
) -> str:
    """Run generation inside a thread pool – the Gemini SDK is blocking."""
    delay = 2
    for attempt in range(retries):
        try:
            resp = await asyncio.to_thread(
                _get_model(model_name).generate_content,
                prompt,
                generation_config={"temperature": temperature},
            )
            return resp.text.strip()
        except (gexc.ResourceExhausted, gexc.ServiceUnavailable):
            if attempt == retries - 1:
                break                       # retries exhausted
            await asyncio.sleep(delay)      # transient 429/5xx – back off
            delay *= 2
        except (gexc.NotFound, gexc.PermissionDenied):
            return ""                       # unrecoverable – model/key unavailable
    return ""                               # all attempts failed
# ---------------------------------------------------------------------
# Public wrappers
# ---------------------------------------------------------------------
async def gemini_summarize(text: str, *, words: int = 150) -> str:
    """Summarize *text* in at most *words* words (flash first, pro fallback)."""
    prompt = f"Summarize in ≤{words} words:\n\n{text[:12000]}"
    out = await _generate(prompt, "gemini-1.5-flash")
    if not out:
        out = await _generate(prompt, "gemini-pro")
    return out

async def gemini_qa(question: str, *, context: str = "") -> str:
    """Answer *question* from *context* (flash first, pro fallback)."""
    prompt = (
        "You are an advanced biomedical research agent. Use the context to answer concisely.\n\n"
        f"Context:\n{context[:10000]}\n\nQ: {question}\nA:"
    )
    out = await _generate(prompt, "gemini-1.5-flash")
    if not out:
        out = await _generate(prompt, "gemini-pro")
    return out or "Gemini could not answer (model/key unavailable)."
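
# ---------------------------------------------------------------------
# Manual smoke test – a minimal sketch, not part of the MedGenesis API.
# Assumes GEMINI_KEY is set; the sample text and question are placeholders.
# ---------------------------------------------------------------------
if __name__ == "__main__":
    async def _demo() -> None:
        summary = await gemini_summarize(
            "CRISPR-Cas9 is an RNA-guided endonuclease system adapted from a "
            "bacterial immune mechanism and widely used for genome editing.",
            words=40,
        )
        print("summary:", summary or "<no answer – check key/quota>")

        answer = await gemini_qa(
            "Which enzyme performs the DNA cut in CRISPR-Cas9 editing?",
            context="Cas9 is an RNA-guided DNA endonuclease.",
        )
        print("answer:", answer)

    asyncio.run(_demo())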