"""MedGenesis – **Gemini** (Google Generative AI) async helper.

Key behaviours
~~~~~~~~~~~~~~

* Tries the fast **`gemini-1.5-flash`** model first → falls back to
  **`gemini-pro`** when flash is unavailable or its quota is exceeded.
* Exponential back-off retry (2 s, then 4 s) for transient 5xx/429 errors.
* Singleton model cache to avoid re-instantiation cost.
* Returns an **empty string** on irrecoverable errors so the orchestrator
  can gracefully pivot to OpenAI.
"""

from __future__ import annotations

import asyncio
import functools
import os

import google.generativeai as genai
from google.api_core import exceptions as gexc

_API_KEY = os.getenv("GEMINI_KEY")
if not _API_KEY:
    raise RuntimeError("GEMINI_KEY env variable missing – set it in HF Secrets")

genai.configure(api_key=_API_KEY)


@functools.lru_cache(maxsize=4)
def _get_model(name: str) -> genai.GenerativeModel:
    """Return a cached ``GenerativeModel`` – one singleton per model name."""
    return genai.GenerativeModel(name)


async def _generate(prompt: str, model_name: str, *, temperature: float = 0.3, retries: int = 3) -> str:
    """Run generation inside a thread pool – the Gemini SDK is blocking."""
    delay = 2
    for attempt in range(retries):
        try:
            resp = await asyncio.to_thread(
                _get_model(model_name).generate_content,
                prompt,
                generation_config={"temperature": temperature},
            )
            return resp.text.strip()
        except (gexc.ResourceExhausted, gexc.ServiceUnavailable):
            # Transient 429/5xx – back off exponentially before retrying,
            # but don't sleep after the final attempt.
            if attempt < retries - 1:
                await asyncio.sleep(delay)
                delay *= 2
        except (gexc.NotFound, gexc.PermissionDenied):
            # Wrong model name or bad API key – irrecoverable for this model,
            # so let the caller fall back immediately.
            return ""
    return ""


async def gemini_summarize(text: str, *, words: int = 150) -> str:
    """Summarize *text* in at most *words* words, with flash → pro fallback."""
    prompt = f"Summarize in ≤{words} words:\n\n{text[:12000]}"
    out = await _generate(prompt, "gemini-1.5-flash")
    if not out:
        # Flash failed or is unavailable – retry once on the slower pro model.
        out = await _generate(prompt, "gemini-pro")
    return out


async def gemini_qa(question: str, *, context: str = "") -> str:
    """Answer *question* from *context*, with flash → pro fallback."""
    prompt = (
        "You are an advanced biomedical research agent. Use the context to answer concisely.\n\n"
        f"Context:\n{context[:10000]}\n\nQ: {question}\nA:"
    )
    out = await _generate(prompt, "gemini-1.5-flash")
    if not out:
        out = await _generate(prompt, "gemini-pro")
    return out or "Gemini could not answer (model/key unavailable)."
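

# Minimal usage sketch – a hypothetical smoke test, not part of the module's
# public API. Assumes GEMINI_KEY is set (the import above enforces this) and
# that network access to the Gemini API is available; the sample strings are
# illustrative only.
if __name__ == "__main__":

    async def _demo() -> None:
        summary = await gemini_summarize(
            "Metformin is a first-line oral therapy for type 2 diabetes."
        )
        answer = await gemini_qa("What class of drug is metformin?", context=summary)
        print(summary, answer, sep="\n---\n")

    asyncio.run(_demo())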