mgbam committed on
Commit 0e47704 · verified · 1 Parent(s): 3333933

Update mcp/gemini.py

Files changed (1)
  1. mcp/gemini.py +51 -37
mcp/gemini.py CHANGED
@@ -1,54 +1,68 @@
- """
- Gemini helper · resilient version
+ #!/usr/bin/env python3
+ """MedGenesis – **Gemini** (Google Generative AI) async helper.

- Tries the newest "gemini-1.5-flash" first (fast, 128k ctx)
- • Falls back to "gemini-pro" (32k ctx)
- If the SDK returns 404 / PERMISSION_DENIED the call degrades
- gracefully to an empty string so orchestrator can switch to OpenAI.
+ Key behaviours
+ ~~~~~~~~~~~~~~
+ * Tries the fast **`gemini-1.5-flash`** model first; falls back to
+   **`gemini-pro`** when flash is unavailable or quota-exceeded.
+ * Exponential back-off retry (2×, 4×) for transient 5xx/429.
+ * Singleton model cache to avoid re-instantiation cost.
+ * Returns an **empty string** on irrecoverable errors so the orchestrator
+   can gracefully pivot to OpenAI.
  """
+ from __future__ import annotations
+
+ import os, asyncio, functools
+ from typing import Dict

- import os, asyncio, google.generativeai as genai
+ import google.generativeai as genai
  from google.api_core import exceptions as gexc

- GEN_KEY = os.getenv("GEMINI_KEY")  # set in HF “Secrets”
- if GEN_KEY:
-     genai.configure(api_key=GEN_KEY)
+ _API_KEY = os.getenv("GEMINI_KEY")
+ if not _API_KEY:
+     raise RuntimeError("GEMINI_KEY env variable missing – set it in HF Secrets")

- # cache models to avoid re-instantiation per request
- _MODELS = {}
+ genai.configure(api_key=_API_KEY)

+ # ---------------------------------------------------------------------
+ # Model cache
+ # ---------------------------------------------------------------------
+ @functools.lru_cache(maxsize=4)
  def _get_model(name: str):
-     if name not in _MODELS:
-         _MODELS[name] = genai.GenerativeModel(name)
-     return _MODELS[name]
-
-
- async def _generate(prompt: str, model_name: str, temp: float = 0.3) -> str:
-     try:
-         rsp = await asyncio.to_thread(
-             _get_model(model_name).generate_content,
-             prompt,
-             generation_config={"temperature": temp},
-         )
-         return rsp.text.strip()
-     except (gexc.NotFound, gexc.PermissionDenied):
-         # propagate None so orchestrator can decide to fall back
-         return ""
-
-
- # ---------- public wrappers ----------
- async def gemini_summarize(text: str, words: int = 150) -> str:
-     prompt = f"Summarize in ≤{words} words:\n{text[:12000]}"
-     # try Flash first → Pro second
+     return genai.GenerativeModel(name)
+
+
+ async def _generate(prompt: str, model_name: str, *, temperature: float = 0.3, retries: int = 3) -> str:
+     """Run generation inside a thread pool – the Gemini SDK is blocking."""
+     delay = 2
+     for _ in range(retries):
+         try:
+             resp = await asyncio.to_thread(
+                 _get_model(model_name).generate_content,
+                 prompt,
+                 generation_config={"temperature": temperature},
+             )
+             return resp.text.strip()
+         except (gexc.ResourceExhausted, gexc.ServiceUnavailable):
+             await asyncio.sleep(delay)
+             delay *= 2
+         except (gexc.NotFound, gexc.PermissionDenied):
+             return ""  # unrecoverable – model or key unavailable
+     return ""  # retries exhausted
+
+ # ---------------------------------------------------------------------
+ # Public wrappers
+ # ---------------------------------------------------------------------
+ async def gemini_summarize(text: str, *, words: int = 150) -> str:
+     prompt = f"Summarize in ≤{words} words:\n\n{text[:12000]}"
      out = await _generate(prompt, "gemini-1.5-flash")
      if not out:
          out = await _generate(prompt, "gemini-pro")
      return out

-
- async def gemini_qa(question: str, context: str = "") -> str:
+ async def gemini_qa(question: str, *, context: str = "") -> str:
      prompt = (
-         "Use the context to answer concisely.\n\n"
+         "You are an advanced biomedical research agent. Use the context to answer concisely.\n\n"
          f"Context:\n{context[:10000]}\n\nQ: {question}\nA:"
      )
      out = await _generate(prompt, "gemini-1.5-flash")
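
For context, a minimal sketch of how a caller might consume these helpers and pivot to OpenAI when Gemini yields an empty string. The `mcp.gemini` import path matches the file in this commit; the `openai_summarize` stub is hypothetical and stands in for whatever OpenAI helper the orchestrator actually uses:

```python
import asyncio

from mcp.gemini import gemini_summarize  # helper updated in this commit


async def openai_summarize(text: str) -> str:
    """Hypothetical stand-in for the orchestrator's OpenAI helper."""
    return "(summary from OpenAI fallback)"


async def summarize_with_fallback(text: str) -> str:
    # gemini_summarize returns "" on irrecoverable errors (bad key,
    # missing model, retries exhausted), so emptiness is the pivot signal.
    summary = await gemini_summarize(text, words=100)
    return summary or await openai_summarize(text)


if __name__ == "__main__":
    sample = "Metformin lowers hepatic glucose output and is first-line therapy for type 2 diabetes."
    print(asyncio.run(summarize_with_fallback(sample)))
```

Returning a sentinel empty string keeps the provider switch in one place rather than leaking Gemini-specific exceptions into the orchestrator.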