Update mcp/alerts.py
mcp/alerts.py  +86 -28  CHANGED
@@ -1,48 +1,106 @@
-"""
-• Stores the last 30 PubMed/arXiv links per query.
-• Returns a dict of {query: [new_links]} when fresh papers appear.
-Implementation is intentionally simple: JSON on disk + orchestrate_search.
-"""

#!/usr/bin/env python3
# mcp/alerts.py
"""MedGenesis – saved-query alert helper (async).

Monitors previously saved queries (PubMed/arXiv) and notifies when new
papers appear. Stores a lightweight JSON DB on disk (**locally inside the
Space**), keeping the latest *N* paper links per query.

Changes vs. legacy version
~~~~~~~~~~~~~~~~~~~~~~~~~~
* **Thread-safe JSON IO** with `asyncio.Lock` (handles simultaneous
  Streamlit sessions).
* Exponential back-off retry around `orchestrate_search` so one bad
  request doesn't kill the whole loop.
* Maximum DB size of 100 queries; the oldest entries are evicted automatically.
* Atomic file writes: the JSON is dumped to a temp file, then moved into
  place with `Path.replace`.

DB location: `mcp/data/medgen_alerts.json` (created on first run).
"""
from __future__ import annotations

import asyncio, json, tempfile
from pathlib import Path
from typing import Dict, List

from mcp.orchestrator import orchestrate_search

_DB_PATH = (Path(__file__).parent / "data" / "medgen_alerts.json").resolve()
_DB_PATH.parent.mkdir(exist_ok=True)

_MAX_IDS = 30       # keep last N paper links per query
_MAX_QUERIES = 100  # protect runaway DB growth

_lock = asyncio.Lock()
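
# Illustrative on-disk shape of the alert DB (the query and links below are
# hypothetical examples, not real entries):
#   {
#     "glioblastoma CRISPR": [
#       "https://pubmed.ncbi.nlm.nih.gov/12345678/",
#       "https://arxiv.org/abs/2401.00001"
#     ]
#   }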

# ---------------------------------------------------------------------
# Internal JSON helpers
# ---------------------------------------------------------------------

def _read_db() -> Dict[str, List[str]]:
    if _DB_PATH.exists():
        try:
            return json.loads(_DB_PATH.read_text())
        except json.JSONDecodeError:
            return {}
    return {}


def _write_db(data: Dict[str, List[str]]):
    # atomic write: write to tmp then replace
    tmp = tempfile.NamedTemporaryFile("w", delete=False, dir=_DB_PATH.parent)
    json.dump(data, tmp, indent=2)
    tmp.flush()
    tmp.close()
    Path(tmp.name).replace(_DB_PATH)

# ---------------------------------------------------------------------
# Public API
# ---------------------------------------------------------------------
async def check_alerts(queries: List[str]) -> Dict[str, List[str]]:
    """Return `{query: [fresh_links]}` for queries with new papers."""
    async with _lock:  # serialize DB read
        db = _read_db()

    new_links_map: Dict[str, List[str]] = {}

    async def _process(q: str):
        # Retry orchestrate_search up to 3x with exponential back-off (2 s, 4 s, 8 s)
        delay = 2
        for _ in range(3):
            try:
                res = await orchestrate_search(q)
                break
            except Exception:
                await asyncio.sleep(delay)
                delay *= 2
        else:
            return  # give up on this query

        links = [p["link"] for p in res["papers"]]
        prev = set(db.get(q, []))
        fresh = [l for l in links if l not in prev]
        if fresh:
            new_links_map[q] = fresh
            db[q] = links[:_MAX_IDS]

    await asyncio.gather(*[_process(q) for q in queries])

    # Trim DB if it exceeds the query cap (evict the oldest entries first)
    if len(db) > _MAX_QUERIES:
        for key in list(db.keys())[:-_MAX_QUERIES]:
            db.pop(key, None)

    async with _lock:
        _write_db(db)

    return new_links_map


# ---------------------------------------------------------------------
# CLI demo
# ---------------------------------------------------------------------
if __name__ == "__main__":
    async def _demo():
        demo_map = await check_alerts(["glioblastoma CRISPR"])
        print("Fresh:", demo_map)

    asyncio.run(_demo())
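
A minimal usage sketch of `check_alerts` from a Streamlit page (not part of this commit; the `saved_queries` session key, the fallback query, and the file name are assumptions for illustration):

# alerts_demo.py – hypothetical caller, not part of mcp/alerts.py
import asyncio

import streamlit as st

from mcp.alerts import check_alerts

# Assumed: the UI stores the user's saved queries under this session key.
saved_queries = st.session_state.get("saved_queries", ["glioblastoma CRISPR"])

# check_alerts is async; a Streamlit script runs synchronously, so drive it here.
fresh = asyncio.run(check_alerts(saved_queries))

for query, links in fresh.items():
    st.info(f"{len(links)} new paper(s) for '{query}'")
    for url in links:
        st.markdown(f"- {url}")

Because the alert DB lives on the Space's local disk, repeated calls only report links not already stored for that query.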