Update mcp/disgenet.py
Browse files- mcp/disgenet.py +49 -59
mcp/disgenet.py
CHANGED
@@ -1,68 +1,58 @@
|
|
1 |
-
#!/usr/bin/env python3
|
2 |
-
"""MedGenesis β DisGeNET async helper (disease β gene associations).
|
3 |
-
|
4 |
-
Features
|
5 |
-
~~~~~~~~
|
6 |
-
* Accepts optional Bearer token via env **`DISGENET_KEY`** (rate-limit free).
|
7 |
-
* Endpoint: `https://www.disgenet.org/api/gda/disease/<disease_name>`
|
8 |
-
* Back-off retry (2×, 4×) for 429/5xx.
|
9 |
-
* LRU cache (24 h, 512 queries) to minimise API calls.
|
10 |
-
* Returns top *N* rows (default = 10) as `list[dict]`.
|
11 |
"""
|
12 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
13 |
|
|
|
14 |
import os, asyncio, httpx
|
15 |
from functools import lru_cache
|
16 |
-
from typing import List, Dict
|
17 |
|
18 |
-
|
19 |
-
|
20 |
-
|
21 |
-
|
|
|
22 |
|
23 |
-
|
24 |
-
|
25 |
-
# ---------------------------------------------------------------------
|
26 |
-
async def _fetch(url: str, *, retries: int = 3) -> List[Dict[str, Any]]:
|
27 |
-
delay = 2
|
28 |
-
for _ in range(retries):
|
29 |
-
async with httpx.AsyncClient(timeout=_TIMEOUT, headers=_HEADERS) as cli:
|
30 |
-
resp = await cli.get(url, params={"source": "ALL", "format": "json"})
|
31 |
-
if resp.status_code == 200:
|
32 |
-
return resp.json()
|
33 |
-
if resp.status_code in {429, 500, 503}:
|
34 |
-
await asyncio.sleep(delay)
|
35 |
-
delay *= 2
|
36 |
-
continue
|
37 |
-
resp.raise_for_status()
|
38 |
-
return [] # final fallback
|
39 |
|
40 |
-
#
|
41 |
-
# Public API – cached 24 h
|
42 |
-
# ---------------------------------------------------------------------
|
43 |
@lru_cache(maxsize=512)
|
44 |
-
async def disease_to_genes(disease_name: str,
|
45 |
-
|
46 |
-
|
47 |
-
Parameters
|
48 |
-
----------
|
49 |
-
disease_name : str
|
50 |
-
Free-text disease label (e.g. "glioblastoma"). Internally converted
|
51 |
-
to lowercase and URL-encoded.
|
52 |
-
limit : int, optional
|
53 |
-
Maximum number of rows to return (default = 10).
|
54 |
"""
|
55 |
-
|
56 |
-
|
57 |
-
|
58 |
-
|
59 |
-
|
60 |
-
|
61 |
-
|
62 |
-
|
63 |
-
|
64 |
-
|
65 |
-
|
66 |
-
|
67 |
-
|
68 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
"""
|
2 |
+
disgenet.py · Disease-Gene associations helper
|
3 |
+
Docs: https://www.disgenet.com/downloads (REST v1)
|
4 |
+
|
5 |
+
Change-log
|
6 |
+
──────────
|
7 |
+
• 2025-06-25 – .org → .COM redirect (301) broke calls.
|
8 |
+
We now default to https://www.disgenet.com/api
|
9 |
+
and still follow redirects if they add a CDN later.
|
10 |
+
• Graceful retry + 24 h LRU-cache.
|
11 |
+
• Empty list on any error so orchestrator never crashes.
|
12 |
+
"""
|
13 |
|
14 |
+
from __future__ import annotations
|
15 |
import os, asyncio, httpx
|
16 |
from functools import lru_cache
|
17 |
+
from typing import List, Dict
|
18 |
|
19 |
+
_TOKEN = os.getenv("DISGENET_KEY") # optional Bearer token
|
20 |
+
_BASE = "https://www.disgenet.com/api" # ← new canonical host
|
21 |
+
_HDRS = {"Accept": "application/json"}
|
22 |
+
if _TOKEN:
|
23 |
+
_HDRS["Authorization"] = f"Bearer {_TOKEN}"
|
24 |
|
25 |
+
_TIMEOUT = 12
|
26 |
+
_RETRIES = 2
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
27 |
|
28 |
+
# ββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
|
|
|
|
29 |
# In-process result cache: (normalised disease name, limit) -> (stored_at, rows).
# A plain dict keeps insertion order, which is all the LRU eviction below needs.
# NOTE: functools.lru_cache must not wrap an ``async def`` -- it would memoise
# the coroutine *object*, and awaiting that object a second time raises
# ``RuntimeError: cannot reuse already awaited coroutine``.
_GDA_CACHE = {}
_GDA_CACHE_TTL = 24 * 60 * 60   # seconds an entry stays valid (24 h)
_GDA_CACHE_MAX = 512            # maximum number of cached queries


async def disease_to_genes(disease_name: str,
                           limit: int = 10) -> List[Dict]:
    """Return the top-N gene associations for *disease_name*.

    Parameters
    ----------
    disease_name : str
        Free-text disease label. It is stripped, lower-cased and
        percent-encoded before being placed in the request path.
    limit : int, optional
        Maximum number of rows to return (default = 10).

    Returns
    -------
    list of dict
        The first ``limit`` items of the JSON array returned by the API.
        An empty list is returned on HTTP 404, on any request/parsing error,
        or when all attempts fail -- this function never raises for those.

    Notes
    -----
    Successful lookups (including 404 "nothing found") are cached in-process
    for 24 h, at most 512 entries, least-recently-used evicted first.
    Failures are not cached, so a transient outage does not stick.
    """
    import time
    from urllib.parse import quote

    name = disease_name.strip().lower()
    key = (name, limit)
    now = time.monotonic()

    # Cache lookup: pop + re-insert moves a live entry to the "most recent" end;
    # an expired entry is simply dropped by the pop.
    hit = _GDA_CACHE.pop(key, None)
    if hit is not None:
        stored_at, rows = hit
        if now - stored_at < _GDA_CACHE_TTL:
            _GDA_CACHE[key] = hit
            return list(rows)  # copy so callers cannot mutate the cached list

    # safe="" also encodes "/" so a label can never add extra path segments.
    url = f"{_BASE}/gda/disease/{quote(name, safe='')}"
    params = {"source": "ALL", "format": "json"}

    async def _one_call() -> List[Dict]:
        """Perform a single GET; raise on HTTP errors or an unexpected payload."""
        async with httpx.AsyncClient(timeout=_TIMEOUT,
                                     headers=_HDRS,
                                     follow_redirects=True) as cli:
            r = await cli.get(url, params=params)
            if r.status_code == 404:
                return []
            r.raise_for_status()
            payload = r.json()
            if not isinstance(payload, list):
                raise ValueError("unexpected DisGeNET payload (expected a JSON array)")
            return payload[:limit]

    for attempt in range(_RETRIES):
        try:
            rows = await _one_call()
        except (httpx.HTTPStatusError, httpx.ReadTimeout):
            # Possibly transient: pause briefly, but not after the last attempt.
            if attempt + 1 < _RETRIES:
                await asyncio.sleep(0.7)
            continue
        except Exception:
            # Deliberate best-effort: any other failure yields the empty fallback.
            break

        # Evict least-recently-used entries before inserting the fresh result.
        while len(_GDA_CACHE) >= _GDA_CACHE_MAX:
            del _GDA_CACHE[next(iter(_GDA_CACHE))]
        _GDA_CACHE[key] = (time.monotonic(), rows)
        return list(rows)

    return []  # graceful fallback
|