mgbam committed on
Commit
d0cb899
·
verified ·
1 Parent(s): 08a3e96

Update mcp/disgenet.py

Browse files
Files changed (1) hide show
  1. mcp/disgenet.py +49 -59
mcp/disgenet.py CHANGED
@@ -1,68 +1,58 @@
1
- #!/usr/bin/env python3
2
- """MedGenesis – DisGeNET async helper (disease → gene associations).
3
-
4
- Features
5
- ~~~~~~~~
6
- * Accepts optional Bearer token via env **`DISGENET_KEY`** (rate‑limit free).
7
- * Endpoint: `https://www.disgenet.org/api/gda/disease/<disease_name>`
8
- * Back‑off retry (2×, 4×) for 429/5xx.
9
- * LRU cache (24 h, 512 queries) to minimise API calls.
10
- * Returns top *N* rows (default = 10) as `list[dict]`.
11
  """
12
- from __future__ import annotations
 
 
 
 
 
 
 
 
 
 
13
 
 
14
  import os, asyncio, httpx
15
  from functools import lru_cache
16
- from typing import List, Dict, Any
17
 
18
- _BASE = "https://www.disgenet.org/api/gda/disease"
19
- _TOKEN = os.getenv("DISGENET_KEY")
20
- _HEADERS = {"Authorization": f"Bearer {_TOKEN}"} if (_TOKEN := os.getenv("DISGENET_KEY")) else {}
21
- _TIMEOUT = 15
 
22
 
23
- # ---------------------------------------------------------------------
24
- # Internal fetch with retry
25
- # ---------------------------------------------------------------------
26
- async def _fetch(url: str, *, retries: int = 3) -> List[Dict[str, Any]]:
27
- delay = 2
28
- for _ in range(retries):
29
- async with httpx.AsyncClient(timeout=_TIMEOUT, headers=_HEADERS) as cli:
30
- resp = await cli.get(url, params={"source": "ALL", "format": "json"})
31
- if resp.status_code == 200:
32
- return resp.json()
33
- if resp.status_code in {429, 500, 503}:
34
- await asyncio.sleep(delay)
35
- delay *= 2
36
- continue
37
- resp.raise_for_status()
38
- return [] # final fallback
39
 
40
- # ---------------------------------------------------------------------
41
- # Public API – cached 24 h
42
- # ---------------------------------------------------------------------
43
  @lru_cache(maxsize=512)
44
- async def disease_to_genes(disease_name: str, *, limit: int = 10) -> List[Dict]:
45
- """Return up to *limit* gene‑association dicts for *disease_name*.
46
-
47
- Parameters
48
- ----------
49
- disease_name : str
50
- Free‑text disease label (e.g. "glioblastoma"). Internally converted
51
- to lowercase and URL‑encoded.
52
- limit : int, optional
53
- Maximum number of rows to return (default = 10).
54
  """
55
- url = f"{_BASE}/{disease_name.lower()}"
56
- data = await _fetch(url)
57
- return data[:limit]
58
-
59
-
60
- # ---------------------------------------------------------------------
61
- # CLI demo
62
- # ---------------------------------------------------------------------
63
- if __name__ == "__main__":
64
- import json
65
- async def _demo():
66
- out = await disease_to_genes("glioblastoma", limit=5)
67
- print(json.dumps(out[:2], indent=2))
68
- asyncio.run(_demo())
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  """
2
+ disgenet.py · Disease-Gene associations helper
3
+ Docs: https://www.disgenet.com/downloads (REST v1) 🛈
4
+
5
+ Change-log
6
+ ──────────
7
+ ‒ 2025-06-25 – .org → .COM redirect (301) broke calls.
8
+ We now default to https://www.disgenet.com/api
9
+ and still follow redirects if they add a CDN later.
10
+ ‒ Graceful retry + 24 h LRU-cache.
11
+ ‒ Empty list on any error so orchestrator never crashes.
12
+ """
13
 
14
+ from __future__ import annotations
15
  import os, asyncio, httpx
16
  from functools import lru_cache
17
+ from typing import List, Dict
18
 
19
+ _TOKEN = os.getenv("DISGENET_KEY") # optional Bearer token
20
+ _BASE = "https://www.disgenet.com/api" # ← new canonical host
21
+ _HDRS = {"Accept": "application/json"}
22
+ if _TOKEN:
23
+ _HDRS["Authorization"] = f"Bearer {_TOKEN}"
24
 
25
+ _TIMEOUT = 12
26
+ _RETRIES = 2
 
 
 
 
 
 
 
 
 
 
 
 
 
 
27
 
28
# ────────────────────────────────────────────────────────────────────
# Result cache for ``disease_to_genes``, keyed by (lower-cased name, limit).
#
# ``functools.lru_cache`` must not wrap a coroutine function: it memoises the
# coroutine *object*, and awaiting that same object a second time raises
# ``RuntimeError: cannot reuse already awaited coroutine``.  A plain dict of
# finished results avoids that.  Dicts keep insertion order, so re-inserting
# an entry on every hit gives least-recently-used eviction.  Entries do not
# expire on their own.
_GENE_CACHE: Dict[tuple, List[Dict]] = {}
_GENE_CACHE_MAX = 512


async def disease_to_genes(disease_name: str,
                           limit: int = 10) -> List[Dict]:
    """Return the top *limit* gene associations for *disease_name*.

    Parameters
    ----------
    disease_name:
        Free-text disease label.  It is lower-cased and percent-encoded
        before being placed in the request path.
    limit:
        Maximum number of rows to return.

    Returns
    -------
    A list with at most *limit* association dicts.  An empty list is
    returned when the service answers 404 or when every attempt fails.
    Only results of requests that completed (including the 404 case) are
    cached; failures are never cached, so a later call retries the service.
    """
    # Local import: the rest of the module does not import urllib.
    from urllib.parse import quote

    key = (disease_name.lower(), limit)
    if key in _GENE_CACHE:
        # Re-insert to mark the entry as most recently used.
        rows = _GENE_CACHE.pop(key)
        _GENE_CACHE[key] = rows
        return list(rows)  # copy, so callers cannot mutate the cached list

    # safe="" also escapes "/", which would otherwise split the path segment.
    url = f"{_BASE}/gda/disease/{quote(key[0], safe='')}"
    params = {"source": "ALL", "format": "json"}

    async def _one_call() -> List[Dict]:
        """Perform a single GET and return the sliced JSON payload."""
        async with httpx.AsyncClient(timeout=_TIMEOUT,
                                     headers=_HDRS,
                                     follow_redirects=True) as cli:
            r = await cli.get(url, params=params)
            if r.status_code == 404:
                return []
            r.raise_for_status()
            return r.json()[:limit]

    for attempt in range(_RETRIES):
        try:
            rows = await _one_call()
        except (httpx.HTTPStatusError, httpx.ReadTimeout):
            # Pause before the next attempt, but not after the last one.
            if attempt + 1 < _RETRIES:
                await asyncio.sleep(0.7)
            continue
        except Exception:
            # Deliberate best-effort: any other failure yields an empty
            # result instead of propagating to the caller.
            break

        if len(_GENE_CACHE) >= _GENE_CACHE_MAX:
            # Evict the least recently used entry (first key in the dict).
            _GENE_CACHE.pop(next(iter(_GENE_CACHE)))
        _GENE_CACHE[key] = rows
        return list(rows)

    return []  # graceful fallback


# Parity with the attribute the former ``lru_cache`` wrapper exposed.
disease_to_genes.cache_clear = _GENE_CACHE.clear