mgbam committed on
Commit
4764268
·
verified ·
1 Parent(s): d0cb899

Update mcp/clinicaltrials.py

Browse files
Files changed (1) hide show
  1. mcp/clinicaltrials.py +82 -52
mcp/clinicaltrials.py CHANGED
@@ -1,56 +1,86 @@
1
- #!/usr/bin/env python3
2
- """MedGenesis – ClinicalTrials.gov **v2** async wrapper.
3
-
4
- The legacy v1 JSON endpoint often throttles (HTTP 403/503). This helper
5
- switches to the production **v2** REST API, adds retry/back‑off, and
6
- mirrors the interface expected by existing code (`search_trials`).
7
-
8
- Key points
9
- ~~~~~~~~~~
10
- * Endpoint: `https://clinicaltrials.gov/api/v2/studies` (public, no key).
11
- * Back‑off retry (2×, 4×) for 429/5xx.
12
- * Results cached 12 h (512 queries).
13
- * Returns list of dicts with minimal fields: `nctId`, `briefTitle`,
14
- `phase`, `status`, `startDate`, `conditions`, `interventions`.
15
  """
16
- from __future__ import annotations
 
 
 
 
 
 
 
 
 
 
 
 
17
 
 
18
  import asyncio, httpx
19
  from functools import lru_cache
20
- from typing import List, Dict, Any
21
-
22
- _BASE = "https://clinicaltrials.gov/api/v2/studies"
23
- _TIMEOUT = 15
24
- _HEADERS = {"User-Agent": "MedGenesis/1.0 (https://huggingface.co/spaces)"}
25
- _MAX_PAGE = 50 # absolute hard cap
26
-
27
- # ---------------------------------------------------------------------
28
- # Internal fetch with retry
29
- # ---------------------------------------------------------------------
30
async def _fetch(params: Dict[str, Any], *, retries: int = 3) -> List[Dict]:
    """GET the studies endpoint, retrying throttled or server-error replies.

    Args:
        params: Query-string parameters sent with the request.
        retries: Maximum number of attempts.

    Returns:
        The ``"studies"`` list from the JSON payload on HTTP 200, or ``[]``
        when no attempt produced a 200 response.

    Raises:
        httpx.HTTPStatusError: On a non-retryable error status.
    """
    # Statuses treated as transient; 502/504 are included so the behaviour
    # matches the "429/5xx" retry policy described in the module docstring.
    retryable = {429, 500, 502, 503, 504}
    delay = 2
    # A single client serves all attempts instead of rebuilding one per retry.
    async with httpx.AsyncClient(timeout=_TIMEOUT, headers=_HEADERS) as cli:
        for attempt in range(retries):
            r = await cli.get(_BASE, params=params)
            if r.status_code == 200:
                return r.json().get("studies", [])
            if r.status_code in retryable:
                # Sleeping after the last attempt would only delay the caller.
                if attempt < retries - 1:
                    await asyncio.sleep(delay)
                    delay *= 2
                continue
            r.raise_for_status()
    return []  # final fallback
43
-
44
- # ---------------------------------------------------------------------
45
- # Public helper – cached
46
- # ---------------------------------------------------------------------
47
# Bounded cache of non-empty results keyed by (term, page size).
# functools.lru_cache cannot be used on a coroutine function: it would store
# the coroutine object, which raises RuntimeError when awaited a second time.
_TRIALS_CACHE: Dict[tuple, List[Dict]] = {}
_TRIALS_CACHE_SIZE = 512


async def search_trials(term: str, *, max_studies: int = 10) -> List[Dict]:
    """Return up to *max_studies* clinical-trial dicts for free-text *term*.

    *max_studies* is clamped to the range ``1.._MAX_PAGE``. Non-empty results
    are cached per ``(term, clamped max_studies)``; callers receive a shallow
    copy so that mutating the returned list does not alter the cache.
    """
    max_studies = max(1, min(max_studies, _MAX_PAGE))
    key = (term, max_studies)
    if key in _TRIALS_CACHE:
        # Re-insert so the entry becomes the most recently used one.
        cached = _TRIALS_CACHE.pop(key)
        _TRIALS_CACHE[key] = cached
        return list(cached)

    params = {
        "query": term,
        "pageSize": max_studies,
        "fields": "nctId,briefTitle,phase,status,startDate,conditions,interventions",
    }
    studies = await _fetch(params)
    # An empty list is also the fallback after exhausted retries, so it is
    # not cached.
    if studies:
        if len(_TRIALS_CACHE) >= _TRIALS_CACHE_SIZE:
            # dicts keep insertion order: the first key is the oldest entry.
            _TRIALS_CACHE.pop(next(iter(_TRIALS_CACHE)))
        _TRIALS_CACHE[key] = studies
    return list(studies)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  """
2
+ clinicaltrials.py · Modernised helper (July-2025-ready)
3
+
4
+ Order of endpoints
5
+ ──────────────────
6
+ 1. beta-ut (modern ingest) https://beta-ut.clinicaltrials.gov/api/v2/studies
7
+ 2. prod v2 https://clinicaltrials.gov/api/v2/studies
8
+ 3. legacy v1 https://clinicaltrials.gov/api/query/study_fields
9
+ 4. WHO ICTRP mirror https://trialsearch.who.int/api/StudyFields (JSON)
10
+
11
+ All calls are GET, JSON; no API-key required.
12
+
13
+ Returns [] on any failure so orchestrator never raises.
14
+ """
15
 
16
+ from __future__ import annotations
17
  import asyncio, httpx
18
  from functools import lru_cache
19
+ from typing import List, Dict
20
+
21
+ _UA = ("Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
22
+ "AppleWebKit/537.36 (KHTML, like Gecko) "
23
+ "Chrome/126.0 Safari/537.36")
24
+
25
+ _HDR = {"User-Agent": _UA, "Accept": "application/json"}
26
+ _TIMEOUT = 12
27
+ _RETRY = 1
28
+ _BETA = "https://beta-ut.clinicaltrials.gov/api/v2/studies"
29
+ _V2 = "https://clinicaltrials.gov/api/v2/studies"
30
+ _V1 = "https://clinicaltrials.gov/api/query/study_fields"
31
+ _WHO = "https://trialsearch.who.int/api/StudyFields"
32
+
33
+ # ────────────────────────────────────────────────────────────────────
34
async def _get(url: str, params: Dict) -> Dict:
    """Perform one GET on *url* with *params* and return the decoded JSON body.

    A fresh client (module timeout and headers, redirects followed) is opened
    for the call. Error statuses are raised by ``raise_for_status()``.
    """
    client = httpx.AsyncClient(
        timeout=_TIMEOUT,
        headers=_HDR,
        follow_redirects=True,
    )
    async with client:
        response = await client.get(url, params=params)
        response.raise_for_status()
        return response.json()
41
+
42
+ # --- individual endpoint helpers -----------------------------------
43
async def _try_beta(term: str, n: int) -> List[Dict]:
    """Query the beta-ut v2 endpoint for *term* with a page size of *n*.

    Returns the ``"studies"`` entry of the response, or ``[]`` if it is absent.
    """
    wanted = "nctId,briefTitle,phase,status,startDate,conditions,interventions"
    query = dict(query=term, pageSize=n, fields=wanted)
    payload = await _get(_BETA, query)
    return payload.get("studies", [])
47
+
48
async def _try_v2(term: str, n: int) -> List[Dict]:
    """Query the production v2 endpoint for *term* with a page size of *n*.

    Returns the ``"studies"`` entry of the response, or ``[]`` if it is absent.
    """
    wanted = "nctId,briefTitle,phase,status,startDate,conditions,interventions"
    query = dict(query=term, pageSize=n, fields=wanted)
    payload = await _get(_V2, query)
    return payload.get("studies", [])
52
+
53
async def _try_v1(term: str, n: int) -> List[Dict]:
    """Query the legacy v1 ``study_fields`` endpoint for ranks 1..*n* of *term*.

    Returns ``StudyFieldsResponse.StudyFields`` from the response, or ``[]``
    when either level is missing.
    """
    field_names = ",".join((
        "NCTId", "BriefTitle", "Phase", "OverallStatus", "StartDate",
        "Condition", "InterventionName",
    ))
    query = {
        "expr": term,
        "fields": field_names,
        "min_rnk": 1,
        "max_rnk": n,
        "fmt": "json",
    }
    payload = await _get(_V1, query)
    envelope = payload.get("StudyFieldsResponse", {})
    return envelope.get("StudyFields", [])
60
+
61
async def _try_who(term: str, n: int) -> List[Dict]:
    """Query the WHO ICTRP ``StudyFields`` endpoint for ranks 1..*n* of *term*.

    Returns ``StudyFieldsResponse.StudyFields`` from the response, or ``[]``
    when either level is missing.
    """
    query = {
        "expr": term,
        "fields": "URL,HealthCondition,PublicTitle",
        "min_rnk": 1,
        "max_rnk": n,
        "fmt": "json",
    }
    payload = await _get(_WHO, query)
    envelope = payload.get("StudyFieldsResponse", {})
    return envelope.get("StudyFields", [])
66
+
67
+ # ────────────────────────────────────────────────────────────────────
68
# Bounded cache of successful lookups keyed by (term, clamped limit).
# functools.lru_cache cannot be used on a coroutine function: it would store
# the coroutine object, which raises RuntimeError when awaited a second time.
_SEARCH_CACHE: Dict[tuple, List[Dict]] = {}
_SEARCH_CACHE_MAX = 256


async def search_trials(term: str, *, max_studies: int = 20) -> List[Dict]:
    """
    Return ≤max_studies trial records using the BETA→V2→V1→WHO cascade.

    *max_studies* is clamped to 1..100. Each endpoint is tried up to
    ``_RETRY + 1`` times, pausing 0.8 s between attempts. The first endpoint
    that answers wins, even if its result is empty. Successful results are
    cached, and callers receive a shallow copy so that mutating the returned
    list does not alter the cache.

    Empty list on total failure (never cached, so a later call retries).
    """
    limit = max(1, min(max_studies, 100))
    key = (term, limit)
    if key in _SEARCH_CACHE:
        # Re-insert so the entry becomes the most recently used one.
        cached = _SEARCH_CACHE.pop(key)
        _SEARCH_CACHE[key] = cached
        return list(cached)

    for fn in (_try_beta, _try_v2, _try_v1, _try_who):
        for attempt in range(_RETRY + 1):
            try:
                studies = await fn(term, limit)
            except (httpx.HTTPError, ValueError):
                # httpx.HTTPError covers status errors and transport failures
                # (connect errors, every timeout kind); ValueError covers a
                # body that is not valid JSON.
                if attempt < _RETRY:
                    await asyncio.sleep(0.8)
                continue
            if len(_SEARCH_CACHE) >= _SEARCH_CACHE_MAX:
                # dicts keep insertion order: the first key is the oldest entry.
                _SEARCH_CACHE.pop(next(iter(_SEARCH_CACHE)))
            _SEARCH_CACHE[key] = studies
            return list(studies)
    return []
83
+
84
+ # Back-compat alias (some older code calls search_trials_v2)
85
async def search_trials_v2(term: str, *, max_studies: int = 20):
    """Delegate to ``search_trials`` with the same arguments and return its result."""
    records = await search_trials(term, max_studies=max_studies)
    return records