Update mcp/clinicaltrials.py
Browse files- mcp/clinicaltrials.py +82 -52
mcp/clinicaltrials.py
CHANGED
@@ -1,56 +1,86 @@
|
|
1 |
-
#!/usr/bin/env python3
|
2 |
-
"""MedGenesis β ClinicalTrials.gov **v2** async wrapper.
|
3 |
-
|
4 |
-
The legacy v1 JSON endpoint often throttles (HTTP 403/503). This helper
|
5 |
-
switches to the production **v2** REST API, adds retry/back-off, and
|
6 |
-
mirrors the interface expected by existing code (`search_trials`).
|
7 |
-
|
8 |
-
Key points
|
9 |
-
~~~~~~~~~~
|
10 |
-
* Endpoint: `https://clinicaltrials.gov/api/v2/studies` (public, no key).
|
11 |
-
* Back-off retry (2×, 4×) for 429/5xx.
|
12 |
-
* Results cached 12 h (512 queries).
|
13 |
-
* Returns list of dicts with minimal fields: `nctId`, `briefTitle`,
|
14 |
-
`phase`, `status`, `startDate`, `conditions`, `interventions`.
|
15 |
"""
|
16 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
17 |
|
|
|
18 |
import asyncio, httpx
|
19 |
from functools import lru_cache
|
20 |
-
from typing import List, Dict
|
21 |
-
|
22 |
-
|
23 |
-
|
24 |
-
|
25 |
-
|
26 |
-
|
27 |
-
|
28 |
-
|
29 |
-
|
30 |
-
|
31 |
-
|
32 |
-
|
33 |
-
|
34 |
-
|
35 |
-
|
36 |
-
|
37 |
-
|
38 |
-
|
39 |
-
|
40 |
-
|
41 |
-
|
42 |
-
|
43 |
-
|
44 |
-
|
45 |
-
|
46 |
-
|
47 |
-
|
48 |
-
|
49 |
-
|
50 |
-
|
51 |
-
|
52 |
-
|
53 |
-
|
54 |
-
|
55 |
-
|
56 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
"""
|
2 |
+
clinicaltrials.py · Modernised helper (July-2025-ready)
|
3 |
+
|
4 |
+
Order of endpoints
|
5 |
+
──────────────────
|
6 |
+
1. beta-ut (modern ingest) https://beta-ut.clinicaltrials.gov/api/v2/studies
|
7 |
+
2. prod v2 https://clinicaltrials.gov/api/v2/studies
|
8 |
+
3. legacy v1 https://clinicaltrials.gov/api/query/study_fields
|
9 |
+
4. WHO ICTRP mirror https://trialsearch.who.int/api/StudyFields (JSON)
|
10 |
+
|
11 |
+
All calls are GET, JSON; no API-key required.
|
12 |
+
|
13 |
+
Returns [] on any failure so orchestrator never raises.
|
14 |
+
"""
|
15 |
|
16 |
+
from __future__ import annotations
|
17 |
import asyncio, httpx
|
18 |
from functools import lru_cache
|
19 |
+
from typing import List, Dict
|
20 |
+
|
21 |
+
# Browser-like User-Agent string sent with every request.
_UA = " ".join((
    "Mozilla/5.0 (Windows NT 10.0; Win64; x64)",
    "AppleWebKit/537.36 (KHTML, like Gecko)",
    "Chrome/126.0 Safari/537.36",
))

# Default headers for every request: the User-Agent above plus a JSON Accept.
_HDR = {
    "User-Agent": _UA,
    "Accept": "application/json",
}

_TIMEOUT = 12  # passed as ``timeout`` to httpx.AsyncClient
_RETRY = 1     # number of extra attempts per endpoint after the first one

# Endpoints, in the order they are tried by ``search_trials``.
_BETA = "https://beta-ut.clinicaltrials.gov/api/v2/studies"
_V2 = "https://clinicaltrials.gov/api/v2/studies"
_V1 = "https://clinicaltrials.gov/api/query/study_fields"
_WHO = "https://trialsearch.who.int/api/StudyFields"
|
32 |
+
|
33 |
+
# ────────────────────────────────────────────────────────────────────
|
34 |
+
async def _get(url: str, params: Dict) -> Dict:
    """Issue a GET request to *url* and return the decoded JSON body.

    A fresh ``httpx.AsyncClient`` is created per call, configured with the
    module-level timeout and headers, and with redirect following enabled.

    Args:
        url: Endpoint to query.
        params: Query-string parameters for the request.

    Returns:
        Whatever ``response.json()`` yields for the response body.

    Raises:
        httpx.HTTPStatusError: If the response has a 4xx/5xx status
            (via ``raise_for_status``).
        Other exceptions raised by httpx or by JSON decoding propagate
        unchanged.
    """
    client = httpx.AsyncClient(
        timeout=_TIMEOUT,
        headers=_HDR,
        follow_redirects=True,
    )
    async with client:
        response = await client.get(url, params=params)
        response.raise_for_status()
        payload = response.json()
    return payload
|
41 |
+
|
42 |
+
# --- individual endpoint helpers -----------------------------------
|
43 |
+
async def _try_beta(term: str, n: int) -> List[Dict]:
    """Query the beta-ut v2 endpoint and return its ``studies`` list.

    Args:
        term: Search expression, sent as the ``query`` parameter.
        n: Requested page size, sent as ``pageSize``.

    Returns:
        The value stored under ``"studies"`` in the JSON response, or an
        empty list when that key is absent.

    NOTE(review): the public v2 API documents search parameters such as
    ``query.term``; confirm that a bare ``query`` key is accepted.
    """
    params = {
        "query": term,
        "pageSize": n,
        "fields": "nctId,briefTitle,phase,status,startDate,conditions,interventions",
    }
    payload = await _get(_BETA, params)
    return payload.get("studies", [])
|
47 |
+
|
48 |
+
async def _try_v2(term: str, n: int) -> List[Dict]:
    """Query the production v2 endpoint and return its ``studies`` list.

    Args:
        term: Search expression, sent as the ``query`` parameter.
        n: Requested page size, sent as ``pageSize``.

    Returns:
        The value stored under ``"studies"`` in the JSON response, or an
        empty list when that key is absent.

    NOTE(review): the public v2 API documents search parameters such as
    ``query.term``; confirm that a bare ``query`` key is accepted.
    """
    params = {
        "query": term,
        "pageSize": n,
        "fields": "nctId,briefTitle,phase,status,startDate,conditions,interventions",
    }
    payload = await _get(_V2, params)
    return payload.get("studies", [])
|
52 |
+
|
53 |
+
async def _try_v1(term: str, n: int) -> List[Dict]:
    """Query the legacy v1 ``study_fields`` endpoint.

    Args:
        term: Search expression, sent as ``expr``.
        n: Highest rank to request (``max_rnk``); ranks start at 1.

    Returns:
        The ``StudyFields`` list nested under ``StudyFieldsResponse`` in the
        JSON response, or an empty list when either key is absent.
    """
    wanted_fields = (
        "NCTId,BriefTitle,Phase,OverallStatus,StartDate,"
        "Condition,InterventionName"
    )
    params = {
        "expr": term,
        "fields": wanted_fields,
        "min_rnk": 1,
        "max_rnk": n,
        "fmt": "json",
    }
    payload = await _get(_V1, params)
    envelope = payload.get("StudyFieldsResponse", {})
    return envelope.get("StudyFields", [])
|
60 |
+
|
61 |
+
async def _try_who(term: str, n: int) -> List[Dict]:
    """Query the WHO trial-search endpoint.

    Args:
        term: Search expression, sent as ``expr``.
        n: Highest rank to request (``max_rnk``); ranks start at 1.

    Returns:
        The ``StudyFields`` list nested under ``StudyFieldsResponse`` in the
        JSON response, or an empty list when either key is absent.
    """
    params = {
        "expr": term,
        "fields": "URL,HealthCondition,PublicTitle",
        "min_rnk": 1,
        "max_rnk": n,
        "fmt": "json",
    }
    payload = await _get(_WHO, params)
    envelope = payload.get("StudyFieldsResponse", {})
    return envelope.get("StudyFields", [])
|
66 |
+
|
67 |
+
# ────────────────────────────────────────────────────────────────────
|
68 |
+
# Bounded in-process cache of successful lookups, keyed by (term, limit).
# ``functools.lru_cache`` cannot be used on an ``async def``: it would store
# the coroutine object, and awaiting that object a second time raises
# ``RuntimeError: cannot reuse already awaited coroutine``.
# Dict insertion order doubles as recency order: a hit is re-inserted at the
# end, and the first key is evicted once the cache is full.
_TRIAL_CACHE: Dict[tuple, List[Dict]] = {}
_TRIAL_CACHE_MAX = 256


async def search_trials(term: str, *, max_studies: int = 20) -> List[Dict]:
    """Return at most *max_studies* trial records for *term*.

    Endpoints are tried in the order BETA -> V2 -> V1 -> WHO. Each endpoint
    gets ``_RETRY + 1`` attempts with a short pause between attempts. The
    first endpoint that answers with a list wins, even if that list is empty.

    Args:
        term: Search expression forwarded to each endpoint helper.
        max_studies: Desired number of records; clamped to the range 1..100.

    Returns:
        A list of study dicts in the shape of whichever endpoint answered
        (the shapes differ between endpoints), or ``[]`` when every endpoint
        failed. Successful results are cached per ``(term, limit)``; callers
        receive a shallow copy so mutating the list does not alter the cache.
    """
    limit = max(1, min(max_studies, 100))
    key = (term, limit)

    cached = _TRIAL_CACHE.pop(key, None)
    if cached is not None:
        _TRIAL_CACHE[key] = cached  # re-insert to mark as most recently used
        return list(cached)

    for fetch in (_try_beta, _try_v2, _try_v1, _try_who):
        for attempt in range(_RETRY + 1):
            try:
                studies = await fetch(term, limit)
            except (httpx.HTTPError, ValueError, AttributeError):
                # httpx.HTTPError covers bad statuses, timeouts and
                # connection errors; ValueError covers undecodable JSON;
                # AttributeError covers a JSON body that is not an object.
                studies = None

            if isinstance(studies, list):
                if len(_TRIAL_CACHE) >= _TRIAL_CACHE_MAX:
                    del _TRIAL_CACHE[next(iter(_TRIAL_CACHE))]  # evict oldest
                _TRIAL_CACHE[key] = studies
                return list(studies)

            if attempt < _RETRY:
                await asyncio.sleep(0.8)

    # Failures are not cached, so a transient outage is retried next call.
    return []


# Kept so callers that relied on the former lru_cache API can still reset
# the cache.
search_trials.cache_clear = _TRIAL_CACHE.clear
|
83 |
+
|
84 |
+
# Back-compat alias (some older code calls search_trials_v2)
|
85 |
+
async def search_trials_v2(term: str, *, max_studies: int = 20):
    """Backward-compatible alias that delegates to ``search_trials``.

    Args:
        term: Search expression, forwarded unchanged.
        max_studies: Forwarded unchanged as the keyword argument of the
            same name.

    Returns:
        Whatever ``search_trials`` returns for the same arguments.
    """
    studies = await search_trials(term, max_studies=max_studies)
    return studies
|