Update mcp/clinicaltrials.py
Browse files- mcp/clinicaltrials.py +25 -80
mcp/clinicaltrials.py
CHANGED
@@ -1,86 +1,31 @@
|
|
1 |
"""
|
2 |
-
clinicaltrials.py
|
3 |
-
|
4 |
-
Order of endpoints
|
5 |
-
──────────────────
|
6 |
-
1. beta-ut (modern ingest) https://beta-ut.clinicaltrials.gov/api/v2/studies
|
7 |
-
2. prod v2 https://clinicaltrials.gov/api/v2/studies
|
8 |
-
3. legacy v1 https://clinicaltrials.gov/api/query/study_fields
|
9 |
-
4. WHO ICTRP mirror https://trialsearch.who.int/api/StudyFields (JSON)
|
10 |
-
|
11 |
-
All calls are GET, JSON; no API-key required.
|
12 |
-
|
13 |
-
Returns [] on any failure so orchestrator never raises.
|
14 |
"""
|
15 |
-
|
16 |
-
from __future__ import annotations
|
17 |
-
import asyncio, httpx
|
18 |
-
from functools import lru_cache
|
19 |
from typing import List, Dict
|
20 |
|
21 |
-
|
22 |
-
|
23 |
-
|
24 |
-
|
25 |
-
|
26 |
-
|
27 |
-
|
28 |
-
|
29 |
-
|
30 |
-
|
31 |
-
|
32 |
-
|
33 |
-
|
34 |
-
|
35 |
-
|
36 |
-
headers=_HDR,
|
37 |
-
follow_redirects=True) as cli:
|
38 |
-
r = await cli.get(url, params=params)
|
39 |
-
r.raise_for_status()
|
40 |
-
return r.json()
|
41 |
|
42 |
-
# --- individual endpoint helpers -----------------------------------
|
43 |
-
async def _try_beta(term: str, n: int) -> List[Dict]:
    """Search the ``_BETA`` endpoint for *term*, requesting a page of *n* studies.

    Returns the ``"studies"`` entry of the decoded response, or an empty
    list when that key is absent.
    """
    params = dict(
        query=term,
        pageSize=n,
        fields="nctId,briefTitle,phase,status,startDate,conditions,interventions",
    )
    payload = await _get(_BETA, params)
    return payload.get("studies", [])
|
47 |
-
|
48 |
-
async def _try_v2(term: str, n: int) -> List[Dict]:
    """Search the ``_V2`` endpoint for *term*, requesting a page of *n* studies.

    Returns the ``"studies"`` entry of the decoded response, or an empty
    list when that key is absent.
    """
    params = dict(
        query=term,
        pageSize=n,
        fields="nctId,briefTitle,phase,status,startDate,conditions,interventions",
    )
    payload = await _get(_V2, params)
    return payload.get("studies", [])
|
52 |
-
|
53 |
-
async def _try_v1(term: str, n: int) -> List[Dict]:
    """Search the ``_V1`` study-fields endpoint for *term*, ranks 1 through *n*.

    Returns the ``"StudyFields"`` list nested under ``"StudyFieldsResponse"``
    in the decoded response, or an empty list when either key is absent.
    """
    params = {
        "expr": term,
        "fields": "NCTId,BriefTitle,Phase,OverallStatus,StartDate,Condition,InterventionName",
        "min_rnk": 1,
        "max_rnk": n,
        "fmt": "json",
    }
    payload = await _get(_V1, params)
    envelope = payload.get("StudyFieldsResponse", {})
    return envelope.get("StudyFields", [])
|
60 |
-
|
61 |
-
async def _try_who(term: str, n: int) -> List[Dict]:
    """Search the ``_WHO`` endpoint for *term*, ranks 1 through *n*.

    Returns the ``"StudyFields"`` list nested under ``"StudyFieldsResponse"``
    in the decoded response, or an empty list when either key is absent.
    """
    params = {
        "expr": term,
        "fields": "URL,HealthCondition,PublicTitle",
        "min_rnk": 1,
        "max_rnk": n,
        "fmt": "json",
    }
    payload = await _get(_WHO, params)
    envelope = payload.get("StudyFieldsResponse", {})
    return envelope.get("StudyFields", [])
|
66 |
-
|
67 |
-
# ────────────────────────────────────────────────────────────────────
|
68 |
-
@lru_cache(maxsize=256)
|
69 |
async def search_trials(term: str, *, max_studies: int = 20) -> List[Dict]:
|
70 |
-
|
71 |
-
|
72 |
-
|
73 |
-
|
74 |
-
|
75 |
-
|
76 |
-
|
77 |
-
|
78 |
-
return await fn(term, limit)
|
79 |
-
except (httpx.HTTPStatusError, httpx.ReadTimeout):
|
80 |
-
if attempt < _RETRY:
|
81 |
-
await asyncio.sleep(0.8)
|
82 |
-
return []
|
83 |
-
|
84 |
-
# Back-compat alias (some older code calls search_trials_v2)
|
85 |
-
async def search_trials_v2(term: str, *, max_studies: int = 20):
    """Backward-compatible alias that forwards to ``search_trials``.

    Passes *term* and *max_studies* through unchanged and returns whatever
    ``search_trials`` returns.
    """
    results = await search_trials(term, max_studies=max_studies)
    return results
|
|
|
1 |
"""
|
2 |
+
clinicaltrials.py – resilient client for the ClinicalTrials.gov v2 API.
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
3 |
"""
|
4 |
+
import httpx, asyncio
|
|
|
|
|
|
|
5 |
from typing import List, Dict
|
6 |
|
7 |
+
_BASE = "https://clinicaltrials.gov/api/v2/studies"
|
8 |
+
_UA = "Mozilla/5.0 (MedGenesis; +https://huggingface.co/spaces/mgbam/MCP_Res)"
|
9 |
+
_HDRS = {"User-Agent": _UA, "Accept": "application/json"}
|
10 |
+
|
11 |
+
async def _fetch(p: Dict, *, retries: int = 2) -> Dict:
    """GET ``_BASE`` with query parameters *p* and return the decoded JSON object.

    The request is attempted up to ``retries + 1`` times.  A 403 response or
    a transport-level failure (connection error, timeout) is retried after a
    one-second pause; any other status >= 400 gives up immediately.

    Args:
        p: Query-string parameters forwarded to the endpoint.
        retries: Number of additional attempts after the first one.

    Returns:
        The decoded JSON object, or ``{}`` when every attempt failed, the
        server answered with an error status, or the body was not a JSON
        object.  This function does not raise for HTTP or decoding errors.
    """
    attempts = retries + 1
    async with httpx.AsyncClient(timeout=20, headers=_HDRS, follow_redirects=True) as c:
        for attempt in range(attempts):
            is_last = attempt == attempts - 1
            try:
                r = await c.get(_BASE, params=p)
            except httpx.HTTPError:
                # Network failures and timeouts are handled like a 403:
                # back off and try again instead of propagating to the caller.
                if not is_last:
                    await asyncio.sleep(1)
                continue
            if r.status_code == 403:
                # No point sleeping when there is no further attempt to make.
                if not is_last:
                    await asyncio.sleep(1)
                continue
            if r.status_code >= 400:
                return {}
            try:
                data = r.json()
            except ValueError:
                # Body was not valid JSON (e.g. an HTML error page).
                return {}
            # Callers use ``.get`` on the result, so only hand back a mapping.
            return data if isinstance(data, dict) else {}
    return {}
|
|
|
|
|
|
|
|
|
|
|
22 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
23 |
async def search_trials(term: str, *, max_studies: int = 20) -> List[Dict]:
    """Search for studies matching *term* and return the raw study records.

    Args:
        term: Free-text search expression sent to the API.
        max_studies: Page size requested from the API; a non-positive value
            returns ``[]`` without issuing a request.

    Returns:
        The list stored under ``"studies"`` in the response, or ``[]`` when
        the request failed or the response did not contain such a list.
    """
    if max_studies <= 0:
        # Nothing was asked for; skip the network round-trip.
        return []

    # NOTE(review): the public v2 reference documents ``query.term`` as the
    # free-text parameter and capitalised piece names for ``fields`` —
    # confirm the keys and field names below are accepted by the endpoint.
    p = {
        "query": term,
        "pageSize": max_studies,
        "fields": ",".join([
            "nctId", "briefTitle", "phase", "status",
            "startDate", "conditions", "interventions",
        ]),
    }
    data = await _fetch(p)

    # Guard against error payloads: only a real list of studies is returned.
    studies = data.get("studies") if isinstance(data, dict) else None
    return studies if isinstance(studies, list) else []
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|