mgbam committed on
Commit
0cd23e6
·
verified ·
1 Parent(s): 686ea1e

Update mcp/clinicaltrials.py

Browse files
Files changed (1) hide show
  1. mcp/clinicaltrials.py +25 -80
mcp/clinicaltrials.py CHANGED
@@ -1,86 +1,31 @@
1
  """
2
- clinicaltrials.py · Modernised helper (July-2025-ready)
3
-
4
- Order of endpoints
5
- ──────────────────
6
- 1. beta-ut (modern ingest) https://beta-ut.clinicaltrials.gov/api/v2/studies
7
- 2. prod v2 https://clinicaltrials.gov/api/v2/studies
8
- 3. legacy v1 https://clinicaltrials.gov/api/query/study_fields
9
- 4. WHO ICTRP mirror https://trialsearch.who.int/api/StudyFields (JSON)
10
-
11
- All calls are GET, JSON; no API-key required.
12
-
13
- Returns [] on any failure so orchestrator never raises.
14
  """
15
-
16
- from __future__ import annotations
17
- import asyncio, httpx
18
- from functools import lru_cache
19
  from typing import List, Dict
20
 
21
- _UA = ("Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
22
- "AppleWebKit/537.36 (KHTML, like Gecko) "
23
- "Chrome/126.0 Safari/537.36")
24
-
25
- _HDR = {"User-Agent": _UA, "Accept": "application/json"}
26
- _TIMEOUT = 12
27
- _RETRY = 1
28
- _BETA = "https://beta-ut.clinicaltrials.gov/api/v2/studies"
29
- _V2 = "https://clinicaltrials.gov/api/v2/studies"
30
- _V1 = "https://clinicaltrials.gov/api/query/study_fields"
31
- _WHO = "https://trialsearch.who.int/api/StudyFields"
32
-
33
- # ────────────────────────────────────────────────────────────────────
34
- async def _get(url: str, params: Dict) -> Dict:
35
- async with httpx.AsyncClient(timeout=_TIMEOUT,
36
- headers=_HDR,
37
- follow_redirects=True) as cli:
38
- r = await cli.get(url, params=params)
39
- r.raise_for_status()
40
- return r.json()
41
 
42
- # --- individual endpoint helpers -----------------------------------
43
- async def _try_beta(term: str, n: int) -> List[Dict]:
44
- p = {"query": term, "pageSize": n,
45
- "fields": "nctId,briefTitle,phase,status,startDate,conditions,interventions"}
46
- return (await _get(_BETA, p)).get("studies", [])
47
-
48
- async def _try_v2(term: str, n: int) -> List[Dict]:
49
- p = {"query": term, "pageSize": n,
50
- "fields": "nctId,briefTitle,phase,status,startDate,conditions,interventions"}
51
- return (await _get(_V2, p)).get("studies", [])
52
-
53
- async def _try_v1(term: str, n: int) -> List[Dict]:
54
- p = {"expr": term,
55
- "fields": ("NCTId,BriefTitle,Phase,OverallStatus,StartDate,"
56
- "Condition,InterventionName"),
57
- "min_rnk": 1, "max_rnk": n, "fmt": "json"}
58
- j = (await _get(_V1, p)).get("StudyFieldsResponse", {})
59
- return j.get("StudyFields", [])
60
-
61
- async def _try_who(term: str, n: int) -> List[Dict]:
62
- p = {"expr": term, "fields": "URL,HealthCondition,PublicTitle",
63
- "min_rnk": 1, "max_rnk": n, "fmt": "json"}
64
- j = (await _get(_WHO, p))
65
- return j.get("StudyFieldsResponse", {}).get("StudyFields", [])
66
-
67
- # ────────────────────────────────────────────────────────────────────
68
- @lru_cache(maxsize=256)
69
  async def search_trials(term: str, *, max_studies: int = 20) -> List[Dict]:
70
- """
71
- Return ≤max_studies trial records using BETA→V2→V1→WHO cascade.
72
- Empty list on total failure.
73
- """
74
- limit = max(1, min(max_studies, 100))
75
- for fn in (_try_beta, _try_v2, _try_v1, _try_who):
76
- for attempt in range(_RETRY + 1):
77
- try:
78
- return await fn(term, limit)
79
- except (httpx.HTTPStatusError, httpx.ReadTimeout):
80
- if attempt < _RETRY:
81
- await asyncio.sleep(0.8)
82
- return []
83
-
84
- # Back-compat alias (some older code calls search_trials_v2)
85
- async def search_trials_v2(term: str, *, max_studies: int = 20):
86
- return await search_trials(term, max_studies=max_studies)
 
1
  """
2
+ clinicaltrials.py resilient mirror of ClinicalTrials.gov v2 API.
 
 
 
 
 
 
 
 
 
 
 
3
  """
4
+ import httpx, asyncio
 
 
 
5
  from typing import List, Dict
6
 
7
+ _BASE = "https://clinicaltrials.gov/api/v2/studies"
8
+ _UA = "Mozilla/5.0 (MedGenesis; +https://huggingface.co/spaces/mgbam/MCP_Res)"
9
+ _HDRS = {"User-Agent": _UA, "Accept": "application/json"}
10
+
11
async def _fetch(p: Dict, *, retries: int = 2) -> Dict:
    """GET ``_BASE`` with query parameters *p* and return the decoded JSON object.

    The request is attempted up to ``retries + 1`` times. An HTTP 403 or a
    transport-level failure (timeout, connection error) counts as a failed
    attempt and is retried after a one-second pause. Any other status
    >= 400 ends the call immediately.

    Args:
        p: Query-string parameters forwarded to ``httpx`` unchanged.
        retries: Number of additional attempts after the first one.

    Returns:
        The decoded JSON object, or ``{}`` when every attempt failed, the
        server answered with an error status, or the body was not a JSON
        object. This function does not raise for network or decoding errors.
    """
    attempts = retries + 1
    async with httpx.AsyncClient(timeout=20, headers=_HDRS, follow_redirects=True) as c:
        for attempt in range(attempts):
            try:
                r = await c.get(_BASE, params=p)
            except httpx.HTTPError:
                # Timeouts / connection failures: treat like a failed attempt
                # instead of letting the exception escape to the caller.
                r = None
            if r is None or r.status_code == 403:
                # Only pause when another attempt will actually follow.
                if attempt + 1 < attempts:
                    await asyncio.sleep(1)
                continue
            if r.status_code >= 400:
                return {}
            try:
                data = r.json()
            except ValueError:
                # Body was not valid JSON (e.g. an HTML error page).
                return {}
            # Callers use ``.get``; anything that is not an object is unusable.
            return data if isinstance(data, dict) else {}
    return {}


async def search_trials(term: str, *, max_studies: int = 20) -> List[Dict]:
    """Search the ClinicalTrials.gov v2 studies endpoint for *term*.

    Args:
        term: Free-text search expression.
        max_studies: Upper bound on the number of records returned. Values
            below 1 yield an empty list without issuing a request.

    Returns:
        At most ``max_studies`` study records (the ``"studies"`` array of the
        response), or ``[]`` when the request fails or returns no studies.
    """
    if max_studies < 1:
        return []
    # NOTE(review): the v2 API documents ``query.term`` / ``query.cond`` and
    # PascalCase field names; confirm that the bare ``query`` parameter and
    # these camelCase names are accepted before relying on them.
    p = {
        "query": term,
        # The v2 API coerces larger page sizes down to 1000; cap explicitly.
        "pageSize": min(max_studies, 1000),
        "fields": ",".join([
            "nctId", "briefTitle", "phase", "status",
            "startDate", "conditions", "interventions",
        ]),
    }
    data = await _fetch(p)
    studies = data.get("studies", [])
    if not isinstance(studies, list):
        return []
    return studies[:max_studies]