mgbam committed on
Commit
e816b33
·
verified ·
1 Parent(s): e421c45

Update mcp/ctgov.py

Browse files
Files changed (1) hide show
  1. mcp/ctgov.py +75 -43
mcp/ctgov.py CHANGED
@@ -1,86 +1,118 @@
1
  #!/usr/bin/env python3
2
- """
3
- ClinicalTrials.gov helper – v2 first, v1 fallback, 403-proof.
4
- """
5
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
6
  from __future__ import annotations
 
7
  import asyncio, httpx
8
  from functools import lru_cache
9
  from typing import List, Dict, Any
10
 
11
- # Endpoints
12
- _V2 = "https://clinicaltrials.gov/api/v2/studies"
13
- _V1 = "https://clinicaltrials.gov/api/query/study_fields"
 
14
 
15
  _HEADERS = {
16
- # Chrome-ish UA + explicit JSON accept header ← avoids 403
17
  "User-Agent": (
18
  "Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
19
  "AppleWebKit/537.36 (KHTML, like Gecko) "
20
- "Chrome/125.0 Safari/537.36"
21
  ),
22
  "Accept": "application/json",
23
  }
24
- _TIMEOUT = 15
25
- _MAX = 50 # cap page size
26
 
 
 
27
 
 
28
  async def _get(url: str, params: Dict[str, Any]) -> httpx.Response:
29
  async with httpx.AsyncClient(timeout=_TIMEOUT, headers=_HEADERS) as cli:
30
  return await cli.get(url, params=params)
31
 
 
 
 
 
 
 
 
 
 
 
 
 
32
 
33
  async def _try_v2(term: str, limit: int) -> List[Dict]:
34
  params = {
35
- "query" : term,
36
  "pageSize": limit,
37
- "fields" : "nctId,briefTitle,phase,status,startDate,conditions,interventions",
 
 
38
  }
39
  r = await _get(_V2, params)
40
  if r.status_code == 200:
41
  return r.json().get("studies", [])
42
- # let caller decide to retry or fallback
43
  raise httpx.HTTPStatusError("v2 failed", request=r.request, response=r)
44
 
45
-
46
  async def _try_v1(term: str, limit: int) -> List[Dict]:
47
  params = {
48
- "expr" : term,
49
- "fields" : "NCTId,BriefTitle,Phase,OverallStatus,StartDate,Condition,InterventionName",
50
- "max_rnk" : limit,
51
- "min_rnk" : 1,
52
- "fmt" : "json",
 
 
53
  }
54
  r = await _get(_V1, params)
55
  if r.status_code == 200:
56
- return r.json().get("StudyFieldsResponse", {}).get("StudyFields", [])
 
 
 
 
57
  raise httpx.HTTPStatusError("v1 failed", request=r.request, response=r)
58
 
59
-
60
  @lru_cache(maxsize=512)
61
- async def search_trials(term: str, *, max_studies: int = 10) -> List[Dict]:
62
- """Return ≤ *max_studies* trials; silent `[]` if CT.gov is blocking."""
63
  limit = max(1, min(max_studies, _MAX))
64
 
65
- # try v2 with back-off
66
- delay = 0
67
- for _ in range(3): # first + 2 retries
68
- try:
69
- if delay:
70
- await asyncio.sleep(delay)
71
- return await _try_v2(term, limit)
72
- except httpx.HTTPStatusError as e:
73
- if e.response.status_code not in {403, 429, 500, 503}:
74
- break # non-retryable
75
- delay = 2 if delay == 0 else delay * 2
76
-
77
- # fallback to v1 (once)
78
- try:
79
- return await _try_v1(term, limit)
80
- except httpx.HTTPStatusError:
81
- return [] # final graceful fallback
82
-
83
 
84
- # back-compat alias
85
- async def search_trials_v2(term: str, *, max_studies: int = 10):
86
  return await search_trials(term, max_studies=max_studies)
 
1
  #!/usr/bin/env python3
2
+ """mcp/ctgov.py – ClinicalTrials helper (Modernized July-2025-ready)
 
 
3
 
4
+ Strategy
5
+ ========
6
+ 1. **Primary** → Modernized OAS v2 endpoint (beta-ut) announced by CT.gov for July 2025.
7
+ 2. **Fallback-1** → Production v2 (`/api/v2/studies`).
8
+ 3. **Fallback-2** → Legacy v1 (`/api/query/study_fields`).
9
+ 4. If all fail → return empty list so UI never crashes.
10
+
11
+ Features
12
+ --------
13
+ * 12-second timeout, 3-step back-off (2 → 4 → 8 s) on `403/429/5xx`.
14
+ * Explicit `Accept: application/json` header (passes WAF).
15
+ * Realistic Chrome UA.
16
+ * LRU-cached for 24 h.
17
+ * Exports `search_trials` **and** `search_trials_v2` for back-compat.
18
+ """
19
  from __future__ import annotations
20
+
21
  import asyncio, httpx
22
  from functools import lru_cache
23
  from typing import List, Dict, Any
24
 
25
+ # ── endpoints ───────────────────────────────────────────────────────
26
+ _BETA = "https://beta-ut.clinicaltrials.gov/api/v2/studies" # modernized ingest
27
+ _V2 = "https://clinicaltrials.gov/api/v2/studies" # prod v2
28
+ _V1 = "https://clinicaltrials.gov/api/query/study_fields" # legacy JSON
29
 
30
  _HEADERS = {
 
31
  "User-Agent": (
32
  "Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
33
  "AppleWebKit/537.36 (KHTML, like Gecko) "
34
+ "Chrome/126.0 Safari/537.36"
35
  ),
36
  "Accept": "application/json",
37
  }
38
+ _TIMEOUT = 12
 
39
 
40
+ # hard cap to protect quotas
41
+ _MAX = 100
42
 
43
+ # ── helpers ─────────────────────────────────────────────────────────
async def _get(url: str, params: Dict[str, Any]) -> httpx.Response:
    """Send a single GET request to *url* and hand back the raw response.

    A short-lived ``httpx.AsyncClient`` is created with the module-wide
    ``_TIMEOUT`` and ``_HEADERS`` and is closed again before returning.
    The status code is deliberately not inspected here; callers decide
    what counts as success.
    """
    client = httpx.AsyncClient(timeout=_TIMEOUT, headers=_HEADERS)
    async with client:
        response = await client.get(url, params=params)
    return response
47
 
48
async def _try_beta(term: str, limit: int) -> List[Dict]:
    """Query the beta endpoint (``_BETA``) for *term*.

    Returns the ``"studies"`` list of the JSON body when the server answers
    200 (an empty list if that key is absent).  Any other status is turned
    into ``httpx.HTTPStatusError`` so the caller can retry or fall back.
    """
    # NOTE(review): the published v2 API lists ``query.term`` rather than a
    # bare ``query`` parameter - confirm the endpoint accepts this spelling.
    wanted_fields = "nctId,briefTitle,phase,status,startDate,conditions,interventions"
    response = await _get(
        _BETA,
        {"query": term, "pageSize": limit, "fields": wanted_fields},
    )
    if response.status_code != 200:
        raise httpx.HTTPStatusError(
            "beta failed", request=response.request, response=response
        )
    payload = response.json()
    return payload.get("studies", [])
60
 
async def _try_v2(term: str, limit: int) -> List[Dict]:
    """Query the production v2 endpoint (``_V2``) for *term*.

    Returns the ``"studies"`` list of the JSON body when the server answers
    200 (an empty list if that key is absent).  Any other status is turned
    into ``httpx.HTTPStatusError`` so the caller can retry or fall back.
    """
    # NOTE(review): the published v2 API lists ``query.term`` rather than a
    # bare ``query`` parameter - confirm the endpoint accepts this spelling.
    wanted_fields = "nctId,briefTitle,phase,status,startDate,conditions,interventions"
    response = await _get(
        _V2,
        {"query": term, "pageSize": limit, "fields": wanted_fields},
    )
    if response.status_code != 200:
        raise httpx.HTTPStatusError(
            "v2 failed", request=response.request, response=response
        )
    payload = response.json()
    return payload.get("studies", [])
73
 
 
async def _try_v1(term: str, limit: int) -> List[Dict]:
    """Query the legacy v1 ``study_fields`` endpoint (``_V1``) for *term*.

    Asks for ranks 1..*limit* in JSON format.  On a 200 answer the
    ``StudyFieldsResponse -> StudyFields`` list is returned (empty when
    either level is missing); any other status raises
    ``httpx.HTTPStatusError``.
    """
    request_args = {
        "expr": term,
        "fields": "NCTId,BriefTitle,Phase,OverallStatus,StartDate,Condition,InterventionName",
        "max_rnk": limit,
        "min_rnk": 1,
        "fmt": "json",
    }
    response = await _get(_V1, request_args)
    if response.status_code != 200:
        raise httpx.HTTPStatusError(
            "v1 failed", request=response.request, response=response
        )
    envelope = response.json().get("StudyFieldsResponse", {})
    return envelope.get("StudyFields", [])
92
 
93
# ── public ──────────────────────────────────────────────────────────
# Successful lookups are memoised as {(term, limit): (monotonic_ts, studies)}.
# ``functools.lru_cache`` must not wrap a coroutine function: it would cache
# the coroutine *object*, and awaiting that same object on a second call
# raises ``RuntimeError: cannot reuse already awaited coroutine``.
_RESULT_CACHE: Dict[tuple, tuple] = {}
_CACHE_TTL_SECONDS = 24 * 60 * 60  # entries expire after 24 h
_CACHE_MAX_ENTRIES = 512           # same bound the old lru_cache used
_RETRYABLE_STATUS = frozenset({403, 429, 500, 502, 503, 504})


async def search_trials(term: str, *, max_studies: int = 20) -> List[Dict]:
    """Return at most *max_studies* trials via the beta -> v2 -> v1 cascade.

    Args:
        term: Free-text search expression passed to each endpoint.
        max_studies: Requested number of studies; clamped to ``1.._MAX``.

    Returns:
        The list produced by the first endpoint that answers successfully,
        or ``[]`` when every endpoint fails.  Successful results are cached
        per ``(term, clamped limit)`` for 24 hours; the all-failed ``[]`` is
        never cached so a later call can recover.

    HTTP status errors, transport errors (timeouts, connection failures)
    and undecodable JSON bodies are all absorbed here rather than raised.
    """
    import time  # function-scope: the module header does not import it

    limit = max(1, min(max_studies, _MAX))
    key = (term, limit)

    cached = _RESULT_CACHE.get(key)
    if cached is not None:
        stamp, studies = cached
        if time.monotonic() - stamp < _CACHE_TTL_SECONDS:
            # Re-insert so this key becomes the most recently used one.
            _RESULT_CACHE[key] = _RESULT_CACHE.pop(key)
            return list(studies)
        del _RESULT_CACHE[key]  # stale entry

    # Order: beta, prod v2, legacy v1.
    for fetch in (_try_beta, _try_v2, _try_v1):
        delay = 0
        for _ in range(3):  # first try + 2 retries (sleeping 2 s, then 4 s)
            if delay:
                await asyncio.sleep(delay)
            try:
                studies = await fetch(term, limit)
            except httpx.HTTPStatusError as exc:
                if exc.response.status_code not in _RETRYABLE_STATUS:
                    break  # non-retryable status: move on to next endpoint
            except httpx.RequestError:
                pass  # timeout / connection problem: retry with back-off
            except ValueError:
                break  # 200 answer whose body is not valid JSON
            else:
                _RESULT_CACHE[key] = (time.monotonic(), studies)
                # Dicts keep insertion order, so the first key is the LRU one.
                while len(_RESULT_CACHE) > _CACHE_MAX_ENTRIES:
                    del _RESULT_CACHE[next(iter(_RESULT_CACHE))]
                return list(studies)
            delay = 2 if delay == 0 else delay * 2
    return []  # graceful fallback
 
 
115
 
116
# back-compat alias for old imports
async def search_trials_v2(term: str, *, max_studies: int = 20):
    """Forward to ``search_trials`` so code importing the old name keeps working."""
    trials = await search_trials(term, max_studies=max_studies)
    return trials