File size: 4,437 Bytes
5035006 e816b33 5035006 e816b33 5035006 e816b33 078f31a 5035006 c9d29cb e816b33 c09fa6f 5035006 e816b33 5035006 e816b33 078f31a e816b33 c09fa6f e816b33 5035006 e816b33 078f31a 5035006 c9d29cb e816b33 5035006 e816b33 c9d29cb 5035006 078f31a 5035006 e816b33 5035006 078f31a e816b33 078f31a e816b33 5035006 e816b33 5035006 078f31a e816b33 078f31a e816b33 078f31a |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 |
#!/usr/bin/env python3
"""mcp/ctgov.py β ClinicalTrials helper (Modernized Julyβ2025βready)
Strategy
========
1. **Primary** β Modernized OASΒ v2 endpoint (betaβut) announced by CT.gov for Julyβ―2025.
2. **Fallbackβ1** β Production v2 (`/api/v2/studies`).
3. **Fallbackβ2** β Legacy v1 (`/api/query/study_fields`).
4. If all failΒ β return empty list so UI never crashes.
Features
--------
* 12βsecond timeout, 3βstep backβoff (2Β βΒ 4Β βΒ 8β―s) on `403/429/5xx`.
* Explicit `Accept: application/json` header (passes WAF).
* Realistic ChromeΒ UA.
* LRUβcached for 24Β h.
* Exports `search_trials` **and** `search_trials_v2` for backβcompat.
"""
from __future__ import annotations

import asyncio
import time
from functools import lru_cache
from typing import Any, Dict, List

import httpx
# -- endpoints ----------------------------------------------------------------
# CT.gov endpoints, listed in the order the search cascade tries them.
_BETA = "https://beta-ut.clinicaltrials.gov/api/v2/studies"  # modernized ingest
_V2 = "https://clinicaltrials.gov/api/v2/studies"  # prod v2
_V1 = "https://clinicaltrials.gov/api/query/study_fields"  # legacy JSON

# Browser-like headers attached to every request made by this module.
_HEADERS = {
    "User-Agent": " ".join(
        (
            "Mozilla/5.0 (Windows NT 10.0; Win64; x64)",
            "AppleWebKit/537.36 (KHTML, like Gecko)",
            "Chrome/126.0 Safari/537.36",
        )
    ),
    "Accept": "application/json",
}

_TIMEOUT = 12  # timeout value handed to the httpx client
_MAX = 100  # hard cap to protect quotas
# -- helpers ------------------------------------------------------------------
async def _get(url: str, params: Dict[str, Any]) -> httpx.Response:
    """Send one GET request and return the raw response.

    A fresh ``httpx.AsyncClient`` configured with the module-wide timeout
    and headers is opened for the call and closed before returning.

    Args:
        url: Endpoint to query.
        params: Query-string parameters for the request.

    Returns:
        The ``httpx.Response``; the status code is *not* checked here.
    """
    client = httpx.AsyncClient(timeout=_TIMEOUT, headers=_HEADERS)
    async with client:
        response = await client.get(url, params=params)
    return response
async def _try_beta(term: str, limit: int) -> List[Dict]:
    """Query the beta (modernized) v2 endpoint for *term*.

    Args:
        term: Search expression, sent as the ``query`` parameter.
        limit: Page size requested from the API.

    Returns:
        The ``studies`` list from the JSON body, or ``[]`` if that key is
        absent.

    Raises:
        httpx.HTTPStatusError: For any response status other than 200.
    """
    # NOTE(review): the published CT.gov v2 API appears to document
    # `query.term` (not `query`) and differently-cased field names --
    # confirm these parameters against the live API.
    wanted = "nctId,briefTitle,phase,status,startDate,conditions,interventions"
    resp = await _get(
        _BETA,
        {"query": term, "pageSize": limit, "fields": wanted},
    )
    if resp.status_code != 200:
        raise httpx.HTTPStatusError(
            "beta failed", request=resp.request, response=resp
        )
    return resp.json().get("studies", [])
async def _try_v2(term: str, limit: int) -> List[Dict]:
    """Query the production v2 endpoint for *term*.

    Args:
        term: Search expression, sent as the ``query`` parameter.
        limit: Page size requested from the API.

    Returns:
        The ``studies`` list from the JSON body, or ``[]`` if that key is
        absent.

    Raises:
        httpx.HTTPStatusError: For any response status other than 200.
    """
    # NOTE(review): the published CT.gov v2 API appears to document
    # `query.term` (not `query`) and differently-cased field names --
    # confirm these parameters against the live API.
    wanted = "nctId,briefTitle,phase,status,startDate,conditions,interventions"
    resp = await _get(
        _V2,
        {"query": term, "pageSize": limit, "fields": wanted},
    )
    if resp.status_code != 200:
        raise httpx.HTTPStatusError(
            "v2 failed", request=resp.request, response=resp
        )
    return resp.json().get("studies", [])
async def _try_v1(term: str, limit: int) -> List[Dict]:
    """Query the legacy v1 ``study_fields`` endpoint for *term*.

    Args:
        term: Search expression, sent as the ``expr`` parameter.
        limit: Highest rank requested (``max_rnk``); ranks start at 1.

    Returns:
        The ``StudyFieldsResponse.StudyFields`` list from the JSON body, or
        ``[]`` when either level is missing.

    Raises:
        httpx.HTTPStatusError: For any response status other than 200.
    """
    wanted = (
        "NCTId,BriefTitle,Phase,OverallStatus,StartDate,Condition,InterventionName"
    )
    query = {
        "expr": term,
        "fields": wanted,
        "max_rnk": limit,
        "min_rnk": 1,
        "fmt": "json",
    }
    resp = await _get(_V1, query)
    if resp.status_code != 200:
        raise httpx.HTTPStatusError(
            "v1 failed", request=resp.request, response=resp
        )
    envelope = resp.json().get("StudyFieldsResponse", {})
    return envelope.get("StudyFields", [])
# -- public -------------------------------------------------------------------
# Result cache: (term, clamped limit) -> (time.monotonic() stamp, studies).
# functools.lru_cache must not wrap a coroutine function: it would store the
# coroutine *object*, and awaiting that object a second time raises
# "RuntimeError: cannot reuse already awaited coroutine". It also has no
# expiry, so awaited results are cached by hand with a time-to-live instead.
_CACHE_TTL = 24 * 60 * 60  # seconds a cached result stays valid (24 h)
_CACHE_MAX = 512  # maximum number of cached queries
_RETRY_STATUSES = frozenset({403, 429, 500, 502, 503, 504})
_cache: Dict[Any, Any] = {}


async def search_trials(term: str, *, max_studies: int = 20) -> List[Dict]:
    """Return up to *max_studies* trials using the BETA -> V2 -> V1 cascade.

    Each endpoint gets at most three attempts, waiting 2 s and then 4 s
    between them. Retries happen only for timeouts and for HTTP statuses
    403/429/5xx; any other failure moves straight on to the next endpoint.
    Successful results are cached per ``(term, clamped limit)`` for 24 hours.

    Args:
        term: Free-text search expression passed to the endpoint helpers.
        max_studies: Requested number of studies; clamped to ``1.._MAX``.

    Returns:
        A new list of study dicts (shape depends on which endpoint
        answered), or ``[]`` if every endpoint failed. Failures are not
        cached, so the next call tries the network again.

    This function does not raise for network, HTTP-status or JSON-decoding
    failures.
    """
    limit = max(1, min(max_studies, _MAX))
    key = (term, limit)

    entry = _cache.get(key)
    if entry is not None:
        stamp, cached = entry
        if time.monotonic() - stamp < _CACHE_TTL:
            # Hand out a copy so callers cannot mutate the cached list.
            return list(cached)
        del _cache[key]  # expired

    # Order: beta, prod v2, legacy v1
    for fetch in (_try_beta, _try_v2, _try_v1):
        delay = 0
        for _attempt in range(3):
            if delay:
                await asyncio.sleep(delay)
            try:
                studies = await fetch(term, limit)
            except httpx.HTTPStatusError as exc:
                # retry only on 403/429/5xx
                if exc.response.status_code not in _RETRY_STATUSES:
                    break
            except httpx.TimeoutException:
                pass  # transient: retry the same endpoint after back-off
            except (httpx.RequestError, ValueError):
                # Connection/DNS/protocol failure, or a 200 response whose
                # body is not valid JSON: retrying the same endpoint will
                # not help, so fall through to the next one.
                break
            else:
                studies = list(studies or [])
                # Re-insert so this key becomes the newest entry, evicting
                # the oldest entries once the size cap is reached.
                _cache.pop(key, None)
                while len(_cache) >= _CACHE_MAX:
                    del _cache[next(iter(_cache))]
                _cache[key] = (time.monotonic(), studies)
                return list(studies)
            delay = 2 if delay == 0 else delay * 2
    return []  # graceful fallback


# Keep the ``cache_clear`` hook that the former lru_cache wrapper exposed.
search_trials.cache_clear = _cache.clear
# back-compat alias for old imports
async def search_trials_v2(term: str, *, max_studies: int = 20):
    """Delegate to ``search_trials`` with the same arguments.

    Kept so that code importing the older name continues to work.
    """
    studies = await search_trials(term, max_studies=max_studies)
    return studies
|