MCP_Res / mcp /ctgov.py
mgbam's picture
Update mcp/ctgov.py
078f31a verified
raw
history blame
2.78 kB
#!/usr/bin/env python3
"""
ClinicalTrials.gov helper – v2 first, v1 fallback, 403-proof.
"""
from __future__ import annotations
import asyncio, httpx
from functools import lru_cache
from typing import List, Dict, Any
# Endpoints
# _V2 is queried first by search_trials(); _V1 is only used as a fallback.
_V2 = "https://clinicaltrials.gov/api/v2/studies"
_V1 = "https://clinicaltrials.gov/api/query/study_fields"
# Default headers attached to every request issued through _get().
_HEADERS = {
    # Chrome-ish UA + explicit JSON accept header ← avoids 403
    "User-Agent": (
        "Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
        "AppleWebKit/537.36 (KHTML, like Gecko) "
        "Chrome/125.0 Safari/537.36"
    ),
    "Accept": "application/json",
}
# Passed as httpx.AsyncClient(timeout=...), i.e. an httpx timeout in seconds.
_TIMEOUT = 15
# Upper bound that search_trials() applies to the requested number of studies.
_MAX = 50 # cap page size
async def _get(url: str, params: Dict[str, Any]) -> httpx.Response:
    """Issue one GET request and hand back the raw response.

    A short-lived ``httpx.AsyncClient`` configured with the module's
    ``_TIMEOUT`` and ``_HEADERS`` is opened for the call and closed again
    before returning.  The status code is *not* inspected here; callers
    decide what counts as a failure.

    Args:
        url: Endpoint to request.
        params: Query-string parameters to send.

    Returns:
        The ``httpx.Response`` exactly as received.
    """
    client = httpx.AsyncClient(timeout=_TIMEOUT, headers=_HEADERS)
    async with client:
        response = await client.get(url, params=params)
    return response
async def _try_v2(term: str, limit: int) -> List[Dict]:
    """Query the v2 endpoint once and return its ``studies`` list.

    Args:
        term: Search expression, sent as the ``query`` parameter.
        limit: Page size to request.

    Returns:
        The value under ``"studies"`` in the JSON body, or ``[]`` when the
        key is absent.

    Raises:
        httpx.HTTPStatusError: For any status other than 200.  The response
            is attached so the caller can choose between retry and fallback.

    NOTE(review): the public v2 API reference names its free-text parameter
    ``query.term``; confirm that plain ``query`` and these field names are
    accepted by the service.
    """
    wanted_fields = "nctId,briefTitle,phase,status,startDate,conditions,interventions"
    response = await _get(
        _V2,
        {
            "query": term,
            "pageSize": limit,
            "fields": wanted_fields,
        },
    )
    if response.status_code != 200:
        # Surface the failure; the caller owns the retry/fallback policy.
        raise httpx.HTTPStatusError(
            "v2 failed", request=response.request, response=response
        )
    payload = response.json()
    return payload.get("studies", [])
async def _try_v1(term: str, limit: int) -> List[Dict]:
    """Query the v1 ``study_fields`` endpoint once.

    Args:
        term: Search expression, sent as ``expr``.
        limit: Highest rank to request (``max_rnk``); ranks start at 1.

    Returns:
        The ``StudyFieldsResponse -> StudyFields`` list from the JSON body,
        or ``[]`` when either key is missing.

    Raises:
        httpx.HTTPStatusError: For any status other than 200.
    """
    wanted_fields = (
        "NCTId,BriefTitle,Phase,OverallStatus,StartDate,Condition,InterventionName"
    )
    response = await _get(
        _V1,
        {
            "expr": term,
            "fields": wanted_fields,
            "max_rnk": limit,
            "min_rnk": 1,
            "fmt": "json",
        },
    )
    if response.status_code != 200:
        raise httpx.HTTPStatusError(
            "v1 failed", request=response.request, response=response
        )
    envelope = response.json().get("StudyFieldsResponse", {})
    return envelope.get("StudyFields", [])
# Result cache for search_trials(), keyed by (term, effective page size).
# It replaces a functools.lru_cache decorator: lru_cache on an ``async def``
# stores the *coroutine object*, so a repeated call handed back an
# already-awaited coroutine and failed with
# "RuntimeError: cannot reuse already awaited coroutine".
_SEARCH_CACHE: Dict[tuple, List[Dict]] = {}
_SEARCH_CACHE_MAX = 512


async def search_trials(term: str, *, max_studies: int = 10) -> List[Dict]:
    """Return at most *max_studies* trials; silent ``[]`` if CT.gov is blocking.

    The v2 endpoint is tried up to three times (sleeping 2 s, then 4 s,
    between attempts) while it answers with 403, 429, 500 or 503.  Any other
    error status stops the retries immediately.  If v2 never succeeds, the
    v1 endpoint is tried once.

    Successful results are cached per ``(term, effective limit)``; the
    "everything failed" ``[]`` is deliberately not cached so that a later
    call can try again.

    Args:
        term: Search expression forwarded to the API.
        max_studies: Requested number of studies; clamped to ``1.._MAX``.

    Returns:
        A new list of study records (the record dicts themselves are shared
        with the cache), or ``[]`` when both endpoints returned an error
        status.

    Raises:
        Only ``httpx.HTTPStatusError`` is handled here; other exceptions
        raised by the request helpers (e.g. timeouts or connection errors)
        propagate to the caller.
    """
    limit = max(1, min(max_studies, _MAX))
    key = (term, limit)

    if key in _SEARCH_CACHE:
        # Re-insert so the entry becomes the most recently used one.
        cached = _SEARCH_CACHE.pop(key)
        _SEARCH_CACHE[key] = cached
        return list(cached)

    studies = None

    # try v2 with back-off
    delay = 0
    for _ in range(3):  # first + 2 retries
        if delay:
            await asyncio.sleep(delay)
        try:
            studies = await _try_v2(term, limit)
            break
        except httpx.HTTPStatusError as exc:
            if exc.response.status_code not in {403, 429, 500, 503}:
                break  # non-retryable
            delay = 2 if delay == 0 else delay * 2

    # fallback to v1 (once)
    if studies is None:
        try:
            studies = await _try_v1(term, limit)
        except httpx.HTTPStatusError:
            return []  # final graceful fallback (not cached)

    # Dicts preserve insertion order, so the first key is the least recently
    # used entry; evict it once the cache is full.
    if len(_SEARCH_CACHE) >= _SEARCH_CACHE_MAX:
        _SEARCH_CACHE.pop(next(iter(_SEARCH_CACHE)))
    _SEARCH_CACHE[key] = studies
    return list(studies)


# Keep the ``cache_clear()`` hook that the lru_cache wrapper used to expose.
search_trials.cache_clear = _SEARCH_CACHE.clear
# Old public name, kept so existing imports keep working.
async def search_trials_v2(term: str, *, max_studies: int = 10):
    """Delegate to :func:`search_trials` with the same arguments.

    Args:
        term: Search expression.
        max_studies: Requested number of studies.

    Returns:
        Whatever ``search_trials`` returns for these arguments.
    """
    trials = await search_trials(term, max_studies=max_studies)
    return trials