|
|
|
""" |
|
ClinicalTrials.gov helper — v2 first, v1 fallback, 403-proof.
|
""" |
|
|
|
from __future__ import annotations |
|
import asyncio, httpx |
|
from functools import lru_cache |
|
from typing import List, Dict, Any |
|
|
|
|
|
# ClinicalTrials.gov endpoints: the v2 studies API and the v1 study_fields
# query API.
_V2 = "https://clinicaltrials.gov/api/v2/studies"
_V1 = "https://clinicaltrials.gov/api/query/study_fields"

# Desktop-Chrome style User-Agent string.
# NOTE(review): presumably what the module docstring means by "403-proof" —
# confirm that the default client User-Agent is what gets rejected.
_USER_AGENT = (
    "Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
    "AppleWebKit/537.36 (KHTML, like Gecko) "
    "Chrome/125.0 Safari/537.36"
)

# Headers attached to every request issued by this module.
_HEADERS = {"User-Agent": _USER_AGENT, "Accept": "application/json"}

_TIMEOUT = 15  # handed to httpx.AsyncClient as its timeout
_MAX = 50  # ceiling applied to the number of studies requested per search
|
|
|
|
|
async def _get(url: str, params: Dict[str, Any]) -> httpx.Response:
    """Send a single GET request to *url* with *params* and return the response.

    A new ``httpx.AsyncClient`` configured with ``_TIMEOUT`` and ``_HEADERS``
    is created for the call and closed again before the response is returned.
    No status checking happens here; callers inspect ``status_code``.
    """
    client = httpx.AsyncClient(timeout=_TIMEOUT, headers=_HEADERS)
    async with client:
        response = await client.get(url, params=params)
    return response
|
|
|
|
|
async def _try_v2(term: str, limit: int) -> List[Dict]:
    """Search the v2 studies endpoint for *term*, requesting *limit* per page.

    Returns:
        The ``"studies"`` value of the JSON body on HTTP 200, or ``[]`` when
        that key is absent.

    Raises:
        httpx.HTTPStatusError: for any status other than 200; the exception
            carries the originating request and the response.
    """
    # NOTE(review): the published v2 API appears to name its search
    # parameters ``query.term`` / ``query.cond`` rather than a bare ``query``
    # — confirm against the API reference before relying on this filter.
    wanted_fields = "nctId,briefTitle,phase,status,startDate,conditions,interventions"
    response = await _get(
        _V2,
        {"query": term, "pageSize": limit, "fields": wanted_fields},
    )

    if response.status_code != 200:
        raise httpx.HTTPStatusError(
            "v2 failed", request=response.request, response=response
        )

    payload = response.json()
    return payload.get("studies", [])
|
|
|
|
|
async def _try_v1(term: str, limit: int) -> List[Dict]:
    """Search the v1 ``study_fields`` endpoint for *term*, ranks 1 to *limit*.

    Returns:
        ``StudyFieldsResponse -> StudyFields`` from the JSON body on HTTP 200,
        or ``[]`` when either key is absent.

    Raises:
        httpx.HTTPStatusError: for any status other than 200; the exception
            carries the originating request and the response.
    """
    wanted_fields = "NCTId,BriefTitle,Phase,OverallStatus,StartDate,Condition,InterventionName"
    query = {
        "expr": term,
        "fields": wanted_fields,
        "max_rnk": limit,
        "min_rnk": 1,
        "fmt": "json",
    }
    response = await _get(_V1, query)

    if response.status_code != 200:
        raise httpx.HTTPStatusError(
            "v1 failed", request=response.request, response=response
        )

    envelope = response.json().get("StudyFieldsResponse", {})
    return envelope.get("StudyFields", [])
|
|
|
|
|
# Result cache for search_trials, keyed by (term, clamped limit).
# functools.lru_cache cannot be used on a coroutine function: it would cache
# the coroutine object, and awaiting that same object a second time raises
# RuntimeError. A plain dict (insertion-ordered) serves as a small LRU instead.
_TRIAL_CACHE: Dict[Any, List[Dict]] = {}
_TRIAL_CACHE_SIZE = 512

# v2 statuses worth retrying; any other failure goes straight to the v1 fallback.
_RETRYABLE_STATUSES = frozenset({403, 429, 500, 503})


def _remember(key: Any, studies: Any) -> List[Dict]:
    """Store *studies* under *key*, evict the oldest overflow, return a copy."""
    snapshot = list(studies or [])
    _TRIAL_CACHE[key] = snapshot
    while len(_TRIAL_CACHE) > _TRIAL_CACHE_SIZE:
        # Dicts iterate in insertion order, so the first key is the least
        # recently used one (hits are re-inserted at the end).
        del _TRIAL_CACHE[next(iter(_TRIAL_CACHE))]
    return list(snapshot)


async def search_trials(term: str, *, max_studies: int = 10) -> List[Dict]:
    """Return at most *max_studies* trials for *term*; ``[]`` if both APIs fail.

    *max_studies* is clamped to the range ``1.._MAX``. The v2 endpoint is
    tried up to three times, sleeping 2 s and then 4 s between attempts, as
    long as it answers with 403, 429, 500 or 503. Any other failing status
    ends the retries immediately. The v1 endpoint is then tried once; if it
    also fails with an HTTP status error, an empty list is returned.

    Successful results are cached per ``(term, clamped limit)`` (up to
    ``_TRIAL_CACHE_SIZE`` entries, least recently used evicted first).
    Failures are never cached, so a later call retries the network. Each call
    returns its own list object, so callers may mutate the list without
    affecting the cache.

    Only ``httpx.HTTPStatusError`` is handled here; other exceptions raised
    by the request helpers propagate to the caller.
    """
    limit = max(1, min(max_studies, _MAX))
    key = (term, limit)

    cached = _TRIAL_CACHE.pop(key, None)
    if cached is not None:
        _TRIAL_CACHE[key] = cached  # re-insert: marks entry as most recent
        return list(cached)

    backoff = 0
    for _ in range(3):
        if backoff:
            await asyncio.sleep(backoff)
        try:
            studies = await _try_v2(term, limit)
        except httpx.HTTPStatusError as exc:
            if exc.response.status_code not in _RETRYABLE_STATUSES:
                break
            backoff = backoff * 2 if backoff else 2
        else:
            return _remember(key, studies)

    try:
        studies = await _try_v1(term, limit)
    except httpx.HTTPStatusError:
        # Deliberate best effort: both endpoints refused, report "no trials".
        return []
    return _remember(key, studies)


# Mirrors the ``cache_clear`` hook that functools.lru_cache wrappers expose,
# so callers can still reset the cache via ``search_trials.cache_clear()``.
search_trials.cache_clear = _TRIAL_CACHE.clear
|
|
|
|
|
|
|
async def search_trials_v2(term: str, *, max_studies: int = 10):
    """Alias of ``search_trials``: forwards *term* and *max_studies* unchanged."""
    trials = await search_trials(term, max_studies=max_studies)
    return trials
|
|