File size: 2,777 Bytes
5035006 c9d29cb 078f31a 5035006 078f31a 5035006 c9d29cb 078f31a 5035006 c09fa6f 5035006 078f31a 5035006 078f31a c09fa6f 5035006 078f31a 5035006 c9d29cb 5035006 c9d29cb 5035006 078f31a 5035006 078f31a 5035006 078f31a 5035006 078f31a 5035006 078f31a |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 |
#!/usr/bin/env python3
"""
ClinicalTrials.gov helper — v2 first, v1 fallback, 403-proof.
"""
from __future__ import annotations
import asyncio, httpx
from functools import lru_cache
from typing import List, Dict, Any
# ClinicalTrials.gov endpoints: the v2 studies API and the older
# study_fields query API used as a fallback.
_V2 = "https://clinicaltrials.gov/api/v2/studies"
_V1 = "https://clinicaltrials.gov/api/query/study_fields"

# Headers sent with every request. A Chrome-like User-Agent plus an explicit
# JSON Accept header is intended to avoid 403 responses.
_HEADERS = {
    "User-Agent": " ".join(
        (
            "Mozilla/5.0 (Windows NT 10.0; Win64; x64)",
            "AppleWebKit/537.36 (KHTML, like Gecko)",
            "Chrome/125.0 Safari/537.36",
        )
    ),
    "Accept": "application/json",
}

# Value handed to the httpx client as its timeout.
_TIMEOUT = 15

# Upper bound on the page size requested from the API.
_MAX = 50
async def _get(url: str, params: Dict[str, Any]) -> httpx.Response:
    """Issue a single GET to *url* with *params* and return the response.

    A fresh ``httpx.AsyncClient`` configured with the module's timeout and
    headers is opened for the request and closed before returning. The status
    code is not inspected here; that is left to the caller.
    """
    async with httpx.AsyncClient(headers=_HEADERS, timeout=_TIMEOUT) as session:
        response = await session.get(url, params=params)
    return response
async def _try_v2(term: str, limit: int) -> List[Dict]:
    """Query the v2 endpoint for *term*, asking for *limit* studies per page.

    Returns the ``"studies"`` list from the JSON body (``[]`` when the key is
    absent).

    Raises:
        httpx.HTTPStatusError: for any status other than 200, so the caller
            can decide whether to retry or fall back.
    """
    # NOTE(review): the public v2 docs appear to name the search parameter
    # `query.term` rather than `query` - confirm against the live API.
    response = await _get(
        _V2,
        {
            "query": term,
            "pageSize": limit,
            "fields": "nctId,briefTitle,phase,status,startDate,conditions,interventions",
        },
    )
    if response.status_code != 200:
        raise httpx.HTTPStatusError(
            "v2 failed", request=response.request, response=response
        )
    payload = response.json()
    return payload.get("studies", [])
async def _try_v1(term: str, limit: int) -> List[Dict]:
    """Query the v1 study_fields endpoint for *term*, ranks 1 through *limit*.

    Returns the ``StudyFieldsResponse.StudyFields`` list from the JSON body
    (``[]`` when either key is absent).

    Raises:
        httpx.HTTPStatusError: for any status other than 200.
    """
    # NOTE(review): confirm this older endpoint is still being served.
    query = {
        "expr": term,
        "fields": "NCTId,BriefTitle,Phase,OverallStatus,StartDate,Condition,InterventionName",
        "max_rnk": limit,
        "min_rnk": 1,
        "fmt": "json",
    }
    response = await _get(_V1, query)
    if response.status_code != 200:
        raise httpx.HTTPStatusError(
            "v1 failed", request=response.request, response=response
        )
    envelope = response.json().get("StudyFieldsResponse", {})
    return envelope.get("StudyFields", [])
# Result cache keyed by (term, clamped limit). `functools.lru_cache` cannot be
# used on an `async def`: it would memoise the coroutine object itself, and
# awaiting that object a second time raises
# "RuntimeError: cannot reuse already awaited coroutine".
_SEARCH_CACHE: Dict[tuple, List[Dict]] = {}
_SEARCH_CACHE_MAX = 512

# Seconds to sleep before each v2 attempt (first try + 2 retries).
_V2_BACKOFF = (0, 2, 4)

# v2 status codes worth retrying; any other status goes straight to v1.
_V2_RETRYABLE = frozenset({403, 429, 500, 503})


def _remember(key: tuple, studies: List[Dict]) -> List[Dict]:
    """Cache a copy of *studies* under *key* and return *studies* unchanged."""
    _SEARCH_CACHE.pop(key, None)
    if len(_SEARCH_CACHE) >= _SEARCH_CACHE_MAX:
        # dicts iterate in insertion order, so the first key is the least
        # recently used one (hits are re-inserted at the end).
        del _SEARCH_CACHE[next(iter(_SEARCH_CACHE))]
    _SEARCH_CACHE[key] = list(studies)
    return studies


async def search_trials(term: str, *, max_studies: int = 10) -> List[Dict]:
    """Return at most *max_studies* trials matching *term*.

    The v2 endpoint is tried up to three times with a growing delay while it
    answers with a retryable status (403, 429, 500, 503). If v2 does not
    succeed, the v1 endpoint is tried once. When both fail with an HTTP status
    error the result is a silent ``[]``.

    Successful responses are cached per ``(term, clamped limit)`` in a bounded
    LRU cache, so repeated calls do not hit the network again. The final
    ``[]`` fallback is deliberately not cached, so a temporary block is
    retried on the next call.

    Args:
        term: Search expression forwarded to the API.
        max_studies: Requested number of studies; clamped to ``1.._MAX``.

    Returns:
        A list of study dicts in the shape of whichever endpoint answered.

    Raises:
        Only ``httpx.HTTPStatusError`` is handled here; any other exception
        raised by the request helpers propagates to the caller.
    """
    limit = max(1, min(max_studies, _MAX))
    key = (term, limit)

    if key in _SEARCH_CACHE:
        cached = _SEARCH_CACHE.pop(key)
        _SEARCH_CACHE[key] = cached  # re-insert to mark as most recently used
        return list(cached)  # copy so callers cannot mutate the cache

    # Try v2 with back-off.
    for delay in _V2_BACKOFF:
        if delay:
            await asyncio.sleep(delay)
        try:
            return _remember(key, await _try_v2(term, limit))
        except httpx.HTTPStatusError as err:
            if err.response.status_code not in _V2_RETRYABLE:
                break  # non-retryable: skip remaining attempts

    # Fall back to v1 (once).
    try:
        return _remember(key, await _try_v1(term, limit))
    except httpx.HTTPStatusError:
        return []  # final graceful fallback; intentionally not cached


# Kept for compatibility with the attribute the former lru_cache wrapper exposed.
search_trials.cache_clear = _SEARCH_CACHE.clear
# Legacy name kept so existing callers keep working.
async def search_trials_v2(term: str, *, max_studies: int = 10):
    """Forward to ``search_trials`` with the same arguments and return its result."""
    studies = await search_trials(term, max_studies=max_studies)
    return studies
|