File size: 2,777 Bytes
5035006
c9d29cb
078f31a
5035006
 
 
078f31a
5035006
 
c9d29cb
078f31a
5035006
 
c09fa6f
5035006
078f31a
5035006
 
 
 
 
 
 
 
078f31a
 
c09fa6f
5035006
 
 
 
078f31a
5035006
c9d29cb
5035006
 
 
c9d29cb
5035006
 
 
078f31a
 
 
5035006
 
 
 
 
 
 
 
 
 
078f31a
 
 
 
5035006
 
 
078f31a
5035006
078f31a
 
 
 
5035006
 
 
 
078f31a
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
#!/usr/bin/env python3
"""
ClinicalTrials.gov helper – v2 first, v1 fallback, 403-proof.
"""

from __future__ import annotations
import asyncio, httpx
from functools import lru_cache
from typing import List, Dict, Any

# ClinicalTrials.gov endpoints: the v2 "studies" API and the v1 "study_fields" query.
_V2 = "https://clinicaltrials.gov/api/v2/studies"
_V1 = "https://clinicaltrials.gov/api/query/study_fields"

# Headers attached to every request.  Per the original author's note, a
# Chrome-like User-Agent plus an explicit JSON Accept header avoids HTTP 403.
_HEADERS = {
    "User-Agent": "".join(
        (
            "Mozilla/5.0 (Windows NT 10.0; Win64; x64) ",
            "AppleWebKit/537.36 (KHTML, like Gecko) ",
            "Chrome/125.0 Safari/537.36",
        )
    ),
    "Accept": "application/json",
}

_TIMEOUT = 15  # value handed to httpx.AsyncClient(timeout=...)
_MAX = 50  # upper bound applied to the requested page size


async def _get(url: str, params: Dict[str, Any]) -> httpx.Response:
    """Send a single GET request and return the response.

    A fresh ``httpx.AsyncClient`` configured with ``_TIMEOUT`` and
    ``_HEADERS`` is opened for the request and closed again before the
    response is handed back.

    Args:
        url: Absolute URL to request.
        params: Query-string parameters passed to ``client.get``.

    Returns:
        The ``httpx.Response``; the status code is not inspected here.
    """
    client = httpx.AsyncClient(timeout=_TIMEOUT, headers=_HEADERS)
    async with client:
        response = await client.get(url, params=params)
    return response


async def _try_v2(term: str, limit: int) -> List[Dict]:
    """Query the v2 endpoint once and return its ``studies`` list.

    Args:
        term: Search expression, sent as the ``query`` parameter.
        limit: Page size, sent as ``pageSize``.

    Returns:
        The value under ``"studies"`` in the JSON body, or ``[]`` when
        that key is absent.

    Raises:
        httpx.HTTPStatusError: For any status other than 200, so the caller
            can decide whether to retry or fall back.
    """
    # NOTE(review): parameter and field names are sent exactly as written here;
    # confirm them against the current ClinicalTrials.gov v2 API reference.
    wanted_fields = "nctId,briefTitle,phase,status,startDate,conditions,interventions"
    query = {"query": term, "pageSize": limit, "fields": wanted_fields}

    response = await _get(_V2, query)
    if response.status_code != 200:
        raise httpx.HTTPStatusError(
            "v2 failed", request=response.request, response=response
        )
    return response.json().get("studies", [])


async def _try_v1(term: str, limit: int) -> List[Dict]:
    """Query the v1 ``study_fields`` endpoint once and return its rows.

    Args:
        term: Search expression, sent as the ``expr`` parameter.
        limit: Highest rank to request (``max_rnk``); ranks start at 1.

    Returns:
        The ``StudyFieldsResponse.StudyFields`` value from the JSON body,
        or ``[]`` when either key is absent.

    Raises:
        httpx.HTTPStatusError: For any status other than 200.
    """
    # NOTE(review): presumably the classic API is still reachable at _V1;
    # verify before relying on this fallback.
    wanted_fields = "NCTId,BriefTitle,Phase,OverallStatus,StartDate,Condition,InterventionName"
    query = {
        "expr": term,
        "fields": wanted_fields,
        "max_rnk": limit,
        "min_rnk": 1,
        "fmt": "json",
    }

    response = await _get(_V1, query)
    if response.status_code != 200:
        raise httpx.HTTPStatusError(
            "v1 failed", request=response.request, response=response
        )
    envelope = response.json().get("StudyFieldsResponse", {})
    return envelope.get("StudyFields", [])


# Result cache for search_trials, keyed by (term, effective page size).
# A plain dict preserves insertion order, so the first key is always the
# least recently used entry; that is all this small LRU needs.
_SEARCH_CACHE: Dict[tuple, List[Dict]] = {}
_SEARCH_CACHE_SIZE = 512


def _remember_search(key: tuple, studies: Any) -> Any:
    """Store a successful result under *key* and return *studies* unchanged."""
    if isinstance(studies, list):
        _SEARCH_CACHE.pop(key, None)
        # Keep a private copy so callers mutating their list cannot alter the cache.
        _SEARCH_CACHE[key] = list(studies)
        while len(_SEARCH_CACHE) > _SEARCH_CACHE_SIZE:
            del _SEARCH_CACHE[next(iter(_SEARCH_CACHE))]  # evict oldest entry
    return studies


async def search_trials(term: str, *, max_studies: int = 10) -> List[Dict]:
    """Return ≤ *max_studies* trials; silent `[]` if CT.gov is blocking.

    The v2 endpoint is tried up to three times (sleeping 2 s, then 4 s,
    between attempts) while it answers with 403, 429, 500 or 503.  Any other
    error status, or exhausting the attempts, triggers a single v1 request.

    Successful results are memoised per ``(term, page size)``.  The cache
    holds the awaited *result*: ``functools.lru_cache`` applied to a
    coroutine function stores the coroutine object itself, which can be
    awaited only once, so a repeated call raised ``RuntimeError``.  The
    empty list returned when both endpoints fail is deliberately not cached,
    so a temporary block does not become permanent.

    Args:
        term: Search expression forwarded to the endpoint helpers.
        max_studies: Requested number of trials; clamped to ``1.._MAX``.

    Returns:
        A list of study records (shape depends on which endpoint answered),
        or ``[]`` when both endpoints returned an error status.

    Raises:
        Only ``httpx.HTTPStatusError`` is handled here; any other exception
        raised by the endpoint helpers propagates to the caller.
    """
    limit = max(1, min(max_studies, _MAX))
    key = (term, limit)

    if key in _SEARCH_CACHE:
        cached = _SEARCH_CACHE.pop(key)
        _SEARCH_CACHE[key] = cached  # re-insert: now the most recently used
        return list(cached)

    # try v2 with back-off
    delay = 0
    for _ in range(3):        # first + 2 retries
        if delay:
            await asyncio.sleep(delay)
        try:
            studies = await _try_v2(term, limit)
        except httpx.HTTPStatusError as e:
            if e.response.status_code not in {403, 429, 500, 503}:
                break         # non-retryable
            delay = 2 if delay == 0 else delay * 2
        else:
            return _remember_search(key, studies)

    # fallback to v1 (once)
    try:
        studies = await _try_v1(term, limit)
    except httpx.HTTPStatusError:
        return []             # final graceful fallback (not cached)
    return _remember_search(key, studies)


# The former lru_cache wrapper exposed cache_clear(); keep that entry point
# available for any caller that resets the cache.
search_trials.cache_clear = _SEARCH_CACHE.clear


# Backwards-compatible name kept for existing callers.
async def search_trials_v2(term: str, *, max_studies: int = 10):
    """Delegate to ``search_trials`` with the same arguments and return its result."""
    trials = await search_trials(term, max_studies=max_studies)
    return trials