Update mcp/clinicaltrials.py
Browse files- mcp/clinicaltrials.py +49 -27
mcp/clinicaltrials.py
CHANGED
@@ -1,31 +1,53 @@
|
|
1 |
-
|
2 |
-
clinicaltrials.py – resilient mirror of ClinicalTrials.gov v2 API.
|
3 |
-
"""
|
4 |
-
import httpx, asyncio
|
5 |
-
from typing import List, Dict
|
6 |
|
7 |
-
|
8 |
-
|
9 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
10 |
|
11 |
-
async def
|
12 |
-
async with httpx.AsyncClient(timeout=20, headers=_HDRS, follow_redirects=True) as c:
|
13 |
-
for _ in range(retries + 1):
|
14 |
-
r = await c.get(_BASE, params=p)
|
15 |
-
if r.status_code == 403:
|
16 |
-
await asyncio.sleep(1)
|
17 |
-
continue
|
18 |
-
if r.status_code >= 400:
|
19 |
-
return {}
|
20 |
-
return r.json()
|
21 |
-
return {}
|
22 |
-
|
23 |
-
async def search_trials(term: str, *, max_studies: int = 20) -> List[Dict]:
|
24 |
p = {
|
25 |
-
"query"
|
26 |
-
"pageSize":
|
27 |
-
"fields"
|
28 |
-
|
|
|
|
|
29 |
}
|
30 |
-
|
31 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import logging
import random

import httpx
|
|
|
|
|
|
|
|
|
2 |
|
3 |
+
# Endpoint queried first: the ClinicalTrials.gov v2 studies listing.
_BASE_V2 = "https://clinicaltrials.gov/api/v2/studies"
# Endpoint used as a fallback when the v2 request fails.
# NOTE(review): this is the older "study_fields" query API - confirm it is
# still served before relying on it as a fallback.
_BASE_V1 = "https://clinicaltrials.gov/api/query/study_fields"
|
5 |
+
# Pool of three desktop browser User-Agent strings, keyed 0-2.
# Despite the name, the values are bare User-Agent strings (not header dicts);
# the request helpers wrap the chosen one as {"User-Agent": ...}.
# Per the original author, picking one at random per request is meant to get
# past naive request blocking.
_HEADERS = {
    0: "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 "
       "(KHTML, like Gecko) Chrome/125 Safari/537.36",
    1: "Mozilla/5.0 (Macintosh; Intel Mac OS X 12_6) AppleWebKit/605.1.15 "
       "(KHTML, like Gecko) Version/16 Safari/605.1.15",
    2: "Mozilla/5.0 (X11; Linux x86_64) Gecko/20100101 Firefox/126.0",
}
|
13 |
|
14 |
+
async def _try_v2(term: str, n: int) -> list:
    """Search the ClinicalTrials.gov v2 ``/studies`` endpoint.

    Args:
        term: Free-text search expression, sent as ``query.term``.
        n: Maximum number of studies to request, sent as ``pageSize``.

    Returns:
        The list stored under ``"studies"`` in the JSON response, or an
        empty list when that key is absent.

    Raises:
        RuntimeError: If the server answers 403 ("v2 blocked").
        httpx.HTTPStatusError: For any other 4xx/5xx response.
        Other httpx / JSON decoding errors propagate unchanged.
    """
    params = {
        # The v2 API exposes free-text search as ``query.term``; a bare
        # ``query`` key is not one of its documented search parameters.
        "query.term": term,
        "pageSize": n,
        # NOTE(review): field names kept exactly as committed - confirm the
        # v2 API accepts these spellings (e.g. "phase", "status").
        "fields": ",".join([
            "nctId", "briefTitle", "phase", "status",
            "startDate", "conditions", "interventions",
        ]),
    }
    # Choose from whatever is configured instead of hard-coding indices 0..2,
    # so adding or removing a User-Agent cannot cause a KeyError here.
    user_agent = random.choice(list(_HEADERS.values()))
    async with httpx.AsyncClient(
        headers={"User-Agent": user_agent}, timeout=12
    ) as client:
        response = await client.get(_BASE_V2, params=params)
        # 403 is reported with its own message so the caller's fallback path
        # can be told apart from other HTTP failures when debugging.
        if response.status_code == 403:
            raise RuntimeError("v2 blocked")
        response.raise_for_status()
        return response.json().get("studies", [])
|
31 |
+
|
32 |
+
async def _try_v1(term: str, n: int) -> list:
    """Search the older ``study_fields`` query endpoint.

    Args:
        term: Search expression, sent as ``expr``.
        n: Highest rank to request (``max_rnk``); ranks start at 1.

    Returns:
        The ``StudyFields`` list from ``StudyFieldsResponse``, or an empty
        list when the response carries no ``StudyFields`` entry. Note that
        these records are shaped differently from the v2 ``studies`` records.

    Raises:
        httpx.HTTPStatusError: For any 4xx/5xx response.
        KeyError: If the JSON has no ``StudyFieldsResponse`` object.
        Other httpx / JSON decoding errors propagate unchanged.
    """
    params = dict(
        expr=term,
        fields="NCTId,BriefTitle,Phase,OverallStatus,StartDate,Condition,InterventionName",
        max_rnk=n, min_rnk=1, fmt="json",
    )
    # Choose from whatever is configured instead of hard-coding indices 0..2.
    user_agent = random.choice(list(_HEADERS.values()))
    async with httpx.AsyncClient(
        headers={"User-Agent": user_agent}, timeout=12
    ) as client:
        response = await client.get(_BASE_V1, params=params)
        response.raise_for_status()
        # A response without "StudyFields" is treated as "no matches" rather
        # than surfacing as a KeyError.
        return response.json()["StudyFieldsResponse"].get("StudyFields", [])
|
44 |
+
|
45 |
+
# public
|
46 |
+
async def search_trials(term: str, max_studies: int = 20):
    """Search ClinicalTrials.gov, trying the v2 API and then the older API.

    Args:
        term: Search expression passed to each backend.
        max_studies: Maximum number of studies to request.

    Returns:
        The first backend's result that succeeds. Always a list: if both
        backends fail, an empty list is returned instead of raising.
        The record layout depends on which backend answered.
    """
    log = logging.getLogger(__name__)
    for backend in (_try_v2, _try_v1):
        try:
            return await backend(term, max_studies)
        except Exception as exc:  # best-effort: any failure moves to the next backend
            # Record why the backend was skipped; previously every failure
            # was discarded silently, which made outages undiagnosable.
            log.warning(
                "ClinicalTrials.gov backend %s failed: %r", backend.__name__, exc
            )
    return []  # always return list
|