mgbam committed on
Commit
80779c4
·
verified ·
1 Parent(s): 8d292e0

Update mcp/clinicaltrials.py

Browse files
Files changed (1) hide show
  1. mcp/clinicaltrials.py +49 -27
mcp/clinicaltrials.py CHANGED
@@ -1,31 +1,53 @@
1
- """
2
- clinicaltrials.py – resilient mirror of ClinicalTrials.gov v2 API.
3
- """
4
- import httpx, asyncio
5
- from typing import List, Dict
6
 
7
- _BASE = "https://clinicaltrials.gov/api/v2/studies"
8
- _UA = "Mozilla/5.0 (MedGenesis; +https://huggingface.co/spaces/mgbam/MCP_Res)"
9
- _HDRS = {"User-Agent": _UA, "Accept": "application/json"}
 
 
 
 
 
 
 
10
 
11
- async def _fetch(p: Dict, *, retries: int = 2) -> Dict:
12
- async with httpx.AsyncClient(timeout=20, headers=_HDRS, follow_redirects=True) as c:
13
- for _ in range(retries + 1):
14
- r = await c.get(_BASE, params=p)
15
- if r.status_code == 403:
16
- await asyncio.sleep(1)
17
- continue
18
- if r.status_code >= 400:
19
- return {}
20
- return r.json()
21
- return {}
22
-
23
- async def search_trials(term: str, *, max_studies: int = 20) -> List[Dict]:
24
  p = {
25
- "query" : term,
26
- "pageSize": max_studies,
27
- "fields" : ",".join(["nctId","briefTitle","phase","status",
28
- "startDate","conditions","interventions"]),
 
 
29
  }
30
- data = await _fetch(p)
31
- return data.get("studies", [])
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import httpx, random
 
 
 
 
2
 
3
# ClinicalTrials.gov endpoints: the v2 studies API and the older
# ``study_fields`` query API.
_BASE_V2 = "https://clinicaltrials.gov/api/v2/studies"
_BASE_V1 = "https://clinicaltrials.gov/api/query/study_fields"

# Despite the name, this maps an integer slot (0-2) to a desktop browser
# User-Agent string, not to full header dicts. Request helpers pick one
# slot at random per request; the rotation is intended to avoid naive
# blocking of a single fixed client string.
_HEADERS = dict(enumerate((
    # Chrome on Windows
    "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 "
    "(KHTML, like Gecko) Chrome/125 Safari/537.36",
    # Safari on macOS
    "Mozilla/5.0 (Macintosh; Intel Mac OS X 12_6) AppleWebKit/605.1.15 "
    "(KHTML, like Gecko) Version/16 Safari/605.1.15",
    # Firefox on Linux
    "Mozilla/5.0 (X11; Linux x86_64) Gecko/20100101 Firefox/126.0",
)))
13
 
14
async def _try_v2(term: str, n: int):
    """Search the ClinicalTrials.gov v2 ``/studies`` endpoint.

    Args:
        term: Free-text search expression.
        n: Page size to request, i.e. the most studies one call returns.

    Returns:
        The list stored under ``"studies"`` in the JSON response, or an
        empty list when that key is absent.

    Raises:
        RuntimeError: The server answered HTTP 403 ("v2 blocked").
        httpx.HTTPStatusError: Any other 4xx/5xx response.
        httpx.HTTPError: Transport-level failures such as timeouts.
    """
    params = {
        # The v2 API names its free-text search parameter ``query.term``;
        # a bare ``query`` key is not one of its documented parameters.
        "query.term": term,
        "pageSize": n,
        # NOTE(review): field names are passed through unchanged; confirm
        # they match the names the v2 ``fields`` parameter accepts.
        "fields": ",".join([
            "nctId", "briefTitle", "phase", "status",
            "startDate", "conditions", "interventions",
        ]),
    }
    # Pick any configured User-Agent without hard-coding the pool size.
    user_agent = random.choice(tuple(_HEADERS.values()))
    async with httpx.AsyncClient(
        headers={"User-Agent": user_agent}, timeout=12
    ) as client:
        resp = await client.get(_BASE_V2, params=params)
        # 403 gets its own error so it is distinguishable from other
        # HTTP failures raised by raise_for_status().
        if resp.status_code == 403:
            raise RuntimeError("v2 blocked")
        resp.raise_for_status()
        return resp.json().get("studies", [])
31
+
32
async def _try_v1(term: str, n: int):
    """Search the older ClinicalTrials.gov ``study_fields`` endpoint.

    Args:
        term: Search expression, sent as the ``expr`` parameter.
        n: Highest rank to request (``max_rnk``); ranks start at 1.

    Returns:
        The list found at ``StudyFieldsResponse -> StudyFields`` in the
        JSON response, or an empty list when either key is missing.

    Raises:
        httpx.HTTPStatusError: The server answered with a 4xx/5xx status.
        httpx.HTTPError: Transport-level failures such as timeouts.
    """
    # NOTE(review): the classic ``/api/query`` endpoints are reported as
    # retired by ClinicalTrials.gov; confirm this fallback still responds.
    params = dict(
        expr=term,
        fields="NCTId,BriefTitle,Phase,OverallStatus,StartDate,Condition,InterventionName",
        max_rnk=n, min_rnk=1, fmt="json",
    )
    # Pick any configured User-Agent without hard-coding the pool size.
    user_agent = random.choice(tuple(_HEADERS.values()))
    async with httpx.AsyncClient(
        headers={"User-Agent": user_agent}, timeout=12
    ) as client:
        resp = await client.get(_BASE_V1, params=params)
        resp.raise_for_status()
        # A payload without the expected keys means "no studies" rather
        # than an error, so return an empty list instead of a KeyError.
        payload = resp.json()
        return payload.get("StudyFieldsResponse", {}).get("StudyFields", [])
44
+
45
+ # public
46
async def search_trials(term: str, max_studies: int = 20):
    """Search ClinicalTrials.gov, trying the v2 API first and v1 second.

    This is a best-effort lookup: a failure in either backend is logged
    and never propagated to the caller.

    Args:
        term: Search expression forwarded to the backend helpers.
        max_studies: Upper bound on the number of studies requested.

    Returns:
        The result of the first backend that succeeds, or an empty list
        when both fail.
        NOTE(review): the two backends read different response structures
        (``studies`` vs. ``StudyFields``), so record shapes presumably
        differ between them; verify that callers handle both.
    """
    import logging

    log = logging.getLogger(__name__)
    for backend in (_try_v2, _try_v1):
        try:
            return await backend(term, max_studies)
        except Exception:
            # Deliberate catch-all at the public boundary: record why the
            # backend failed, then move on to the next one.
            log.warning(
                "%s failed for term %r", backend.__name__, term, exc_info=True
            )
    return []  # always return list