mgbam committed on
Commit
b506ef3
·
verified ·
1 Parent(s): 872e232

Update mcp/clinicaltrials.py

Browse files
Files changed (1) hide show
  1. mcp/clinicaltrials.py +52 -25
mcp/clinicaltrials.py CHANGED
@@ -1,29 +1,56 @@
1
- # mcp/clinicaltrials.py – 403-proof CPU-only helper
2
- import httpx, asyncio, datetime
3
- from typing import List, Dict
4
 
5
- BASE = "https://clinicaltrials.gov/api/query/study_fields"
6
- UA = ("Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
7
- "AppleWebKit/537.36 (KHTML, like Gecko) "
8
- "Chrome/124.0 Safari/537.36") # real browser UA
9
 
10
- _HEADERS = {"User-Agent": UA}
 
 
 
 
 
 
 
 
11
 
12
async def _fetch(url: str, params: Dict) -> Dict:
    """GET *url* with *params* and return the decoded JSON body.

    Any status other than 200 yields an empty dict instead of an error, so a
    refused or failing request does not take the calling app down.
    """
    client = httpx.AsyncClient(timeout=20, headers=_HEADERS)
    async with client:
        response = await client.get(url, params=params)
        # Non-200 (4xx/5xx included) -> empty dict, keeps the app alive.
        return response.json() if response.status_code == 200 else {}
19
 
20
async def search_trials(term: str, max_studies: int = 10) -> List[Dict]:
    """Return the ``StudyFields`` entries matching the expression *term*.

    Ranks 1 through *max_studies* are requested in JSON format. An empty list
    is returned when the response lacks the expected keys (which includes the
    empty dict `_fetch` hands back for a non-200 reply).
    """
    wanted_fields = ",".join(
        (
            "NCTId",
            "BriefTitle",
            "Condition",
            "InterventionName",
            "Phase",
            "OverallStatus",
            "StartDate",
        )
    )
    query = {
        "expr": term,
        "fields": wanted_fields,
        "max_rnk": max_studies,
        "min_rnk": 1,
        "fmt": "json",
    }
    payload = await _fetch(BASE, query)
    envelope = payload.get("StudyFieldsResponse", {})
    return envelope.get("StudyFields", [])
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #!/usr/bin/env python3
2
+ """MedGenesis ClinicalTrials.gov **v2** async wrapper.
 
3
 
4
+ The legacy v1 JSON endpoint often throttles (HTTP 403/503). This helper
5
+ switches to the production **v2** REST API, adds retry/back‑off, and
6
+ mirrors the interface expected by existing code (`search_trials`).
 
7
 
8
+ Key points
9
+ ~~~~~~~~~~
10
+ * Endpoint: `https://clinicaltrials.gov/api/v2/studies` (public, no key).
11
+ * Back‑off retry (2×, 4×) for 429/5xx.
12
+ * Results cached 12 h (512 queries).
13
+ * Returns list of dicts with minimal fields: `nctId`, `briefTitle`,
14
+ `phase`, `status`, `startDate`, `conditions`, `interventions`.
15
+ """
16
+ from __future__ import annotations
17
 
18
+ import asyncio, httpx
19
+ from functools import lru_cache
20
+ from typing import List, Dict, Any
 
 
 
 
21
 
22
+ _BASE = "https://clinicaltrials.gov/api/v2/studies"
23
+ _TIMEOUT = 15
24
+ _HEADERS = {"User-Agent": "MedGenesis/1.0 (https://huggingface.co/spaces)"}
25
+ _MAX_PAGE = 50 # absolute hard cap
26
+
27
+ # ---------------------------------------------------------------------
28
+ # Internal fetch with retry
29
+ # ---------------------------------------------------------------------
30
async def _fetch(params: Dict[str, Any], *, retries: int = 3) -> List[Dict]:
    """GET the studies endpoint and return the ``studies`` list of the reply.

    Args:
        params: Query-string parameters sent to ``_BASE``.
        retries: Maximum number of attempts (the first request included).

    Returns:
        The ``studies`` array of the JSON body, or ``[]`` when that key is
        absent or every attempt ended in a retryable status.

    Raises:
        httpx.HTTPStatusError: For a non-retryable error status, raised by
            ``raise_for_status()``.
        Transport and timeout errors from httpx propagate unchanged; nothing
        here catches them.
    """
    delay = 2
    # One client for all attempts, so retries reuse the connection instead of
    # paying for a fresh connection setup each time.
    async with httpx.AsyncClient(timeout=_TIMEOUT, headers=_HEADERS) as cli:
        for attempt in range(1, retries + 1):
            r = await cli.get(_BASE, params=params)
            if r.status_code == 200:
                return r.json().get("studies", [])
            # Throttling and server-side failures are worth another try.
            if r.status_code in {429, 500, 502, 503, 504}:
                # Back off only when another attempt follows; sleeping after
                # the last one would just delay the empty fallback.
                if attempt < retries:
                    await asyncio.sleep(delay)
                    delay *= 2
                continue
            r.raise_for_status()
    return []  # final fallback
43
+
44
+ # ---------------------------------------------------------------------
45
+ # Public helper – cached
46
+ # ---------------------------------------------------------------------
47
# functools.lru_cache cannot be used on a coroutine function: it would store
# the coroutine object itself, and awaiting that object a second time raises
# RuntimeError. Results are therefore cached explicitly, after being awaited.
_CACHE_TTL_S = 12 * 60 * 60  # entries older than 12 h are refetched
_CACHE_MAX = 512  # maximum number of distinct queries kept
# (term, page size) -> (monotonic timestamp, studies). Dicts keep insertion
# order, so the first key is always the oldest entry.
_TRIAL_CACHE: Dict[Any, Any] = {}


async def search_trials(term: str, max_studies: int = 10) -> List[Dict]:
    """Return up to *max_studies* clinical-trial dicts for free-text *term*.

    Args:
        term: Free-text search expression.
        max_studies: Requested page size, clamped to ``1.._MAX_PAGE``. It may
            be passed positionally or by keyword.

    Returns:
        A new list holding the studies returned by ``_fetch``. Repeated calls
        with the same term and clamped page size are answered from an
        in-process cache for up to 12 hours.

    Raises:
        Whatever ``_fetch`` raises; failed calls are never cached.
    """
    import time  # local import: only this function needs it

    max_studies = max(1, min(max_studies, _MAX_PAGE))
    key = (term, max_studies)
    now = time.monotonic()

    hit = _TRIAL_CACHE.get(key)
    if hit is not None and now - hit[0] < _CACHE_TTL_S:
        # Hand out a copy so callers cannot alter the cached list in place.
        return list(hit[1])

    # NOTE(review): the v2 API reference names the free-text parameter
    # `query.term`; confirm that a bare `query` key is honoured.
    params = {
        "query": term,
        "pageSize": max_studies,
        "fields": "nctId,briefTitle,phase,status,startDate,conditions,interventions",
    }
    studies = await _fetch(params)

    # An empty list is also what _fetch returns once its retries are used up,
    # so it is not cached: a transient outage must not stick for 12 hours.
    if studies:
        _TRIAL_CACHE.pop(key, None)  # re-insert so the entry counts as newest
        _TRIAL_CACHE[key] = (now, studies)
        while len(_TRIAL_CACHE) > _CACHE_MAX:
            del _TRIAL_CACHE[next(iter(_TRIAL_CACHE))]
    return list(studies)


# The lru_cache wrapper exposed cache_clear(); keep that entry point working.
search_trials.cache_clear = _TRIAL_CACHE.clear