mgbam committed on
Commit
078f31a
·
verified ·
1 Parent(s): 5647e44

Update mcp/ctgov.py

Browse files
Files changed (1) hide show
  1. mcp/ctgov.py +34 -19
mcp/ctgov.py CHANGED
@@ -1,19 +1,19 @@
1
  #!/usr/bin/env python3
2
  """
3
- ClinicalTrials.gov helper – v2 primary, v1 fallback.
4
  """
5
 
6
  from __future__ import annotations
7
- import asyncio, httpx, urllib.parse
8
  from functools import lru_cache
9
  from typing import List, Dict, Any
10
 
11
- # v2 REST
12
  _V2 = "https://clinicaltrials.gov/api/v2/studies"
13
- # legacy v1 (read-only but still works)
14
  _V1 = "https://clinicaltrials.gov/api/query/study_fields"
15
 
16
  _HEADERS = {
 
17
  "User-Agent": (
18
  "Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
19
  "AppleWebKit/537.36 (KHTML, like Gecko) "
@@ -22,12 +22,14 @@ _HEADERS = {
22
  "Accept": "application/json",
23
  }
24
  _TIMEOUT = 15
25
- _MAX = 50 # hard cap
 
26
 
27
  async def _get(url: str, params: Dict[str, Any]) -> httpx.Response:
28
  async with httpx.AsyncClient(timeout=_TIMEOUT, headers=_HEADERS) as cli:
29
  return await cli.get(url, params=params)
30
 
 
31
  async def _try_v2(term: str, limit: int) -> List[Dict]:
32
  params = {
33
  "query" : term,
@@ -37,9 +39,9 @@ async def _try_v2(term: str, limit: int) -> List[Dict]:
37
  r = await _get(_V2, params)
38
  if r.status_code == 200:
39
  return r.json().get("studies", [])
40
- if r.status_code in {403, 429, 500, 503}:
41
- raise RuntimeError("v2 unavailable")
42
- r.raise_for_status() # other 4xx
43
 
44
  async def _try_v1(term: str, limit: int) -> List[Dict]:
45
  params = {
@@ -50,22 +52,35 @@ async def _try_v1(term: str, limit: int) -> List[Dict]:
50
  "fmt" : "json",
51
  }
52
  r = await _get(_V1, params)
53
- r.raise_for_status()
54
- return r.json().get("StudyFieldsResponse", {}).get("StudyFields", [])
 
 
55
 
56
  @lru_cache(maxsize=512)
57
  async def search_trials(term: str, *, max_studies: int = 10) -> List[Dict]:
58
- """
59
- Return up to *max_studies* trial dicts; v2 first, v1 fallback.
60
- """
61
  limit = max(1, min(max_studies, _MAX))
62
- # v2 with back-off: 2s → 4s
63
- for delay in (0, 2, 4):
 
 
64
  try:
65
  if delay:
66
  await asyncio.sleep(delay)
67
  return await _try_v2(term, limit)
68
- except RuntimeError:
69
- continue
70
- # fallback to legacy endpoint
71
- return await _try_v1(term, limit)
 
 
 
 
 
 
 
 
 
 
 
 
1
  #!/usr/bin/env python3
2
  """
3
+ ClinicalTrials.gov helper – v2 first, v1 fallback, 403-proof.
4
  """
5
 
6
  from __future__ import annotations
7
+ import asyncio, httpx
8
  from functools import lru_cache
9
  from typing import List, Dict, Any
10
 
11
+ # Endpoints
12
  _V2 = "https://clinicaltrials.gov/api/v2/studies"
 
13
  _V1 = "https://clinicaltrials.gov/api/query/study_fields"
14
 
15
  _HEADERS = {
16
+ # Chrome-ish UA + explicit JSON accept header ← avoids 403
17
  "User-Agent": (
18
  "Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
19
  "AppleWebKit/537.36 (KHTML, like Gecko) "
 
22
  "Accept": "application/json",
23
  }
24
  _TIMEOUT = 15
25
+ _MAX = 50 # cap page size
26
+
27
 
28
  async def _get(url: str, params: Dict[str, Any]) -> httpx.Response:
29
  async with httpx.AsyncClient(timeout=_TIMEOUT, headers=_HEADERS) as cli:
30
  return await cli.get(url, params=params)
31
 
32
+
33
  async def _try_v2(term: str, limit: int) -> List[Dict]:
34
  params = {
35
  "query" : term,
 
39
  r = await _get(_V2, params)
40
  if r.status_code == 200:
41
  return r.json().get("studies", [])
42
+ # let caller decide to retry or fallback
43
+ raise httpx.HTTPStatusError("v2 failed", request=r.request, response=r)
44
+
45
 
46
  async def _try_v1(term: str, limit: int) -> List[Dict]:
47
  params = {
 
52
  "fmt" : "json",
53
  }
54
  r = await _get(_V1, params)
55
+ if r.status_code == 200:
56
+ return r.json().get("StudyFieldsResponse", {}).get("StudyFields", [])
57
+ raise httpx.HTTPStatusError("v1 failed", request=r.request, response=r)
58
+
59
 
60
  @lru_cache(maxsize=512)
61
  async def search_trials(term: str, *, max_studies: int = 10) -> List[Dict]:
62
+ """Return ≀ *max_studies* trials; silent `[]` if CT.gov is blocking."""
 
 
63
  limit = max(1, min(max_studies, _MAX))
64
+
65
+ # try v2 with back-off
66
+ delay = 0
67
+ for _ in range(3): # first + 2 retries
68
  try:
69
  if delay:
70
  await asyncio.sleep(delay)
71
  return await _try_v2(term, limit)
72
+ except httpx.HTTPStatusError as e:
73
+ if e.response.status_code not in {403, 429, 500, 503}:
74
+ break # non-retryable
75
+ delay = 2 if delay == 0 else delay * 2
76
+
77
+ # fallback to v1 (once)
78
+ try:
79
+ return await _try_v1(term, limit)
80
+ except httpx.HTTPStatusError:
81
+ return [] # final graceful fallback
82
+
83
+
84
+ # back-compat alias
85
async def search_trials_v2(term: str, *, max_studies: int = 10):
    """Backward-compatible alias that forwards to ``search_trials``.

    Both arguments are passed through unchanged and the awaited result of
    ``search_trials`` is returned as-is.
    """
    trials = await search_trials(term, max_studies=max_studies)
    return trials