Update mcp/umls.py
Browse files- mcp/umls.py +90 -26
mcp/umls.py
CHANGED
@@ -1,34 +1,98 @@
|
|
1 |
# mcp/umls.py
|
2 |
|
3 |
-
|
4 |
-
|
5 |
|
6 |
-
|
7 |
-
|
8 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
9 |
|
10 |
-
|
11 |
-
|
12 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
13 |
if resp.status_code != 201:
|
14 |
-
raise
|
15 |
-
|
16 |
-
|
17 |
-
|
18 |
|
19 |
-
async def
|
20 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
21 |
params = {
|
22 |
-
"string": term,
|
23 |
-
"ticket":
|
24 |
-
"pageSize": 1,
|
25 |
}
|
26 |
-
async with httpx.AsyncClient() as
|
27 |
-
resp = await
|
28 |
-
|
29 |
-
|
30 |
-
|
31 |
-
|
32 |
-
|
33 |
-
|
34 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
# mcp/umls.py
|
2 |
|
3 |
+
#!/usr/bin/env python3
|
4 |
+
"""MedGenesis – lightweight async client for **UMLS REST services**
|
5 |
|
6 |
+
Capabilities
|
7 |
+
~~~~~~~~~~~~
|
8 |
+
* Securely retrieves a Ticket‑Granting Ticket (TGT) with the API‑key
|
9 |
+
stored in the environment variable `UMLS_KEY` (Hugging Face secret).
|
10 |
+
* Uses the TGT to mint a short‑lived *Service Ticket* (ST) for each
|
11 |
+
search call – as required by the UMLS CAS workflow.
|
12 |
+
* `lookup_umls(term)` returns a dict with `{cui, name, rootSource}` for
|
13 |
+
the best match (pageSize = 1). Falls back gracefully if nothing found.
|
14 |
+
* Responses are cached in memory (intended lifetime: 4 h) to reduce quota
|
15 |
+
usage (default: 1000 requests/day).
|
16 |
|
17 |
+
Reference docs:
|
18 |
+
• Authentication – https://documentation.uts.nlm.nih.gov/rest/authentication.html
|
19 |
+
• Search endpoint – https://documentation.uts.nlm.nih.gov/rest/search.html
|
20 |
+
"""
|
21 |
+
from __future__ import annotations
|
22 |
+
|
23 |
+
import os, httpx, asyncio, time
|
24 |
+
from functools import lru_cache
|
25 |
+
from typing import Dict, Optional
|
26 |
+
|
27 |
+
# ---------------------------------------------------------------------
|
28 |
+
# Constants & env
|
29 |
+
# ---------------------------------------------------------------------
|
30 |
+
# The API key is read once, at import time; importing this module without
# UMLS_KEY set (or with it set to an empty string) fails immediately.
_UMLS_API_KEY = os.environ.get("UMLS_KEY")
if _UMLS_API_KEY is None or _UMLS_API_KEY == "":
    raise RuntimeError("Environment variable UMLS_KEY not set – cannot authenticate to UMLS API")

# Endpoints used by the ticket workflow and the search call.
_AUTH_URL = "https://utslogin.nlm.nih.gov/cas/v1/api-key"
_SERVICE = "http://umlsks.nlm.nih.gov"  # per UMLS docs
_SEARCH_URL = "https://uts-ws.nlm.nih.gov/rest/search/current"

# Timeout, in seconds, passed to every httpx.AsyncClient in this module.
_SESSION_TIMEOUT = 15
|
39 |
+
|
40 |
+
# ---------------------------------------------------------------------
|
41 |
+
# Ticket helpers
|
42 |
+
# ---------------------------------------------------------------------
|
43 |
+
|
44 |
+
# functools.lru_cache must not wrap a coroutine function: it would memoise the
# coroutine *object*, which can be awaited only once, so every call after the
# first would fail.  The TGT URL is therefore cached by hand, with an expiry.
_TGT_TTL_SECONDS = 8 * 60 * 60 - 300  # ~8 h lifetime, refreshed a little early
_tgt_url: Optional[str] = None
_tgt_expires_at = 0.0  # time.monotonic() value after which the TGT is stale


async def _get_tgt() -> str:
    """Return a Ticket-Granting Ticket (TGT) URL, re-using a cached one.

    A fresh TGT is requested from ``_AUTH_URL`` with the module's API key
    when nothing is cached or the cached value is older than
    ``_TGT_TTL_SECONDS``; otherwise the cached URL is returned without any
    network traffic.

    Returns:
        The URL taken from the ``action="..."`` attribute of the auth
        response body (looks like
        ``https://utslogin.nlm.nih.gov/cas/v1/tickets/TGT-…``).

    Raises:
        RuntimeError: if the auth endpoint does not answer ``201`` or the
            response body contains no ``action="..."`` URL.
    """
    global _tgt_url, _tgt_expires_at

    now = time.monotonic()
    if _tgt_url is not None and now < _tgt_expires_at:
        return _tgt_url

    async with httpx.AsyncClient(timeout=_SESSION_TIMEOUT) as cli:
        resp = await cli.post(_AUTH_URL, data={"apikey": _UMLS_API_KEY})
    if resp.status_code != 201:
        raise RuntimeError(f"UMLS auth failed: {resp.text[:200]}")

    # partition() never raises, so a body without the marker yields a clear
    # RuntimeError instead of a bare IndexError from split(...)[1].
    _, marker, tail = resp.text.partition('action="')
    tgt_url = tail.partition('"')[0]
    if not marker or not tgt_url:
        raise RuntimeError(f"UMLS auth response has no TGT URL: {resp.text[:200]}")

    _tgt_url = tgt_url
    _tgt_expires_at = now + _TGT_TTL_SECONDS
    return tgt_url
|
53 |
+
|
54 |
|
55 |
+
async def _get_service_ticket() -> str:
    """Mint a single-use Service Ticket (ST) from the current TGT.

    The service name ``_SERVICE`` is POSTed to the TGT URL obtained from
    ``_get_tgt()``; the response body is returned unchanged as the ticket.

    Raises:
        httpx.HTTPStatusError: if the ticket endpoint answers with a 4xx/5xx
            status (via ``raise_for_status``).
    """
    ticket_granting_url = await _get_tgt()
    payload = {"service": _SERVICE}
    async with httpx.AsyncClient(timeout=_SESSION_TIMEOUT) as client:
        reply = await client.post(ticket_granting_url, data=payload)
        reply.raise_for_status()
        return reply.text  # single-use ST
|
61 |
+
|
62 |
+
# ---------------------------------------------------------------------
|
63 |
+
# Public search helper
|
64 |
+
# ---------------------------------------------------------------------
|
65 |
+
|
66 |
+
# functools.lru_cache cannot cache an ``async def`` (it stores the one-shot
# coroutine object, so a repeated term would fail on await) and has no expiry.
# Results are cached by hand instead: term -> (expiry on the monotonic clock,
# result dict), bounded in size and dropped after 4 h.
_LOOKUP_TTL_SECONDS = 4 * 60 * 60
_LOOKUP_CACHE_MAX = 512
_lookup_cache: Dict[str, tuple] = {}


async def lookup_umls(term: str) -> Dict[str, Optional[str]]:
    """Return the best-match UMLS concept for *term* (or empty placeholders).

    A fresh service ticket is minted for every uncached search.  Successful
    lookups, including "nothing found", are cached per *term* for
    ``_LOOKUP_TTL_SECONDS``; at most ``_LOOKUP_CACHE_MAX`` terms are kept,
    the oldest-inserted entry being evicted first.

    Args:
        term: Free-text search string sent as the ``string`` query parameter.

    Returns:
        A new dict with keys ``term``, ``cui``, ``name`` and ``rootSource``.
        The last three are ``None`` when the search returned no usable hit.

    Raises:
        httpx.HTTPStatusError: if the search endpoint answers with 4xx/5xx.
        RuntimeError: propagated from ticket acquisition.
    """
    now = time.monotonic()
    entry = _lookup_cache.get(term)
    if entry is not None:
        expires_at, cached = entry
        if now < expires_at:
            # Hand out a copy so callers cannot mutate the cached value.
            return dict(cached)
        del _lookup_cache[term]

    st = await _get_service_ticket()
    params = {
        "string": term,
        "ticket": st,
        "pageSize": 1,
    }
    async with httpx.AsyncClient(timeout=_SESSION_TIMEOUT) as cli:
        resp = await cli.get(_SEARCH_URL, params=params)
    resp.raise_for_status()
    results = resp.json().get("result", {}).get("results", [])

    hit = results[0] if results else {}
    # NOTE(review): older UMLS API versions report "no match" as a single
    # pseudo-result with ui == "NONE" (name "NO RESULTS") — treat that as a
    # miss rather than returning "NONE" as a CUI.  Confirm against the
    # search endpoint documentation.
    if hit.get("ui") == "NONE":
        hit = {}

    concept = {
        "term": term,
        "cui": hit.get("ui"),
        "name": hit.get("name"),
        "rootSource": hit.get("rootSource"),
    }

    # Keep the cache bounded: dicts preserve insertion order, so the first
    # key is the oldest entry.
    if len(_lookup_cache) >= _LOOKUP_CACHE_MAX:
        _lookup_cache.pop(next(iter(_lookup_cache)))
    _lookup_cache[term] = (now + _LOOKUP_TTL_SECONDS, concept)
    return dict(concept)
|
89 |
+
|
90 |
+
|
91 |
+
# ---------------------------------------------------------------------
|
92 |
+
# CLI demo
|
93 |
+
# ---------------------------------------------------------------------
|
94 |
+
if __name__ == "__main__":
    # Manual smoke test: look up one term and print the resulting dict.
    async def _demo():
        concept = await lookup_umls("glioblastoma")
        print(concept)

    asyncio.run(_demo())
|
98 |
+
|