File size: 3,779 Bytes
98f589d 55cf8ec 98f589d 55cf8ec 98f589d 55cf8ec 98f589d 55cf8ec 98f589d 55cf8ec 98f589d 55cf8ec 98f589d 55cf8ec |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 |
# mcp/umls.py
#!/usr/bin/env python3
"""MedGenesis – lightweight async client for **UMLS REST services**
Capabilities
~~~~~~~~~~~~
* Securely retrieves a Ticket‑Granting Ticket (TGT) with the API‑key
stored in the environment variable `UMLS_KEY` (Hugging Face secret).
* Uses the TGT to mint a short‑lived *Service Ticket* (ST) for each
search call – as required by the UMLS CAS workflow.
* `lookup_umls(term)` returns a dict with `{term, cui, name, rootSource}` for
the best match (pageSize = 1). Falls back to `None` placeholders if nothing is found.
* Responses are cached in-process (intended lifetime: 4 h, at most 512 terms)
to reduce quota usage (default: 1000 requests/day).
Reference docs:
• Authentication – https://documentation.uts.nlm.nih.gov/rest/authentication.html
• Search endpoint – https://documentation.uts.nlm.nih.gov/rest/search.html
"""
from __future__ import annotations
import os, httpx, asyncio, time
from functools import lru_cache
from typing import Dict, Optional
# ---------------------------------------------------------------------
# Constants & env
# ---------------------------------------------------------------------
# API key used to obtain Ticket-Granting Tickets; read once from the environment.
_UMLS_API_KEY = os.environ.get("UMLS_KEY")
if not _UMLS_API_KEY:
    # Fail fast at import time: nothing in this module works without the key.
    raise RuntimeError("Environment variable UMLS_KEY not set – cannot authenticate to UMLS API")

# Endpoint that exchanges the API key for a Ticket-Granting Ticket (TGT).
_AUTH_URL = "https://utslogin.nlm.nih.gov/cas/v1/api-key"
# Service identifier posted when requesting a Service Ticket (per UMLS docs).
_SERVICE = "http://umlsks.nlm.nih.gov"
# Search endpoint queried by `lookup_umls`.
_SEARCH_URL = "https://uts-ws.nlm.nih.gov/rest/search/current"
# Timeout, in seconds, applied to every httpx client created in this module.
_SESSION_TIMEOUT = 15
# ---------------------------------------------------------------------
# Ticket helpers
# ---------------------------------------------------------------------
# Assumed TGT lifetime in seconds (the previous docstring quoted "~8 h").
# NOTE(review): confirm the exact lifetime against the UTS authentication docs.
_TGT_TTL_SECONDS = 8 * 60 * 60

# Single-slot cache: the TGT URL plus the monotonic deadline after which it
# must be re-fetched.
_tgt_state = {"url": None, "expires_at": 0.0}


async def _get_tgt() -> str:
    """Return the Ticket-Granting Ticket (TGT) URL, fetching it when needed.

    The URL is cached in-process for `_TGT_TTL_SECONDS`. The cache stores the
    resulting *string*: wrapping an ``async def`` in ``functools.lru_cache``
    caches the coroutine object instead, and a coroutine can only be awaited
    once, so every call after the first would fail.

    Returns:
        The TGT URL, e.g. ``https://utslogin.nlm.nih.gov/cas/v1/tickets/TGT-…``.

    Raises:
        RuntimeError: if the auth endpoint does not answer with HTTP 201, or
            if the response body carries no ``action="…"`` attribute.
    """
    now = time.monotonic()
    cached_url = _tgt_state["url"]
    if cached_url is not None and now < _tgt_state["expires_at"]:
        return cached_url

    async with httpx.AsyncClient(timeout=_SESSION_TIMEOUT) as cli:
        resp = await cli.post(_AUTH_URL, data={"apikey": _UMLS_API_KEY})
    if resp.status_code != 201:
        raise RuntimeError(f"UMLS auth failed: {resp.text[:200]}")

    # The TGT URL is the value of the first action="…" attribute in the body.
    _, marker, tail = resp.text.partition('action="')
    tgt_url = tail.partition('"')[0]
    if not marker or not tgt_url:
        raise RuntimeError(
            f"UMLS auth response did not contain a TGT URL: {resp.text[:200]}"
        )

    _tgt_state["url"] = tgt_url
    _tgt_state["expires_at"] = now + _TGT_TTL_SECONDS
    return tgt_url
async def _get_service_ticket() -> str:
    """Mint a single-use Service Ticket (ST) from the current TGT.

    Obtains the TGT URL from `_get_tgt`, POSTs the service identifier to it
    and returns the response body, which is the ticket itself.
    `raise_for_status()` raises if the ticket endpoint answers with an HTTP
    error status.
    """
    ticket_endpoint = await _get_tgt()
    async with httpx.AsyncClient(timeout=_SESSION_TIMEOUT) as client:
        response = await client.post(ticket_endpoint, data={"service": _SERVICE})
        response.raise_for_status()
        service_ticket = response.text  # single-use ST
    return service_ticket
# ---------------------------------------------------------------------
# Public search helper
# ---------------------------------------------------------------------
# How long a lookup result stays valid, in seconds (the module docstring
# promises 4 h).
_LOOKUP_TTL_SECONDS = 4 * 60 * 60
# Upper bound on cached terms, matching the previous lru_cache(maxsize=512).
_LOOKUP_CACHE_MAX = 512
# term -> (monotonic expiry deadline, result dict). Dict insertion order is
# used as recency order: the first key is the least recently used one.
_lookup_cache: Dict[str, tuple] = {}


async def lookup_umls(term: str) -> Dict[str, Optional[str]]:
    """Return the best-match UMLS concept for *term* (or empty placeholders).

    Results are cached in-process for `_LOOKUP_TTL_SECONDS`, bounded to
    `_LOOKUP_CACHE_MAX` terms. The cache stores result dicts: wrapping an
    ``async def`` in ``functools.lru_cache`` caches the coroutine object
    instead, which cannot be awaited twice, so repeating a term would raise.

    Args:
        term: Free-text string sent as the ``string`` search parameter.

    Returns:
        A new dict with keys ``term``, ``cui``, ``name`` and ``rootSource``.
        The last three are ``None`` when the search yields no usable hit.

    Raises:
        RuntimeError: propagated from ticket acquisition.
        httpx.HTTPStatusError: if the search endpoint answers with an HTTP
            error status.
    """
    now = time.monotonic()

    # Pop first so that an expired entry is dropped and a live one can be
    # re-inserted at the end (marking it most recently used).
    entry = _lookup_cache.pop(term, None)
    if entry is not None:
        expires_at, cached = entry
        if now < expires_at:
            _lookup_cache[term] = entry
            return dict(cached)  # copy: callers must not mutate the cache

    st = await _get_service_ticket()
    params = {
        "string": term,
        "ticket": st,
        "pageSize": 1,
    }
    async with httpx.AsyncClient(timeout=_SESSION_TIMEOUT) as cli:
        resp = await cli.get(_SEARCH_URL, params=params)
    resp.raise_for_status()

    results = resp.json().get("result", {}).get("results", [])
    hit = results[0] if results else {}
    # NOTE(review): the UTS search API is understood to signal "nothing found"
    # with a placeholder hit whose ui is "NONE"; treat it as no hit so callers
    # never receive "NONE" as a CUI. Confirm against the search docs.
    if hit.get("ui") == "NONE":
        hit = {}

    record = {
        "term": term,
        "cui": hit.get("ui"),
        "name": hit.get("name"),
        "rootSource": hit.get("rootSource"),
    }

    # Evict the least recently used term once the bound is reached.
    if len(_lookup_cache) >= _LOOKUP_CACHE_MAX:
        _lookup_cache.pop(next(iter(_lookup_cache)))
    _lookup_cache[term] = (now + _LOOKUP_TTL_SECONDS, record)
    return dict(record)
# ---------------------------------------------------------------------
# CLI demo
# ---------------------------------------------------------------------
if __name__ == "__main__":
    # Smoke test: run one lookup to completion and print the resulting dict.
    print(asyncio.run(lookup_umls("glioblastoma")))
|