|
|
|
|
|
|
|
"""MedGenesis – lightweight async client for **UMLS REST services** |
|
|
|
Capabilities |
|
~~~~~~~~~~~~ |
|
* Securely retrieves a Ticket‑Granting Ticket (TGT) with the API‑key |
|
stored in the environment variable `UMLS_KEY` (Hugging Face secret). |
|
* Uses the TGT to mint a short‑lived *Service Ticket* (ST) for each |
|
search call – as required by the UMLS CAS workflow. |
|
* `lookup_umls(term)` returns a dict with `{term, cui, name, rootSource}`
  for the best match (pageSize = 1). If nothing is found, `cui`, `name`
  and `rootSource` are `None`.
|
* Responses are cached for 4 h via `functools.lru_cache` to reduce quota |
|
usage (default: 1000 requests/day). |
|
|
|
Reference docs: |
|
• Authentication – https://documentation.uts.nlm.nih.gov/rest/authentication.html |
|
• Search endpoint – https://documentation.uts.nlm.nih.gov/rest/search.html |
|
""" |
|
from __future__ import annotations |
|
|
|
import os, httpx, asyncio, time |
|
from functools import lru_cache |
|
from typing import Dict, Optional |
|
|
|
|
|
|
|
|
|
# ── Configuration ────────────────────────────────────────────────────
# The API key is read once, at import time. Importing this module without
# the key fails immediately instead of failing later on the first request.
_UMLS_API_KEY = os.environ.get("UMLS_KEY")
if _UMLS_API_KEY is None or _UMLS_API_KEY == "":
    raise RuntimeError("Environment variable UMLS_KEY not set – cannot authenticate to UMLS API")

# Endpoint the API key is POSTed to in order to obtain a TGT.
_AUTH_URL = "https://utslogin.nlm.nih.gov/cas/v1/api-key"
# Value sent as the ``service`` form field when requesting a Service Ticket.
_SERVICE = "http://umlsks.nlm.nih.gov"
# Search endpoint queried by ``lookup_umls``.
_SEARCH_URL = "https://uts-ws.nlm.nih.gov/rest/search/current"

# Passed as ``timeout=`` to every ``httpx.AsyncClient`` (seconds).
_SESSION_TIMEOUT = 15
|
|
|
|
|
|
|
|
|
|
|
# Cached TGT URL plus the ``time.monotonic()`` deadline after which it must
# be fetched again. ``functools.lru_cache`` cannot be used on an ``async def``:
# it would cache the coroutine object, which can only be awaited once.
_TGT_CACHE = {"url": None, "expires_at": 0.0}

# Refresh slightly before the ~8 h TGT lifetime so a ticket is never used
# right at the edge of its validity.
_TGT_TTL_SECONDS = 8 * 60 * 60 - 5 * 60


async def _get_tgt() -> str:
    """Return the Ticket-Granting Ticket (TGT) URL, logging in if needed.

    The URL is kept in memory and reused until shortly before its ~8 h
    lifetime ends; only successful logins are cached.

    Returns:
        The TGT URL that Service Ticket requests must be POSTed to.

    Raises:
        RuntimeError: if the auth endpoint does not answer ``201`` or the
            response body does not contain a TGT URL.
    """
    cached_url = _TGT_CACHE["url"]
    if cached_url is not None and time.monotonic() < _TGT_CACHE["expires_at"]:
        return cached_url

    async with httpx.AsyncClient(timeout=_SESSION_TIMEOUT) as cli:
        resp = await cli.post(_AUTH_URL, data={"apikey": _UMLS_API_KEY})
    if resp.status_code != 201:
        raise RuntimeError(f"UMLS auth failed: {resp.text[:200]}")

    # The TGT URL is the ``action="…"`` attribute of the returned HTML form.
    _, marker, tail = resp.text.partition('action="')
    tgt_url = tail.partition('"')[0] if marker else ""
    if not tgt_url:
        raise RuntimeError(
            f"UMLS auth response did not contain a TGT URL: {resp.text[:200]}"
        )

    _TGT_CACHE["url"] = tgt_url
    _TGT_CACHE["expires_at"] = time.monotonic() + _TGT_TTL_SECONDS
    return tgt_url


def _reset_tgt_cache() -> None:
    """Forget the cached TGT so the next ``_get_tgt`` call logs in again."""
    _TGT_CACHE["url"] = None
    _TGT_CACHE["expires_at"] = 0.0


# Keep the ``cache_clear()`` hook that the former ``lru_cache`` wrapper exposed.
_get_tgt.cache_clear = _reset_tgt_cache
|
|
|
|
|
async def _get_service_ticket() -> str:
    """Mint a short-lived Service Ticket (ST) for one API call.

    POSTs the ``service`` identifier to the TGT URL obtained from
    ``_get_tgt`` and returns the response body, which is the ticket.

    Raises:
        httpx.HTTPStatusError: if the ticket endpoint answers with an
            error status.
    """
    ticket_endpoint = await _get_tgt()
    form = {"service": _SERVICE}
    async with httpx.AsyncClient(timeout=_SESSION_TIMEOUT) as client:
        reply = await client.post(ticket_endpoint, data=form)
        reply.raise_for_status()
        ticket = reply.text
    return ticket
|
|
|
|
|
|
|
|
|
|
|
# Search results keyed by term -> (monotonic expiry deadline, result dict).
# ``functools.lru_cache`` cannot be used on an ``async def``: it would cache
# the coroutine object, and awaiting it a second time raises RuntimeError.
_LOOKUP_CACHE = {}
_LOOKUP_CACHE_MAX = 512
_LOOKUP_CACHE_TTL_SECONDS = 4 * 60 * 60


async def lookup_umls(term: str) -> Dict[str, Optional[str]]:
    """Return the best-match UMLS concept for *term*.

    Successful lookups are kept in memory for 4 h (at most 512 terms, least
    recently used evicted first) to save API quota. Failed requests are not
    cached, and each caller receives its own copy of the result dict.

    Args:
        term: Free-text search string.

    Returns:
        A dict with keys ``term``, ``cui``, ``name`` and ``rootSource``.
        The last three are ``None`` when there is no match or when *term*
        is empty or whitespace only (no request is made in that case).

    Raises:
        httpx.HTTPStatusError: if the search endpoint answers with an
            error status.
    """
    placeholder = {"term": term, "cui": None, "name": None, "rootSource": None}
    if not term or not term.strip():
        # Nothing to search for; don't spend a service ticket or quota on it.
        return placeholder

    entry = _LOOKUP_CACHE.pop(term, None)
    if entry is not None:
        expires_at, cached = entry
        if time.monotonic() < expires_at:
            # Re-insert so the entry becomes the most recently used one.
            _LOOKUP_CACHE[term] = entry
            return dict(cached)

    st = await _get_service_ticket()
    params = {
        "string": term,
        "ticket": st,
        "pageSize": 1,
    }
    async with httpx.AsyncClient(timeout=_SESSION_TIMEOUT) as cli:
        resp = await cli.get(_SEARCH_URL, params=params)
    resp.raise_for_status()
    results = resp.json().get("result", {}).get("results", [])

    if results:
        hit = results[0]
        record = {
            "term": term,
            "cui": hit.get("ui"),
            "name": hit.get("name"),
            "rootSource": hit.get("rootSource"),
        }
    else:
        record = placeholder

    # Dicts preserve insertion order, so the first key is the least recently used.
    if term not in _LOOKUP_CACHE and len(_LOOKUP_CACHE) >= _LOOKUP_CACHE_MAX:
        _LOOKUP_CACHE.pop(next(iter(_LOOKUP_CACHE)))
    _LOOKUP_CACHE[term] = (time.monotonic() + _LOOKUP_CACHE_TTL_SECONDS, dict(record))
    return record


# Keep the ``cache_clear()`` hook that the former ``lru_cache`` wrapper exposed.
lookup_umls.cache_clear = _LOOKUP_CACHE.clear
|
|
|
|
|
|
|
|
|
|
|
if __name__ == "__main__":
    # Manual smoke test: look up one term and print the resulting dict.
    async def _demo():
        concept = await lookup_umls("glioblastoma")
        print(concept)

    asyncio.run(_demo())
|
|
|
|