File size: 3,779 Bytes
98f589d
 
55cf8ec
 
98f589d
55cf8ec
 
 
 
 
 
 
 
 
 
98f589d
55cf8ec
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
98f589d
55cf8ec
 
 
 
98f589d
55cf8ec
 
 
 
 
 
 
 
 
 
 
 
 
 
 
98f589d
55cf8ec
 
 
98f589d
55cf8ec
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
# mcp/umls.py

#!/usr/bin/env python3
"""MedGenesis – lightweight async client for **UMLS REST services**

Capabilities
~~~~~~~~~~~~
* Securely retrieves a Ticket‑Granting Ticket (TGT) with the API‑key
  stored in the environment variable `UMLS_KEY` (Hugging Face secret).
* Uses the TGT to mint a short‑lived *Service Ticket* (ST) for each
  search call – as required by the UMLS CAS workflow.
* `lookup_umls(term)` returns a dict with `{term, cui, name, rootSource}`
  for the best match (pageSize = 1).  The concept fields are `None` if
  nothing is found.
* Responses are cached for 4 h via `functools.lru_cache` to reduce quota
  usage (default: 1000 requests/day).

Reference docs:
  • Authentication – https://documentation.uts.nlm.nih.gov/rest/authentication.html
  • Search endpoint – https://documentation.uts.nlm.nih.gov/rest/search.html
"""
from __future__ import annotations

import os, httpx, asyncio, time
from functools import lru_cache
from typing import Dict, Optional

# ---------------------------------------------------------------------
# Constants & env
# ---------------------------------------------------------------------
# Endpoints used by the CAS ticket workflow and the search call.
_AUTH_URL = "https://utslogin.nlm.nih.gov/cas/v1/api-key"
_SERVICE = "http://umlsks.nlm.nih.gov"  # per UMLS docs
_SEARCH_URL = "https://uts-ws.nlm.nih.gov/rest/search/current"

# Timeout (in seconds) handed to every httpx.AsyncClient in this module.
_SESSION_TIMEOUT = 15

# The API key is read once at import time; importing the module without it
# fails immediately rather than at the first request.
_UMLS_API_KEY = os.environ.get("UMLS_KEY")
if not _UMLS_API_KEY:
    raise RuntimeError("Environment variable UMLS_KEY not set – cannot authenticate to UMLS API")

# ---------------------------------------------------------------------
# Ticket helpers
# ---------------------------------------------------------------------

# How long (seconds) a fetched TGT is reused.  Kept a little under the ~8 h
# ticket lifetime so a ticket that is about to expire is never handed out.
_TGT_TTL = 8 * 3600 - 300

# Either None (nothing cached) or a ``(tgt_url, fetched_at)`` pair where
# ``fetched_at`` is a ``time.monotonic()`` reading.
_tgt_cache: Optional[tuple] = None


def _clear_tgt_cache() -> None:
    """Forget the cached TGT so the next `_get_tgt()` call re-authenticates."""
    global _tgt_cache
    _tgt_cache = None


async def _get_tgt() -> str:
    """Return the URL of a Ticket‑Granting Ticket (TGT), re-using a cached one.

    The TGT is requested by POSTing the API key to the CAS endpoint and is
    then kept in a module-level slot for `_TGT_TTL` seconds.

    NOTE: `functools.lru_cache` must not decorate an ``async def``: it would
    cache the *coroutine object*, and awaiting that object a second time
    raises ``RuntimeError: cannot reuse already awaited coroutine``.  The
    result is therefore cached explicitly here.

    Returns:
        The TGT URL taken from the ``action="…"`` attribute of the response
        body (looks like ``https://utslogin.nlm.nih.gov/cas/v1/tickets/TGT-…``).

    Raises:
        RuntimeError: if the endpoint does not answer ``201`` or the response
            body does not contain an ``action="…"`` attribute.
    """
    global _tgt_cache

    if _tgt_cache is not None:
        cached_url, fetched_at = _tgt_cache
        if time.monotonic() - fetched_at < _TGT_TTL:
            return cached_url

    async with httpx.AsyncClient(timeout=_SESSION_TIMEOUT) as cli:
        resp = await cli.post(_AUTH_URL, data={"apikey": _UMLS_API_KEY})
        if resp.status_code != 201:
            raise RuntimeError(f"UMLS auth failed: {resp.text[:200]}")
        # The TGT URL is embedded as the form's action attribute.
        _, marker, tail = resp.text.partition('action="')
        tgt_url = tail.partition('"')[0]
        if not marker or not tgt_url:
            raise RuntimeError(
                f"UMLS auth response did not contain a TGT URL: {resp.text[:200]}"
            )

    _tgt_cache = (tgt_url, time.monotonic())
    return tgt_url


# Keep the `cache_clear()` hook that the former `lru_cache` wrapper exposed.
_get_tgt.cache_clear = _clear_tgt_cache


async def _get_service_ticket() -> str:
    """Mint a Service Ticket (ST) by POSTing the service name to the TGT URL.

    Returns:
        The response body of the TGT endpoint, i.e. the single‑use ST.

    Raises:
        httpx.HTTPStatusError: if the TGT endpoint answers with an error status.
    """
    ticket_granting_url = await _get_tgt()
    client = httpx.AsyncClient(timeout=_SESSION_TIMEOUT)
    async with client:
        st_response = await client.post(
            ticket_granting_url,
            data={"service": _SERVICE},
        )
        st_response.raise_for_status()
        service_ticket = st_response.text
    return service_ticket

# ---------------------------------------------------------------------
# Public search helper
# ---------------------------------------------------------------------

# Seconds a lookup result stays valid, and the maximum number of cached terms.
_LOOKUP_TTL = 4 * 3600
_LOOKUP_CACHE_SIZE = 512

# term -> (stored_at, result).  A plain dict keeps insertion order, so the
# first key is always the least recently used entry.
_lookup_cache: Dict[str, tuple] = {}


async def lookup_umls(term: str) -> Dict[str, Optional[str]]:
    """Return best‑match UMLS concept for *term* (or empty placeholders).

    Results (including "nothing found") are kept in an in-process cache for
    `_LOOKUP_TTL` seconds, bounded to `_LOOKUP_CACHE_SIZE` terms with
    least-recently-used eviction.  Failed requests are not cached.

    NOTE: `functools.lru_cache` must not decorate an ``async def``: it would
    cache the *coroutine object*, and a second lookup of the same term would
    raise ``RuntimeError: cannot reuse already awaited coroutine``.

    Args:
        term: Free-text string passed to the UMLS search endpoint.

    Returns:
        A new dict with the keys ``term``, ``cui``, ``name`` and
        ``rootSource``.  The last three are ``None`` when there is no match.

    Raises:
        httpx.HTTPStatusError: if the search endpoint answers with an error
            status.  Errors from obtaining the service ticket propagate too.
    """
    now = time.monotonic()
    entry = _lookup_cache.pop(term, None)
    if entry is not None:
        stored_at, cached = entry
        if now - stored_at < _LOOKUP_TTL:
            # Re-insert so this term becomes the most recently used one.
            _lookup_cache[term] = entry
            # Hand out a copy so callers cannot mutate the cached value.
            return dict(cached)

    st = await _get_service_ticket()
    params = {
        "string": term,
        "ticket": st,
        "pageSize": 1,
    }
    async with httpx.AsyncClient(timeout=_SESSION_TIMEOUT) as cli:
        resp = await cli.get(_SEARCH_URL, params=params)
        resp.raise_for_status()
        results = resp.json().get("result", {}).get("results", [])

    concept: Dict[str, Optional[str]] = {
        "term": term,
        "cui": None,
        "name": None,
        "rootSource": None,
    }
    # The search endpoint signals "no match" with a sentinel row whose ui is
    # "NONE" (name "NO RESULTS"); treat it like an empty result list.
    if results and results[0].get("ui") != "NONE":
        hit = results[0]
        concept["cui"] = hit.get("ui")
        concept["name"] = hit.get("name")
        concept["rootSource"] = hit.get("rootSource")

    # Evict least recently used entries before storing the new one.
    while len(_lookup_cache) >= _LOOKUP_CACHE_SIZE:
        del _lookup_cache[next(iter(_lookup_cache))]
    _lookup_cache[term] = (time.monotonic(), concept)
    return dict(concept)


# Keep the `cache_clear()` hook that the former `lru_cache` wrapper exposed.
lookup_umls.cache_clear = _lookup_cache.clear


# ---------------------------------------------------------------------
# CLI demo
# ---------------------------------------------------------------------
if __name__ == "__main__":
    async def _demo():
        # Look up one sample term and print the resulting concept dict.
        concept = await lookup_umls("glioblastoma")
        print(concept)

    asyncio.run(_demo())