Spaces:

mgbam
/

MCP_Res

Runtime error

App Files Files Community

mgbam commited on 9 days ago

Commit

5951d5e

verified ·

1 Parent(s): 3f9a9ea

Update mcp/umls.py

Browse files

Files changed (1) hide show

mcp/umls.py +36 -146

mcp/umls.py CHANGED Viewed

@@ -1,152 +1,42 @@
-import os
-import re
-import httpx
-import asyncio
 from functools import lru_cache
-from pathlib import Path
-from typing import List, Optional, Dict, Any
-# ---------------------------------------------------------------------------
-# Configuration
-# ---------------------------------------------------------------------------
 UMLS_API_KEY = os.getenv("UMLS_KEY")
-UMLS_AUTH_URL = "https://utslogin.nlm.nih.gov/cas/v1/api-key"
-UMLS_SEARCH_URL = "https://uts-ws.nlm.nih.gov/rest/search/current"
-# ---------------------------------------------------------------------------
-# Named types
-# ---------------------------------------------------------------------------
-class UMLSResult(Dict[str, Optional[str]]):
-    """
-    Represents a single UMLS lookup result.
-    Keys: term, cui, name, definition
-    """
-    pass
-# ---------------------------------------------------------------------------
-# NLP model loading with caching
-# ---------------------------------------------------------------------------
-@lru_cache(maxsize=None)
-def _load_spacy_model(model_name: str):
-    import spacy
-    return spacy.load(model_name)
-@lru_cache(maxsize=None)
-def _load_scispacy_model():
-    # Prefer the BioNLP model; fall back to the smaller sci model
-    try:
-        return _load_spacy_model("en_ner_bionlp13cg_md")
-    except Exception:
-        return _load_spacy_model("en_core_sci_sm")
-@lru_cache(maxsize=None)
-def _load_general_spacy():
-    return _load_spacy_model("en_core_web_sm")
-# ---------------------------------------------------------------------------
-# Concept extraction utilities
-# ---------------------------------------------------------------------------
-def _extract_entities(nlp, text: str, min_length: int) -> List[str]:
-    """
-    Run a spaCy nlp pipeline over text and return unique entity texts
-    of at least min_length.
-    """
-    doc = nlp(text)
-    ents = {ent.text.strip() for ent in doc.ents if len(ent.text.strip()) >= min_length}
-    return list(ents)
-def _regex_fallback(text: str, min_length: int) -> List[str]:
-    """
-    Simple regex-based token extraction for fallback.
-    """
-    tokens = re.findall(r"\b[a-zA-Z0-9\-]+\b", text)
-    return list({t for t in tokens if len(t) >= min_length})
-def extract_umls_concepts(text: str, min_length: int = 3) -> List[str]:
-    """
-    Extract biomedical concepts from text in priority order:
-      1. SciSpaCy (en_ner_bionlp13cg_md or en_core_sci_sm)
-      2. spaCy general NER (en_core_web_sm)
-      3. Regex tokens
-    Guaranteed to return a list of unique strings.
-    """
-    # 1) SciSpaCy pipeline
-    try:
-        scispacy_nlp = _load_scispacy_model()
-        entities = _extract_entities(scispacy_nlp, text, min_length)
-        if entities:
-            return entities
-    except ImportError:
-        # SciSpaCy not installed
-        pass
-    except Exception:
-        # Unexpected failure in scispacy
-        pass
-    # 2) General spaCy pipeline
-    try:
-        general_nlp = _load_general_spacy()
-        entities = _extract_entities(general_nlp, text, min_length)
-        if entities:
-            return entities
-    except Exception:
-        pass
-    # 3) Regex fallback
-    return _regex_fallback(text, min_length)
-# ---------------------------------------------------------------------------
-# UMLS API integration
-# ---------------------------------------------------------------------------
-async def _get_umls_ticket() -> Optional[str]:
-    """
-    Obtain a UMLS service ticket for subsequent queries.
-    Returns None if API key is missing or authentication fails.
-    """
-    if not UMLS_API_KEY:
-        return None
-    try:
-        async with httpx.AsyncClient(timeout=10) as client:
-            response = await client.post(
-                UMLS_AUTH_URL, data={"apikey": UMLS_API_KEY}
-            )
-            response.raise_for_status()
-            tgt_url = response.text.split('action="')[1].split('"')[0]
-            service_resp = await client.post(
-                tgt_url, data={"service": "http://umlsks.nlm.nih.gov"}
-            )
-            return service_resp.text
-    except Exception:
-        return None
 @lru_cache(maxsize=512)
-async def lookup_umls(term: str) -> UMLSResult:
-    """
-    Look up a term in the UMLS API.
-    Returns a dict containing the original term, its CUI, preferred name, and definition.
-    On failure or quota issues, returns all values except 'term' as None.
-    """
-    ticket = await _get_umls_ticket()
-    if not ticket:
-        return {"term": term, "cui": None, "name": None, "definition": None}
     params = {"string": term, "ticket": ticket, "pageSize": 1}
-    try:
-        async with httpx.AsyncClient(timeout=8) as client:
-            resp = await client.get(UMLS_SEARCH_URL, params=params)
-            resp.raise_for_status()
-            results = resp.json().get("result", {}).get("results", [])
-            first = results[0] if results else {}
-            return {
-                "term": term,
-                "cui": first.get("ui"),
-                "name": first.get("name"),
-                "definition": first.get("definition") or first.get("rootSource"),
-            }
-    except Exception:
-        return {"term": term, "cui": None, "name": None, "definition": None}

+# mcp/umls.py
+import os, httpx
 from functools import lru_cache
 UMLS_API_KEY = os.getenv("UMLS_KEY")
+AUTH_URL     = "https://utslogin.nlm.nih.gov/cas/v1/api-key"
+SEARCH_URL   = "https://uts-ws.nlm.nih.gov/rest/search/current"
+CONTENT_URL  = "https://uts-ws.nlm.nih.gov/rest/content/current/CUI/{cui}"
+async def _get_ticket() -> str:
+    async with httpx.AsyncClient(timeout=10) as c:
+        r1 = await c.post(AUTH_URL, data={"apikey": UMLS_API_KEY})
+        r1.raise_for_status()
+        tgt = r1.text.split('action="')[1].split('"')[0]
+        r2 = await c.post(tgt, data={"service": "http://umlsks.nlm.nih.gov"})
+        r2.raise_for_status()
+        return r2.text
 @lru_cache(maxsize=512)
+async def lookup_umls(term: str) -> dict:
+    ticket = await _get_ticket()
     params = {"string": term, "ticket": ticket, "pageSize": 1}
+    async with httpx.AsyncClient(timeout=10) as c:
+        r = await c.get(SEARCH_URL, params=params)
+        r.raise_for_status()
+        items = r.json().get("result", {}).get("results", [])
+        if not items:
+            return {"term": term}
+        itm = items[0]
+        cui, name = itm.get("ui"), itm.get("name")
+        r2 = await c.get(CONTENT_URL.format(cui=cui), params={"ticket": ticket})
+        r2.raise_for_status()
+        entry = r2.json().get("result", {})
+        types = [t["name"] for t in entry.get("semanticTypes", [])]
+        definition = entry.get("definitions", [{}])[0].get("value", "")
+        return {
+            "term": term,
+            "cui": cui,
+            "name": name,
+            "definition": definition,
+            "types": types
+        }