Spaces:

mgbam
/

MCP_Res

Runtime error

mgbam committed 4 days ago

Commit

c3f5ed6

verified ·

1 Parent(s): c4bf66f

Update mcp/nlp.py

Files changed (1) hide show

mcp/nlp.py CHANGED Viewed

@@ -1,30 +1,34 @@
 # mcp/nlp.py
 import spacy
-from scispacy.linking import EntityLinker
-@spacy.util.cache_dir("~/.cache/scispacy")
-def load_model():
-    nlp = spacy.load("en_core_sci_scibert")
-    linker = EntityLinker(name="umls", resolve_abbreviations=True, threshold=0.75)
-    nlp.add_pipe(linker)
-    return nlp
-nlp = load_model()
-def extract_umls_concepts(text: str) -> list[dict]:
     """
-    Returns unique UMLS concepts with confidence scores and semantic types.
     """
     doc = nlp(text)
-    best = {}
-    for ent in doc.ents:
-        for cui, score in ent._.umls_ents:
-            meta = nlp.get_pipe("scispacy_linker").kb.cui_to_entity[cui]
-            if cui not in best or score > best[cui]["score"]:
-                best[cui] = {
-                    "cui": cui,
-                    "name": meta.canonical_name,
-                    "score": float(score),
-                    "types": meta.types
-                }
-    return list(best.values())

 # mcp/nlp.py
+import asyncio
 import spacy
+from typing import List, Dict
+from mcp.umls import lookup_umls
+# Load only the small English model (en_core_web_sm), once, at import time
+try:
+    nlp = spacy.load("en_core_web_sm")
+except OSError:
+    # In case it was not downloaded yet: fetch it with spaCy's CLI helper, then retry the load
+    from spacy.cli import download
+    download("en_core_web_sm")
+    nlp = spacy.load("en_core_web_sm")
+async def extract_umls_concepts(text: str) -> List[Dict]:
     """
+    Look up UMLS concepts for the entities spaCy finds in `text`.
+
+    1) Run spaCy NER on the text
+    2) For each unique entity text longer than 2 characters (measured after
+       stripping whitespace), do an async UMLS lookup via `lookup_umls`
+    3) Return the list of successful concept dicts, i.e. the results that
+       are dicts with a truthy "cui" value
+
+    Lookups that raise are dropped silently; the exception is not propagated.
     """
     doc = nlp(text)
+    # A set de-duplicates repeated entity texts; its iteration order is not guaranteed
+    terms = {ent.text for ent in doc.ents if len(ent.text.strip()) > 2}
+    # Kick off all lookups in parallel
+    tasks = [lookup_umls(term) for term in terms]
+    # return_exceptions=True: a failed lookup comes back as an exception object in `results`
+    results = await asyncio.gather(*tasks, return_exceptions=True)
+    # Filter out failures (anything that is not a dict) & concepts without CUI
+    concepts = []
+    for r in results:
+        if isinstance(r, dict) and r.get("cui"):
+            concepts.append(r)
+    return concepts