File size: 4,300 Bytes
d5e0cb0 0f5d296 d5e0cb0 0f5d296 d5e0cb0 0f5d296 d5e0cb0 0f5d296 d5e0cb0 0f5d296 d5e0cb0 0f5d296 d5e0cb0 eb1f007 633ba95 d5e0cb0 0f5d296 d5e0cb0 a392df0 633ba95 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 |
# mcp/knowledge_graph.py
"""
Build agraph-compatible nodes + edges for the MedGenesis UI.
Robustness notes
----------------
* Accepts *any* iterable for ``papers``, ``umls``, ``drug_safety``.
* Silently skips items that are **not** dictionaries or have missing keys.
* Normalises drug-safety payloads that may arrive as dict **or** list.
* Always casts labels to string — avoids ``None.lower()`` errors.
"""
from __future__ import annotations
import re
from typing import List, Tuple
from streamlit_agraph import Node, Edge, Config
# ── helpers -----------------------------------------------------------------
def _safe_str(x) -> str:
    """Convert *x* to ``str``; ``None`` maps to the empty string."""
    if x is None:
        return ""
    return str(x)
def _uniquify(nodes: List[Node]) -> List[Node]:
    """Return *nodes* without repeated ids, keeping the first node per id."""
    # A dict keeps insertion order, so the first node stored for an id also
    # fixes that id's position in the result.
    first_by_id = {}
    for node in nodes:
        first_by_id.setdefault(node.id, node)
    return list(first_by_id.values())
# ── public builder ----------------------------------------------------------
def _drug_label(record: dict) -> str:
    """Return the best-effort drug name found in *record*, or ``""``.

    Lookup order: ``drug_name``, then ``patient.drug``, then
    ``medicinalproduct``.
    """
    patient = record.get("patient")
    # ``patient`` may be missing, None or a non-dict; only dicts are queried.
    nested = patient.get("drug") if isinstance(patient, dict) else None
    # openFDA event payloads commonly nest drugs as a list of dicts; use the
    # first entry rather than stringifying the whole list into a label.
    if isinstance(nested, (list, tuple)):
        nested = nested[0] if nested else None
    if isinstance(nested, dict):
        nested = nested.get("medicinalproduct") or nested.get("drug_name")
    candidate = record.get("drug_name") or nested or record.get("medicinalproduct")
    return _safe_str(candidate).strip()


def build_agraph(
    papers: list,
    umls: list,
    drug_safety: list,
) -> Tuple[List[Node], List[Edge], Config]:
    """
    Build nodes, edges and a config for ``streamlit_agraph.agraph``.

    A paper is linked to a concept or drug with a ``mentions`` edge when the
    lower-cased concept/drug name occurs as a substring of the paper's
    lower-cased ``title`` + ``summary``.

    Parameters
    ----------
    papers : List[dict]
        Paper records; ``title`` and ``summary`` are read and missing values
        are treated as empty text. Non-dict items are skipped. ``None`` is
        treated as empty.
    umls : List[dict]
        Dicts with at least ``name`` and ``cui``. Items that are not dicts,
        or whose ``name``/``cui`` is empty after stripping, are skipped.
        Any iterable works (it is walked once). ``None`` is treated as empty.
    drug_safety : List[dict | list]
        OpenFDA records — each item may be one dict or a list of dicts.
        A single top-level dict is treated as a one-item list; ``None`` is
        treated as empty.

    Returns
    -------
    nodes, edges, cfg : tuple
        Ready for ``streamlit_agraph.agraph``.
    """
    nodes: List[Node] = []
    edges: List[Edge] = []

    # ── UMLS concepts -------------------------------------------------------
    # (node id, lower-cased name) for every concept that actually got a node,
    # so edges can never point at an id that was skipped or spelt differently.
    concepts: List[Tuple[str, str]] = []
    for c in () if umls is None else umls:
        if not isinstance(c, dict):
            continue
        cui = _safe_str(c.get("cui")).strip()
        name = _safe_str(c.get("name")).strip()
        if not (cui and name):
            continue
        concept_id = f"concept_{cui}"
        concepts.append((concept_id, name.lower()))
        nodes.append(Node(id=concept_id, label=name, size=28, color="#00b894"))

    # ── Drug safety ---------------------------------------------------------
    if drug_safety is None:
        safety_items = ()
    elif isinstance(drug_safety, dict):
        # Iterating a dict would yield its keys and drop the record.
        safety_items = (drug_safety,)
    else:
        safety_items = drug_safety

    drugs: List[Tuple[str, str]] = []  # (node id, lower-cased label)
    for idx, rec in enumerate(safety_items):
        if not rec:
            continue
        recs = rec if isinstance(rec, list) else [rec]
        for j, r in enumerate(recs):
            if not isinstance(r, dict):
                continue
            drug_id = f"drug_{idx}_{j}"
            # Fall back to the node id when no usable name is present.
            label = _drug_label(r) or drug_id
            drugs.append((drug_id, label.lower()))
            nodes.append(Node(id=drug_id, label=label, size=25, color="#d35400"))

    # ── Papers & edges ------------------------------------------------------
    # Concepts first, then drugs: same edge order as separate loops would give.
    targets = concepts + drugs
    for p_idx, p in enumerate(() if papers is None else papers):
        if not isinstance(p, dict):
            continue
        pid = f"paper_{p_idx}"
        title = _safe_str(p.get("title"))
        summary = _safe_str(p.get("summary"))
        nodes.append(
            Node(
                id=pid,
                label=f"P{p_idx + 1}",
                tooltip=title,
                size=16,
                color="#0984e3",
            )
        )
        text_blob = f"{title} {summary}".lower()
        linked = set()  # targets already connected to this paper
        for target_id, needle in targets:
            if target_id in linked or needle not in text_blob:
                continue
            linked.add(target_id)
            edges.append(Edge(source=pid, target=target_id, label="mentions"))

    # ── deduplicate & config ------------------------------------------------
    nodes = _uniquify(nodes)
    cfg = Config(
        width="100%",
        height="600px",
        directed=False,
        nodeHighlightBehavior=True,
        highlightColor="#f1c40f",
        collapsible=True,
        node={"labelProperty": "label"},
    )
    return nodes, edges, cfg
|