MCP_Res / mcp /knowledge_graph.py
mgbam's picture
Update mcp/knowledge_graph.py
d5e0cb0 verified
raw
history blame
4.3 kB
# mcp/knowledge_graph.py
"""
Build agraph-compatible nodes + edges for the MedGenesis UI.
Robustness notes
----------------
* Accepts *any* iterable for ``papers``, ``umls``, ``drug_safety``.
* Silently skips items that are **not** dictionaries or have missing keys.
* Normalises drug-safety payloads that may arrive as dict **or** list.
* Always casts labels to string – avoids ``None.lower()`` errors.
"""
from __future__ import annotations
import re
from typing import List, Tuple
from streamlit_agraph import Node, Edge, Config
# ── helpers -----------------------------------------------------------------
def _safe_str(x) -> str:
"""Return UTF-8 string or empty string."""
return str(x) if x is not None else ""
def _uniquify(nodes: List[Node]) -> List[Node]:
"""Remove duplicate node-ids (keep first)."""
seen, out = set(), []
for n in nodes:
if n.id not in seen:
out.append(n)
seen.add(n.id)
return out
# ── public builder ----------------------------------------------------------
def _first_drug_name(value) -> str:
    """Extract a stripped drug name from a str, dict or (nested) list, else ``""``."""
    if isinstance(value, dict):
        # OpenFDA drug entries carry the product name under ``medicinalproduct``.
        return _safe_str(value.get("medicinalproduct")).strip()
    if isinstance(value, (list, tuple)):
        for item in value:
            found = _first_drug_name(item)
            if found:
                return found
        return ""
    return _safe_str(value).strip()


def _drug_label(record: dict) -> str:
    """Return the first usable drug name found in *record*, or ``""`` if none."""
    patient = record.get("patient")
    # ``patient`` may be missing, None or a non-dict; only descend into dicts.
    nested = patient.get("drug") if isinstance(patient, dict) else None
    candidates = (
        record.get("drug_name"),
        nested,
        record.get("medicinalproduct"),
    )
    for candidate in candidates:
        if not candidate:
            continue
        found = _first_drug_name(candidate)
        if found:
            return found
    return ""


def build_agraph(
    papers: list,
    umls: list,
    drug_safety: list,
) -> Tuple[List[Node], List[Edge], Config]:
    """
    Build nodes, edges and a config object for ``streamlit_agraph.agraph``.

    A paper is linked to a concept or drug when that concept/drug name occurs
    (case-insensitive substring match) in the paper's title or summary.

    Parameters
    ----------
    papers : iterable of dict, or None
        Each dict may provide ``title`` and ``summary``; missing values are
        treated as empty strings. Non-dict items are skipped.
    umls : iterable of dict, or None
        Dicts with non-empty ``name`` and ``cui``; anything else is skipped.
        Iterated exactly once, so one-shot iterables are fine.
    drug_safety : dict, iterable of (dict | list of dict), or None
        Drug-safety records. A single dict is treated as a one-element list.
        The drug name is taken from ``drug_name``, then ``patient.drug``
        (string, dict, or list of dicts with ``medicinalproduct``), then
        ``medicinalproduct``; if none is usable the node id is the label.

    Returns
    -------
    nodes, edges, cfg : tuple
        ``nodes`` has unique ids (first occurrence kept); ``edges`` contains at
        most one edge per (paper, target) pair and only targets nodes that
        were actually created.
    """
    nodes: List[Node] = []
    edges: List[Edge] = []

    # ── UMLS concepts -------------------------------------------------------
    # Collected once as (node id, lower-cased name) so the per-paper matching
    # below uses exactly the ids/names the nodes were created with and never
    # needs to re-iterate ``umls``.
    concepts: List[Tuple[str, str]] = []
    for c in umls or ():
        if not isinstance(c, dict):
            continue
        cui = _safe_str(c.get("cui")).strip()
        name = _safe_str(c.get("name")).strip()
        if not (cui and name):
            continue
        concept_id = f"concept_{cui}"
        concepts.append((concept_id, name.lower()))
        nodes.append(
            Node(id=concept_id, label=name, size=28, color="#00b894")
        )

    # ── Drug safety --------------------------------------------------------
    if isinstance(drug_safety, dict):
        # A lone record would otherwise be enumerated key-by-key and dropped.
        drug_safety = [drug_safety]
    drug_nodes: List[Tuple[str, str]] = []  # (node id, lower-cased label)
    for idx, rec in enumerate(drug_safety or ()):
        if not rec:
            continue
        recs = rec if isinstance(rec, list) else [rec]
        for j, r in enumerate(recs):
            if not isinstance(r, dict):
                continue
            did = f"drug_{idx}_{j}"
            dn = _drug_label(r) or did
            drug_nodes.append((did, dn.lower()))
            nodes.append(Node(id=did, label=dn, size=25, color="#d35400"))

    # ── Papers & edges ------------------------------------------------------
    for p_idx, p in enumerate(papers or ()):
        if not isinstance(p, dict):
            continue
        pid = f"paper_{p_idx}"
        title = _safe_str(p.get("title"))
        summary = _safe_str(p.get("summary"))
        nodes.append(
            Node(
                id=pid,
                label=f"P{p_idx + 1}",
                tooltip=title,
                size=16,
                color="#0984e3",
            )
        )
        text_blob = f"{title} {summary}".lower()
        linked = set()  # targets already connected to this paper

        # → concept edges, then → drug edges
        for target, needle in concepts + drug_nodes:
            if target in linked or needle not in text_blob:
                continue
            linked.add(target)
            edges.append(Edge(source=pid, target=target, label="mentions"))

    # ── deduplicate & config ------------------------------------------------
    nodes = _uniquify(nodes)
    cfg = Config(
        width="100%",
        height="600px",
        directed=False,
        nodeHighlightBehavior=True,
        highlightColor="#f1c40f",
        collapsible=True,
        node={"labelProperty": "label"},
    )
    return nodes, edges, cfg