|
|
|
""" |
|
Build agraph-compatible nodes + edges for the MedGenesis UI. |
|
|
|
Robustness notes |
|
---------------- |
|
* Accepts *any* iterable for ``papers``, ``umls``, ``drug_safety``. |
|
* Silently skips items that are **not** dictionaries or have missing keys. |
|
* Normalises drug-safety payloads that may arrive as dict **or** list. |
|
* Always casts labels to string – avoids ``None.lower()`` errors.
|
""" |
|
|
|
from __future__ import annotations |
|
|
|
import re |
|
from typing import List, Tuple |
|
|
|
from streamlit_agraph import Node, Edge, Config |
|
|
|
|
|
|
|
def _safe_str(x) -> str:
    """Convert ``x`` to text with ``str()``; ``None`` becomes the empty string."""
    if x is None:
        return ""
    return str(x)
|
|
|
|
|
def _uniquify(nodes: List[Node]) -> List[Node]:
    """Drop nodes whose ``id`` has already appeared, keeping the first one.

    The relative order of the surviving nodes matches the input order.
    """
    # dicts preserve insertion order, and ``setdefault`` never overwrites an
    # existing key, so the first node seen for each id is the one retained.
    first_by_id = {}
    for node in nodes:
        first_by_id.setdefault(node.id, node)
    return list(first_by_id.values())
|
|
|
|
|
|
|
def _drug_name_from(value) -> str:
    """Return the first non-empty drug name found in ``value``, else ``""``.

    Scalars are stringified and stripped. Lists/tuples are searched in order
    and dicts are read through their ``medicinalproduct`` / ``drug_name`` key,
    so a nested list of drug dicts yields a readable name instead of the
    ``repr`` of the whole container.
    """
    if not value:
        return ""
    if isinstance(value, dict):
        return _drug_name_from(
            value.get("medicinalproduct") or value.get("drug_name")
        )
    if isinstance(value, (list, tuple)):
        for item in value:
            found = _drug_name_from(item)
            if found:
                return found
        return ""
    return _safe_str(value).strip()


def _drug_label(record: dict, fallback: str) -> str:
    """Pick a display name for one drug-safety record, or ``fallback``."""
    patient = record.get("patient")
    # ``patient`` can be present but null / not a dict; chaining ``.get`` on it
    # unconditionally would raise AttributeError.
    # NOTE(review): ``patient.drug`` is presumably the raw openFDA list of
    # drug dicts - confirm against the caller's payloads.
    nested = patient.get("drug") if isinstance(patient, dict) else None
    candidates = (
        record.get("drug_name"),
        nested,
        record.get("medicinalproduct"),
    )
    for candidate in candidates:
        name = _drug_name_from(candidate)
        if name:
            return name
    return fallback


def build_agraph(
    papers: list,
    umls: list,
    drug_safety: list,
) -> Tuple[List[Node], List[Edge], Config]:
    """
    Build the nodes, edges and config for a paper / concept / drug graph.

    Every input may be any iterable (including a one-shot generator) or
    ``None``; each is traversed exactly once. Items that are not dicts are
    skipped silently.

    Parameters
    ----------
    papers : List[dict]
        Read through the keys ``title`` and ``summary``; missing values are
        treated as empty strings. Each dict becomes a node ``paper_<index>``
        labelled ``P<index + 1>``.
    umls : List[dict]
        Dicts with at least ``name`` and ``cui``. Entries where either is
        empty after stripping are ignored. Each becomes ``concept_<cui>``.
    drug_safety : List[dict | list]
        Drug-safety records - each element may be one dict or a list/tuple
        of dicts. A single top-level dict is treated as a one-record list.
        Each record becomes ``drug_<i>_<j>``; the label comes from
        ``drug_name``, ``patient.drug`` or ``medicinalproduct`` and falls
        back to the node id.

    Returns
    -------
    nodes, edges, cfg : tuple
        Ready for ``streamlit_agraph.agraph``. Nodes are de-duplicated by id
        (first wins). A ``mentions`` edge links a paper to a concept or drug
        whose lower-cased name occurs in the paper's lower-cased
        ``"<title> <summary>"`` text; each (paper, target) pair appears once.
    """
    nodes: List[Node] = []
    edges: List[Edge] = []

    # A bare dict would otherwise be iterated key-by-key and dropped.
    if isinstance(drug_safety, dict):
        drug_safety = [drug_safety]

    # --- concept nodes -------------------------------------------------
    # (node id, lower-cased name) pairs are collected here so the edge pass
    # never re-reads ``umls`` and can only target nodes that really exist.
    concepts: List[Tuple[str, str]] = []
    for concept in umls or ():
        if not isinstance(concept, dict):
            continue
        cui = _safe_str(concept.get("cui")).strip()
        name = _safe_str(concept.get("name")).strip()
        if not (cui and name):
            continue
        concept_id = f"concept_{cui}"
        concepts.append((concept_id, name.lower()))
        nodes.append(Node(id=concept_id, label=name, size=28, color="#00b894"))

    # --- drug nodes ----------------------------------------------------
    drugs: List[Tuple[str, str]] = []
    for idx, rec in enumerate(drug_safety or ()):
        if not rec:
            continue
        batch = rec if isinstance(rec, (list, tuple)) else [rec]
        for j, record in enumerate(batch):
            if not isinstance(record, dict):
                continue
            drug_id = f"drug_{idx}_{j}"
            label = _drug_label(record, fallback=drug_id)
            drugs.append((drug_id, label.lower()))
            nodes.append(Node(id=drug_id, label=label, size=25, color="#d35400"))

    # --- paper nodes + "mentions" edges --------------------------------
    # Concepts are checked before drugs for each paper, so edge order is
    # stable. Matching is a plain substring test on lower-cased text.
    mentionable = concepts + drugs
    linked = set()
    for p_idx, paper in enumerate(papers or ()):
        if not isinstance(paper, dict):
            continue
        paper_id = f"paper_{p_idx}"
        title = _safe_str(paper.get("title"))
        summary = _safe_str(paper.get("summary"))
        nodes.append(
            Node(
                id=paper_id,
                label=f"P{p_idx + 1}",
                tooltip=title,
                size=16,
                color="#0984e3",
            )
        )

        text_blob = f"{title} {summary}".lower()
        for target_id, needle in mentionable:
            pair = (paper_id, target_id)
            # Repeated CUIs would otherwise yield identical parallel edges.
            if needle in text_blob and pair not in linked:
                linked.add(pair)
                edges.append(
                    Edge(source=paper_id, target=target_id, label="mentions")
                )

    nodes = _uniquify(nodes)

    cfg = Config(
        width="100%",
        height="600px",
        directed=False,
        nodeHighlightBehavior=True,
        highlightColor="#f1c40f",
        collapsible=True,
        node={"labelProperty": "label"},
    )
    return nodes, edges, cfg
|
|