mgbam committed on
Commit
e202a39
·
verified ·
1 Parent(s): 7808af5

Update mcp/knowledge_graph.py

Browse files
Files changed (1) hide show
  1. mcp/knowledge_graph.py +73 -121
mcp/knowledge_graph.py CHANGED
@@ -1,140 +1,92 @@
1
  # mcp/knowledge_graph.py
2
- """
3
- Build agraph-compatible nodes + edges for the MedGenesis UI.
4
-
5
- Robustness notes
6
- ----------------
7
- * Accepts *any* iterable for ``papers``, ``umls``, ``drug_safety``.
8
- * Silently skips items that are **not** dictionaries or have missing keys.
9
- * Normalises drug-safety payloads that may arrive as dict **or** list.
10
- * Always casts labels to string – avoids ``None.lower()`` errors.
11
- """
12
-
13
- from __future__ import annotations
14
-
15
- import re
16
- from typing import List, Tuple
17
 
18
  from streamlit_agraph import Node, Edge, Config
 
19
 
 
 
 
 
 
 
 
20
 
21
- # ── helpers -----------------------------------------------------------------
22
- def _safe_str(x) -> str:
23
- """Return UTF-8 string or empty string."""
24
- return str(x) if x is not None else ""
25
-
26
-
27
- def _uniquify(nodes: List[Node]) -> List[Node]:
28
- """Remove duplicate node-ids (keep first)."""
29
- seen, out = set(), []
30
- for n in nodes:
31
- if n.id not in seen:
32
- out.append(n)
33
- seen.add(n.id)
34
- return out
35
-
36
-
37
- # ── public builder ----------------------------------------------------------
38
def build_agraph(
    papers: list,
    umls: list,
    drug_safety: list,
) -> "Tuple[List[Node], List[Edge], Config]":
    """
    Build agraph nodes, edges and config from papers, concepts and drugs.

    Parameters
    ----------
    papers : List[dict]
        Paper records; ``title`` and ``summary`` are read when present.
    umls : List[dict]
        Dicts with at least ``name`` and ``cui``.
    drug_safety : List[dict | list]
        OpenFDA records – each item may be one dict or a list of dicts.

    Items that are not dicts, and concepts lacking a ``cui`` or ``name``,
    are skipped. ``None`` passed for any collection is treated as empty.

    Returns
    -------
    nodes, edges, cfg : tuple
        Ready for ``streamlit_agraph.agraph``.
    """
    nodes: List[Node] = []
    edges: List[Edge] = []

    # ── UMLS concepts -------------------------------------------------------
    # Normalise each concept exactly once. The same stripped CUI is used for
    # the node id and for edge targets, so an edge can never point at an id
    # that has no node.
    concepts: List[Tuple[str, str]] = []  # (node id, lower-cased name)
    for c in umls or []:
        if not isinstance(c, dict):
            continue
        cui = _safe_str(c.get("cui")).strip()
        name = _safe_str(c.get("name")).strip()
        if not (cui and name):
            continue
        concept_id = f"concept_{cui}"
        concepts.append((concept_id, name.lower()))
        nodes.append(Node(id=concept_id, label=name, size=28, color="#00b894"))

    # ── Drug safety --------------------------------------------------------
    drug_nodes: List[Tuple[str, str]] = []  # (node id, lower-cased label)
    for idx, rec in enumerate(drug_safety or []):
        if not rec:
            continue
        recs = rec if isinstance(rec, list) else [rec]
        for j, r in enumerate(recs):
            if not isinstance(r, dict):
                continue
            # ``patient`` may be missing, None or a non-dict payload.
            patient = r.get("patient")
            dn = (
                r.get("drug_name")
                or (patient.get("drug") if isinstance(patient, dict) else None)
                or r.get("medicinalproduct")
            )
            did = f"drug_{idx}_{j}"
            dn = _safe_str(dn).strip() or did
            drug_nodes.append((did, dn.lower()))
            nodes.append(Node(id=did, label=dn, size=25, color="#d35400"))

    # ── Papers & edges ------------------------------------------------------
    for p_idx, p in enumerate(papers or []):
        if not isinstance(p, dict):
            continue
        pid = f"paper_{p_idx}"
        title = _safe_str(p.get("title"))
        summary = _safe_str(p.get("summary"))
        nodes.append(
            Node(
                id=pid,
                label=f"P{p_idx + 1}",
                tooltip=title,
                size=16,
                color="#0984e3",
            )
        )

        text_blob = f"{title} {summary}".lower()
        linked = set()  # targets already linked from this paper

        # → concept edges, then → drug edges (plain substring match)
        for target, needle in concepts + drug_nodes:
            if needle in text_blob and target not in linked:
                linked.add(target)
                edges.append(Edge(source=pid, target=target, label="mentions"))

    # ── deduplicate & config ------------------------------------------------
    nodes = _uniquify(nodes)

    cfg = Config(
        width="100%",
        height="600px",
        directed=False,
        nodeHighlightBehavior=True,
        highlightColor="#f1c40f",
        collapsible=True,
        node={"labelProperty": "label"},
    )
    return nodes, edges, cfg
 
 
1
  # mcp/knowledge_graph.py
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2
 
3
  from streamlit_agraph import Node, Edge, Config
4
+ import re
5
 
6
# Colour palette for the knowledge graph (hex strings).
EDGE_COLOR = "#888"  # paper -> concept / paper -> drug links
DRUG_COLOR = "#f39c12"  # drug nodes
CONCEPT_COLOR = "#00b894"  # UMLS concept nodes
PAPER_COLOR = "#3498db"  # paper nodes
HL_COLOR = "#f1c40f"  # passed to Config as highlightColor
DIM_COLOR = "#d3d3d3"  # not referenced in this module's visible code
13
 
14
def _clean_text(value):
    """Return ``value`` stripped when it is a string, otherwise ``""``."""
    return value.strip() if isinstance(value, str) else ""


def _drug_label(rec):
    """Return the first usable drug name in ``rec``, or ``""`` if none."""
    patient = rec.get("patient")
    nested = patient.get("drug") if isinstance(patient, dict) else None
    if isinstance(nested, list):
        # openFDA adverse-event records nest products as
        # patient.drug[].medicinalproduct; take the first named product.
        nested = next(
            (
                d.get("medicinalproduct")
                for d in nested
                if isinstance(d, dict) and _clean_text(d.get("medicinalproduct"))
            ),
            None,
        )
    for candidate in (rec.get("drug_name"), nested, rec.get("medicinalproduct")):
        text = _clean_text(candidate)
        if text:
            return text
    return ""


def build_agraph(papers, umls, drug_safety):
    """
    Build a Streamlit-agraph network.

    Parameters
    ----------
    papers : list of dict, or None
        PubMed/arXiv records; ``title`` and ``summary`` are read.
    umls : list of dict, or None
        UMLS concept records with ``cui`` and ``name`` (values may be None).
    drug_safety : list of dict or list-of-dict, or None
        OpenFDA/other safety reports.

    Entries that are not dicts are skipped. Concepts need a non-empty ``cui``
    and a non-empty string ``name``. A paper is linked to a concept or drug
    when the lower-cased name occurs as a substring of its title + summary.

    Returns
    -------
    (nodes, edges, config)
    """
    nodes, edges = [], []

    # --- Add UMLS concept nodes (one node per distinct CUI) ---
    concepts = []  # (node id, lower-cased name) pairs used for matching
    seen_concepts = set()
    for c in (umls or []):
        if not isinstance(c, dict):
            continue
        raw_cui = c.get("cui")
        cui = "" if raw_cui is None else str(raw_cui).strip()
        name = _clean_text(c.get("name"))
        if not cui or not name:
            continue
        node_id = f"concept_{cui}"
        concepts.append((node_id, name.lower()))
        if node_id not in seen_concepts:
            seen_concepts.add(node_id)
            nodes.append(
                Node(id=node_id, label=name, size=22, color=CONCEPT_COLOR)
            )

    # --- Add Drug nodes ---
    drug_ids = []  # (node id, lower-cased label)
    for i, drug_blob in enumerate(drug_safety or []):
        # Support both list and dict style safety reports
        if not drug_blob:
            continue
        reports = drug_blob if isinstance(drug_blob, list) else [drug_blob]
        for j, rec in enumerate(reports):
            if not isinstance(rec, dict):
                continue
            drug_id = f"drug_{i}_{j}"
            # Fall back to the node id when the record names no drug.
            label = _drug_label(rec) or drug_id
            drug_ids.append((drug_id, label.lower()))
            nodes.append(Node(id=drug_id, label=label, size=25, color=DRUG_COLOR))

    # --- Add Paper nodes and connect to concepts/drugs ---
    targets = concepts + drug_ids
    for k, p in enumerate(papers or []):
        if not isinstance(p, dict):
            continue
        pid = f"paper_{k}"
        title = _clean_text(p.get("title"))
        summary = _clean_text(p.get("summary"))
        nodes.append(Node(
            id=pid,
            label=f"P{k+1}",
            tooltip=title or f"Paper {k+1}",
            size=14,
            color=PAPER_COLOR,
        ))
        # Only real paper text is searched; the placeholder tooltip is not.
        txt = f"{title} {summary}".lower()

        linked = set()  # avoid parallel edges to the same target
        for target, needle in targets:
            if needle in txt and target not in linked:
                linked.add(target)
                edges.append(Edge(source=pid, target=target, color=EDGE_COLOR))

    # --- Graph config with physics enabled ---
    cfg = Config(
        width="100%",
        height="520",
        directed=False,
        physics=True,
        repulsion=True,
        nodeHighlightBehavior=True,
        highlightColor=HL_COLOR,
        collapsible=True,
        node={"labelProperty": "label"},
        edge={"color": EDGE_COLOR, "width": 1},
    )
    return nodes, edges, cfg
92
+