mgbam committed on
Commit
633ba95
·
verified ·
1 Parent(s): 1bc973b

Update mcp/knowledge_graph.py

Browse files
Files changed (1) hide show
  1. mcp/knowledge_graph.py +141 -41
mcp/knowledge_graph.py CHANGED
@@ -1,61 +1,161 @@
1
- # mcp/knowledge_graph.py
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2
 
3
- from streamlit_agraph import Node, Edge, Config
4
  import re
 
5
 
6
def build_agraph(papers, umls, drug_safety):
    """Build interactive agraph nodes, edges and config.

    Parameters
    ----------
    papers : iterable of dicts; ``title`` and ``summary`` are read with
        ``.get`` so either key may be missing.
    umls : iterable of concept dicts; only entries with both ``cui`` and a
        string ``name`` become nodes.
    drug_safety : iterable whose items are either one record dict or a list
        of record dicts; falsy items and non-dict records are skipped.

    Returns
    -------
    (nodes, edges, config) where ``nodes``/``edges`` are lists of
    ``Node``/``Edge`` objects and ``config`` is a ``Config`` instance.
    """
    nodes, edges = [], []
    seen_ids = set()  # guards against emitting the same node id twice

    # UMLS concept nodes; remember (node_id, name) so edges only ever point
    # at concepts that really got a node.
    concepts = []
    for c in umls:
        cui = c.get("cui")
        name = c.get("name", "")
        if not (cui and isinstance(name, str) and name):
            continue
        nid = f"concept_{cui}"
        if nid in seen_ids:
            continue
        seen_ids.add(nid)
        nodes.append(Node(id=nid, label=name, size=25, color="#00b894"))
        concepts.append((nid, name))

    # Drug nodes; each entry may be a single record or a list of records.
    drug_names = []
    for i, dr in enumerate(drug_safety):
        if not dr:
            continue
        recs = dr if isinstance(dr, list) else [dr]
        for j, rec in enumerate(recs):
            if not isinstance(rec, dict):
                continue
            did = f"drug_{i}_{j}"
            patient = rec.get("patient")
            candidates = (
                rec.get("drug_name"),
                patient.get("drug") if isinstance(patient, dict) else None,
                rec.get("medicinalproduct"),
            )
            # Only a non-empty string is usable as a label / search term;
            # anything else falls back to the node id.
            dn = next((v for v in candidates if isinstance(v, str) and v), did)
            drug_names.append((did, dn))
            nodes.append(Node(id=did, label=dn, size=25, color="#d35400"))

    # Paper nodes plus "mentions" edges to concepts and drugs whose name
    # occurs (case-insensitively) in the paper's title or summary.
    for pi, p in enumerate(papers):
        pid = f"paper_{pi}"
        title = p.get("title", "")
        nodes.append(
            Node(id=pid, label=f"P{pi+1}", tooltip=title, size=15, color="#0984e3")
        )

        text = f"{title} {p.get('summary','')}".lower()
        for nid, cname in concepts:
            if cname.lower() in text:
                edges.append(Edge(source=pid, target=nid, label="mentions"))
        for did, dn in drug_names:
            if dn.lower() in text:
                edges.append(Edge(source=pid, target=did, label="mentions"))

    config = Config(
        width="100%", height="600", directed=False,
        nodeHighlightBehavior=True, highlightColor="#f1c40f",
        collapsible=True,
        node={"labelProperty": "label"}
    )
    return nodes, edges, config
 
 
1
+ #!/usr/bin/env python3
2
+ """MedGenesis – knowledge‑graph builder for Streamlit‑Agraph.
3
+
4
+ This version recognises **all new enrichment layers** introduced in the
5
+ latest orchestrator:
6
+ • UMLS concepts → green nodes
7
+ • MyGene / NCBI gene hits → purple nodes
8
+ • openFDA / DrugCentral drugs → orange nodes
9
+ • ClinicalTrials.gov studies → pink nodes
10
+ • Open Targets associations → red drug–gene / gene–disease edges
11
+ • Literature papers → blue nodes (tooltip = title)
12
+
13
+ The entry‑point `build_agraph` now receives a richer payload and returns
14
+ *(nodes, edges, config)* ready for `streamlit_agraph.agraph`.
15
+ """
16
+ from __future__ import annotations
17
 
 
18
  import re
19
+ from typing import List, Dict, Tuple
20
 
21
+ from streamlit_agraph import Node, Edge, Config
22
+
23
+ # ---------------------------------------------------------------------
24
+ # Colour palette (flat‑UI)
25
+ # ---------------------------------------------------------------------
26
+ C_PAPER = "#0984e3"
27
+ C_CONCEPT = "#00b894"
28
+ C_GENE = "#6c5ce7"
29
+ C_DRUG = "#d35400"
30
+ C_TRIAL = "#fd79a8"
31
+ C_OT_EDGE = "#c0392b"
32
+
33
+
34
+ # ---------------------------------------------------------------------
35
+ # Helper builders
36
+ # ---------------------------------------------------------------------
37
+
38
def _add_node(
    nodes: List[Node],
    node_id: str,
    label: str,
    color: str,
    tooltip: str | None = None,
    size: int = 25,
):
    """Add a node to *nodes* unless one with the same id is already there.

    The list is mutated in place; nothing is returned. An existing node with
    a matching ``id`` is left untouched (first writer wins).
    """
    for existing in nodes:
        if existing.id == node_id:
            # Same id already registered: keep the list free of duplicates.
            return
    new_node = Node(id=node_id, label=label, color=color, size=size, tooltip=tooltip)
    nodes.append(new_node)
43
+
44
+
45
+ def _match(text: str, pattern: str) -> bool:
46
+ return bool(re.search(re.escape(pattern), text, flags=re.I))
47
+
48
+
49
+ # ---------------------------------------------------------------------
50
+ # Public API
51
+ # ---------------------------------------------------------------------
52
+
53
def _first_text(*candidates) -> str:
    """Return the first candidate that is a non-blank string (stripped), else ''."""
    for value in candidates:
        if isinstance(value, str) and value.strip():
            return value.strip()
    return ""


def _drug_label(rec: Dict, fallback: str) -> str:
    """Pick a display name for one drug record, or *fallback* if none is usable."""
    patient = rec.get("patient")
    patient_drug = patient.get("drug") if isinstance(patient, dict) else None
    if isinstance(patient_drug, list):
        # NOTE(review): openFDA event records appear to carry a list of drug
        # dicts here (each with "medicinalproduct") - confirm against caller.
        patient_drug = next(
            (
                d.get("medicinalproduct")
                for d in patient_drug
                if isinstance(d, dict) and d.get("medicinalproduct")
            ),
            None,
        )
    label = _first_text(rec.get("drug_name"), patient_drug, rec.get("medicinalproduct"))
    return label or fallback


def _trial_fields(trial: Dict) -> Tuple[str | None, str]:
    """Return (nct_id, tooltip) for a trial dict; nct_id is None if absent."""
    ident: Dict = {}
    section = trial.get("protocolSection")
    # NOTE(review): raw ClinicalTrials.gov v2 studies presumably nest these
    # fields under protocolSection.identificationModule - flat keys win.
    if isinstance(section, dict) and isinstance(section.get("identificationModule"), dict):
        ident = section["identificationModule"]
    nct = trial.get("nctId") or trial.get("nctid") or ident.get("nctId")
    title = trial.get("briefTitle") or ident.get("briefTitle") or "Clinical trial"
    return nct, title


def _ot_label(score) -> str:
    """Format an Open Targets score as an edge label; tolerate missing scores."""
    try:
        return f"OT {float(score):.2f}"
    except (TypeError, ValueError):
        return "OT"


def build_agraph(
    papers: List[Dict],
    umls: List[Dict],
    drug_safety: List[Dict],
    genes: List[Dict] | None = None,
    trials: List[Dict] | None = None,
    ot_associations: List[Dict] | None = None,
):
    """Return ``(nodes, edges, config)`` for ``streamlit_agraph.agraph``.

    Node ids are de-duplicated via ``_add_node`` and identical edges are
    emitted once. Every edge endpoint is guaranteed to have a node.

    Parameters
    ----------
    papers : list of dicts; ``title`` and ``summary`` are searched for mentions.
    umls : list of concept dicts ``{cui, name}``; entries lacking either are ignored.
    drug_safety : list whose items are a record dict or a list of record
        dicts; falsy items and non-dict records are skipped.
    genes : optional list of dicts; ``symbol`` (or ``name``) identifies the gene,
        ``summary`` becomes the tooltip.
    trials : optional list of dicts carrying ``nctId``/``nctid`` and ``briefTitle``.
    ot_associations : optional list of dicts with ``target.symbol``,
        ``disease.name`` and ``score``.

    Returns
    -------
    tuple ``(list[Node], list[Edge], Config)``.
    """
    nodes: List[Node] = []
    edges: List[Edge] = []
    seen_edges = set()

    def link(source: str, target: str, **attrs) -> None:
        # Emit each (source, target, label) edge only once.
        key = (source, target, attrs.get("label"))
        if key not in seen_edges:
            seen_edges.add(key)
            edges.append(Edge(source=source, target=target, **attrs))

    # 1) Concepts - remember (node_id, name) so mention edges only target
    #    concepts that actually received a node.
    concept_targets: List[Tuple[str, str]] = []
    for c in umls or []:
        if not isinstance(c, dict):
            continue
        cui, name = c.get("cui"), _first_text(c.get("name"))
        if cui and name:
            cid = f"concept_{cui}"
            _add_node(nodes, cid, name, C_CONCEPT)
            concept_targets.append((cid, name))

    # 2) Genes - the same key (symbol, else name) is used for the node id and
    #    for mention matching, so a gene known only by name is still linked.
    gene_targets: List[Tuple[str, str]] = []
    for g in genes or []:
        if not isinstance(g, dict):
            continue
        sym = _first_text(g.get("symbol"), g.get("name"))
        if not sym:
            continue
        gid = f"gene_{sym}"
        _add_node(nodes, gid, sym, C_GENE, tooltip=g.get("summary", ""))
        gene_targets.append((gid, sym))

    # 3) Drugs (normalise mixed dict / list structures)
    drug_tuples: List[Tuple[str, str]] = []  # (node_id, drug_name)
    for i, dr in enumerate(drug_safety or []):
        if not dr:
            continue
        recs = dr if isinstance(dr, list) else [dr]
        for j, rec in enumerate(recs):
            if not isinstance(rec, dict):
                continue
            did = f"drug_{i}_{j}"
            name = _drug_label(rec, did)
            drug_tuples.append((did, name))
            _add_node(nodes, did, name, C_DRUG)

    # 4) Trials
    for t in trials or []:
        if not isinstance(t, dict):
            continue
        nct, tooltip = _trial_fields(t)
        if not nct:
            continue
        _add_node(nodes, f"trial_{nct}", nct, C_TRIAL, tooltip=tooltip, size=20)

    # 5) Papers and "mentions" edges (concepts, then genes, then drugs)
    for idx, p in enumerate(papers or []):
        pid = f"paper_{idx}"
        title = p.get("title") or ""
        _add_node(nodes, pid, f"P{idx+1}", C_PAPER, tooltip=title, size=15)

        text_blob = f"{title} {p.get('summary') or ''}".lower()
        for target_id, needle in (*concept_targets, *gene_targets, *drug_tuples):
            if _match(text_blob, needle):
                link(pid, target_id, label="mentions")

    # 6) Open Targets gene-disease edges
    for row in ot_associations or []:
        if not isinstance(row, dict):
            continue
        target, disease = row.get("target"), row.get("disease")
        gsym = target.get("symbol") if isinstance(target, dict) else None
        dis = disease.get("name") if isinstance(disease, dict) else None
        if not (gsym and dis):
            continue
        gid = f"gene_{gsym}"
        did = f"disease_{dis}"
        # The gene may not be in `genes`; add it so the edge has both endpoints.
        _add_node(nodes, gid, gsym, C_GENE)
        _add_node(nodes, did, dis, C_CONCEPT, size=20)
        link(gid, did, color=C_OT_EDGE, label=_ot_label(row.get("score", 0)))

    # 7) Config
    cfg = Config(
        directed=False,
        width="100%",
        height="600",
        nodeHighlightBehavior=True,
        highlightColor="#f1c40f",
        collapsible=True,
        showLegend=False,
        node={"labelProperty": "label"},
    )

    return nodes, edges, cfg