mgbam committed on
Commit
39219c6
·
verified ·
1 Parent(s): 80779c4

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +136 -117
app.py CHANGED
@@ -1,64 +1,69 @@
1
  #!/usr/bin/env python3
2
- """
3
- MedGenesis AI – Streamlit UI (v3.1 • June 2025)
4
-
5
- β€’ Dual-LLM selector (OpenAI | Gemini)
6
- β€’ Tabs:
7
- Results | Genes | Trials | Variants | Graph | Metrics | Visuals
8
- β€’ Robust PDF export (all Unicode β†’ Latin-1 safe)
9
- β€’ Null-safe handling of RuntimeError / HTTPStatusError placeholders
10
- β€’ Metrics tab now converts Edge objects β†’ {'source', 'target'} safely,
11
- preventing the KeyError you just saw.
12
- """
13
-
14
- from __future__ import annotations
15
  import os, pathlib, asyncio, re
16
  from pathlib import Path
 
17
 
18
  import streamlit as st
19
  import pandas as pd
20
  import plotly.express as px
21
- from streamlit_agraph import agraph, Node, Edge
22
  from fpdf import FPDF
 
23
 
 
24
  from mcp.orchestrator import orchestrate_search, answer_ai_question
25
  from mcp.workspace import get_workspace, save_query
26
  from mcp.knowledge_graph import build_agraph
27
  from mcp.graph_metrics import build_nx, get_top_hubs, get_density
 
28
 
29
- # ── Streamlit telemetry dir fix ─────────────────────────────────────
30
- os.environ["STREAMLIT_DATA_DIR"] = "/tmp/.streamlit"
31
- os.environ["XDG_STATE_HOME"] = "/tmp"
32
- os.environ["STREAMLIT_BROWSER_GATHERUSAGESTATS"] = "false"
33
  pathlib.Path("/tmp/.streamlit").mkdir(parents=True, exist_ok=True)
34
 
35
  ROOT = Path(__file__).parent
36
  LOGO = ROOT / "assets" / "logo.png"
37
 
38
- # ── PDF helper ──────────────────────────────────────────────────────
39
- def _latin1(txt: str) -> str:
 
 
 
40
  return txt.encode("latin-1", "replace").decode("latin-1")
41
 
 
42
  def _pdf(papers: list[dict]) -> bytes:
43
  pdf = FPDF()
44
  pdf.set_auto_page_break(auto=True, margin=15)
45
  pdf.add_page()
46
  pdf.set_font("Helvetica", size=11)
47
- pdf.cell(200, 8, _latin1("MedGenesis AI – Results"), ln=True, align="C")
 
48
  pdf.ln(3)
 
49
  for i, p in enumerate(papers, 1):
50
  pdf.set_font("Helvetica", "B", 11)
51
- pdf.multi_cell(0, 7, _latin1(f"{i}. {p['title']}"))
52
  pdf.set_font("Helvetica", "", 9)
53
- body = f"{p['authors']}\n{p['summary']}\n{p['link']}\n"
54
- pdf.multi_cell(0, 6, _latin1(body))
 
 
 
 
55
  pdf.ln(1)
 
 
56
  return pdf.output(dest="S").encode("latin-1", "replace")
57
 
58
- # ── Sidebar workspace ───────────────────────────────────────────────
59
- def _workspace_sidebar():
60
  with st.sidebar:
61
- st.header("πŸ—‚οΈ Workspace")
62
  ws = get_workspace()
63
  if not ws:
64
  st.info("Run a search then press **Save** to populate this list.")
@@ -67,40 +72,56 @@ def _workspace_sidebar():
67
  with st.expander(f"{i}. {item['query']}"):
68
  st.write(item["result"]["ai_summary"])
69
 
70
- # ── Main UI ──────────────────────────────────────────────────────────
 
 
 
71
  def render_ui() -> None:
72
  st.set_page_config("MedGenesis AI", layout="wide")
73
 
74
- # Session defaults
75
- defaults = {
76
  "query_result": None,
77
- "last_query": "",
78
- "last_llm": "openai",
79
  "followup_input": "",
80
  "followup_response": None,
81
- }
82
- for k, v in defaults.items():
 
83
  st.session_state.setdefault(k, v)
84
 
85
  _workspace_sidebar()
86
 
87
- # Header
88
- c1, c2 = st.columns([0.15, 0.85])
89
- with c1:
90
  if LOGO.exists():
91
- st.image(str(LOGO), width=105)
92
- with c2:
93
  st.markdown("## 🧬 **MedGenesis AI**")
94
  st.caption("Multi-source biomedical assistant Β· OpenAI / Gemini")
95
 
96
- # Controls
97
  llm = st.radio("LLM engine", ["openai", "gemini"], horizontal=True)
98
  query = st.text_input("Enter biomedical question",
99
  placeholder="e.g. CRISPR glioblastoma therapy")
100
 
101
- if st.button("Run Search πŸš€") and query:
 
 
 
 
 
 
 
 
 
 
 
 
 
 
102
  with st.spinner("Collecting literature & biomedical data …"):
103
  res = asyncio.run(orchestrate_search(query, llm=llm))
 
 
104
  st.session_state.update(
105
  query_result=res,
106
  last_query=query,
@@ -108,99 +129,94 @@ def render_ui() -> None:
108
  followup_input="",
109
  followup_response=None,
110
  )
 
111
 
112
- res: dict | None = st.session_state.query_result
113
  if not res:
114
- st.info("Enter a question and press **Run Search πŸš€**")
115
  return
116
 
117
- # Guarantee keys
118
- for k in (
119
- "papers", "umls", "drug_safety", "genes", "mesh_defs",
120
- "gene_disease", "clinical_trials", "variants"
121
- ):
122
- res.setdefault(k, [])
123
 
124
- # Tabs
125
- tabs = st.tabs([
126
- "Results", "Genes", "Trials", "Variants",
127
- "Graph", "Metrics", "Visuals"
128
- ])
129
-
130
- # Results tab -----------------------------------------------------
131
  with tabs[0]:
132
- st.subheader("Literature")
133
  for i, p in enumerate(res["papers"], 1):
134
- st.markdown(f"**{i}. [{p['title']}]({p['link']})** *{p['authors']}*")
 
 
 
135
  st.write(p["summary"])
136
- col1, col2 = st.columns(2)
137
- with col1:
 
138
  st.download_button(
139
  "CSV",
140
  pd.DataFrame(res["papers"]).to_csv(index=False),
141
  "papers.csv",
142
  "text/csv",
143
  )
144
- with col2:
145
  st.download_button("PDF", _pdf(res["papers"]),
146
  "papers.pdf", "application/pdf")
 
147
  if st.button("πŸ’Ύ Save"):
148
  save_query(st.session_state.last_query, res)
149
  st.success("Saved to workspace")
150
 
151
  st.subheader("UMLS concepts")
152
- for c in res["umls"]:
153
  if isinstance(c, dict) and c.get("cui"):
154
  st.write(f"- **{c['name']}** ({c['cui']})")
155
 
156
  st.subheader("OpenFDA safety signals")
157
- for d in res["drug_safety"]:
158
  st.json(d)
159
 
160
  st.subheader("AI summary")
161
  st.info(res["ai_summary"])
162
 
163
- # Genes tab -------------------------------------------------------
164
  with tabs[1]:
165
  st.header("Gene / Variant signals")
166
- clean = [g for g in res["genes"] if isinstance(g, dict)]
167
- if not clean:
168
- st.info("No gene metadata (API may be rate-limited).")
169
- else:
170
- for g in clean:
171
- lab = g.get("name") or g.get("symbol") or str(g.get("geneid", ""))
172
- st.write(f"- **{lab}**")
173
-
 
174
  if res["gene_disease"]:
175
  st.markdown("### DisGeNET associations")
176
- st.json(res["gene_disease"][:15])
 
 
177
 
178
- if res["mesh_defs"]:
 
179
  st.markdown("### MeSH definitions")
180
- for d in res["mesh_defs"]:
181
- if d:
182
- st.write("-", d)
183
 
184
- # Trials tab ------------------------------------------------------
185
  with tabs[2]:
186
  st.header("Clinical trials")
187
- if not res["clinical_trials"]:
 
188
  st.info("No trials (rate-limited or none found).")
189
- else:
190
- for t in res["clinical_trials"]:
191
- st.markdown(f"**{t['nctId']}** – {t['briefTitle']}")
192
- st.write(f"Phase {t.get('phase')} | Status {t.get('status')}")
193
-
194
- # Variants tab ----------------------------------------------------
 
 
 
195
  with tabs[3]:
196
- st.header("Cancer variants (cBioPortal)")
197
- if not res["variants"]:
198
- st.info("No variant data.")
199
- else:
200
- st.json(res["variants"][:50])
201
-
202
- # Graph tab -------------------------------------------------------
203
- with tabs[4]:
204
  nodes, edges, cfg = build_agraph(
205
  res["papers"], res["umls"], res["drug_safety"]
206
  )
@@ -211,36 +227,37 @@ def render_ui() -> None:
211
  n.color = "#f1c40f" if pat.search(n.label) else "#d3d3d3"
212
  agraph(nodes, edges, cfg)
213
 
214
- # Metrics tab -----------------------------------------------------
215
- with tabs[5]:
216
- # Convert Edge objects β†’ dicts with guaranteed 'source'/'target'
217
- edge_dicts = [
218
- {"source": getattr(e, "source", getattr(e, "from", "")),
219
- "target": getattr(e, "target", getattr(e, "to", ""))}
220
- for e in edges if isinstance(e, Edge)
221
- if getattr(e, "source", getattr(e, "from", None))
222
- and getattr(e, "target", getattr(e, "to", None))
223
- ]
224
  G = build_nx(
225
  [n.__dict__ for n in nodes],
226
- edge_dicts,
227
  )
228
  st.metric("Density", f"{get_density(G):.3f}")
229
  st.markdown("**Top hubs**")
230
- for nid, sc in get_top_hubs(G):
231
- lab = next((n.label for n in nodes if n.id == nid), nid)
232
- st.write(f"- {lab} {sc:.3f}")
233
 
234
- # Visuals tab -----------------------------------------------------
235
- with tabs[6]:
236
- years = [p.get("published", "")[:4] for p in res["papers"] if p.get("published")]
 
 
 
237
  if years:
238
- st.plotly_chart(px.histogram(years, nbins=12,
239
- title="Publication Year"))
 
 
 
 
240
 
241
- # Follow-up QA ----------------------------------------------------
242
  st.markdown("---")
243
- st.text_input("Ask follow-up question:", key="followup_input")
 
 
244
 
245
  def _on_ask():
246
  q = st.session_state.followup_input.strip()
@@ -252,10 +269,11 @@ def render_ui() -> None:
252
  answer_ai_question(
253
  q,
254
  context=st.session_state.last_query,
255
- llm=st.session_state.last_llm,
256
- )
 
 
257
  )
258
- st.session_state.followup_response = ans["answer"]
259
 
260
  st.button("Ask AI", on_click=_on_ask)
261
 
@@ -263,5 +281,6 @@ def render_ui() -> None:
263
  st.write(st.session_state.followup_response)
264
 
265
 
 
266
  if __name__ == "__main__":
267
  render_ui()
 
1
  #!/usr/bin/env python3
2
+ # ──────────────────────────────────────────────────────────────────────
3
+ # MedGenesis AI – Streamlit UI (OpenAI + Gemini, CPU-only)
4
+ # ──────────────────────────────────────────────────────────────────────
 
 
 
 
 
 
 
 
 
 
5
  import os, pathlib, asyncio, re
6
  from pathlib import Path
7
+ from datetime import datetime
8
 
9
  import streamlit as st
10
  import pandas as pd
11
  import plotly.express as px
 
12
  from fpdf import FPDF
13
+ from streamlit_agraph import agraph
14
 
15
+ # ── internal helpers --------------------------------------------------
16
  from mcp.orchestrator import orchestrate_search, answer_ai_question
17
  from mcp.workspace import get_workspace, save_query
18
  from mcp.knowledge_graph import build_agraph
19
  from mcp.graph_metrics import build_nx, get_top_hubs, get_density
20
+ from mcp.alerts import check_alerts
21
 
22
+ # ── Streamlit telemetry dir fix (HF Spaces sandbox quirks) ------------
23
+ os.environ["STREAMLIT_DATA_DIR"] = "/tmp/.streamlit"
24
+ os.environ["XDG_STATE_HOME"] = "/tmp"
25
+ os.environ["STREAMLIT_BROWSER_GATHERUSAGESTATS"] = "false"
26
  pathlib.Path("/tmp/.streamlit").mkdir(parents=True, exist_ok=True)
27
 
28
  ROOT = Path(__file__).parent
29
  LOGO = ROOT / "assets" / "logo.png"
30
 
31
+ # ══════════════════════════════════════════════════════════════════════
32
+ # Small util helpers
33
+ # ══════════════════════════════════════════════════════════════════════
34
+ def _latin1_safe(txt: str) -> str:
35
+ """Replace non-Latin-1 chars – keeps FPDF happy."""
36
  return txt.encode("latin-1", "replace").decode("latin-1")
37
 
38
def _pdf(papers: list[dict]) -> bytes:
    """Render *papers* as a simple PDF report and return the raw bytes.

    Each paper dict is read for the keys ``title``, ``authors``,
    ``summary`` and ``link``; a missing key is rendered as an empty
    string so one incomplete record does not abort the whole export.
    All text is passed through ``_latin1_safe`` before it is handed to
    FPDF.

    Args:
        papers: Paper records to list, numbered from 1 in the given order.

    Returns:
        The finished PDF document as ``bytes``.
    """
    pdf = FPDF()
    pdf.set_auto_page_break(auto=True, margin=15)
    pdf.add_page()
    pdf.set_font("Helvetica", size=11)
    # Plain ASCII hyphen on purpose: an en dash is outside Latin-1 and
    # _latin1_safe would turn it into "?" in the page header.
    pdf.cell(200, 8, _latin1_safe("MedGenesis AI - Literature results"),
             ln=True, align="C")
    pdf.ln(3)

    for idx, paper in enumerate(papers, 1):
        # Bold, numbered title line.
        pdf.set_font("Helvetica", "B", 11)
        pdf.multi_cell(0, 7, _latin1_safe(f"{idx}. {paper.get('title', '')}"))
        # Smaller regular text for authors / summary / link.
        pdf.set_font("Helvetica", "", 9)
        body = (
            f"{paper.get('authors', '')}\n"
            f"{paper.get('summary', '')}\n"
            f"{paper.get('link', '')}\n"
        )
        pdf.multi_cell(0, 6, _latin1_safe(body))
        pdf.ln(1)

    raw = pdf.output(dest="S")
    # Classic PyFPDF returns a str here and needs encoding, while fpdf2
    # returns a bytearray (no .encode method) - accept either.
    if isinstance(raw, str):
        return raw.encode("latin-1", "replace")
    return bytes(raw)
62
 
63
+
64
+ def _workspace_sidebar() -> None:
65
  with st.sidebar:
66
+ st.header("πŸ—‚ Workspace")
67
  ws = get_workspace()
68
  if not ws:
69
  st.info("Run a search then press **Save** to populate this list.")
 
72
  with st.expander(f"{i}. {item['query']}"):
73
  st.write(item["result"]["ai_summary"])
74
 
75
+
76
+ # ══════════════════════════════════════════════════════════════════════
77
+ # Main Streamlit UI
78
+ # ══════════════════════════════════════════════════════════════════════
79
  def render_ui() -> None:
80
  st.set_page_config("MedGenesis AI", layout="wide")
81
 
82
+ # ── Session-state defaults ────────────────────────────────────────
83
+ for k, v in {
84
  "query_result": None,
 
 
85
  "followup_input": "",
86
  "followup_response": None,
87
+ "last_query": "",
88
+ "last_llm": "",
89
+ }.items():
90
  st.session_state.setdefault(k, v)
91
 
92
  _workspace_sidebar()
93
 
94
+ col_logo, col_title = st.columns([0.15, 0.85])
95
+ with col_logo:
 
96
  if LOGO.exists():
97
+ st.image(LOGO, width=110)
98
+ with col_title:
99
  st.markdown("## 🧬 **MedGenesis AI**")
100
  st.caption("Multi-source biomedical assistant Β· OpenAI / Gemini")
101
 
 
102
  llm = st.radio("LLM engine", ["openai", "gemini"], horizontal=True)
103
  query = st.text_input("Enter biomedical question",
104
  placeholder="e.g. CRISPR glioblastoma therapy")
105
 
106
+ # ── alert notifications (async) ───────────────────────────────────
107
+ saved_qs = [w["query"] for w in get_workspace()]
108
+ if saved_qs:
109
+ try:
110
+ news = asyncio.run(check_alerts(saved_qs))
111
+ if news:
112
+ with st.sidebar:
113
+ st.subheader("πŸ”” New papers")
114
+ for q, lnks in news.items():
115
+ st.write(f"**{q}** – {len(lnks)} new")
116
+ except Exception:
117
+ pass # network hiccups – silent
118
+
119
+ # ── Run Search ----------------------------------------------------
120
+ if st.button("Run Search πŸš€") and query.strip():
121
  with st.spinner("Collecting literature & biomedical data …"):
122
  res = asyncio.run(orchestrate_search(query, llm=llm))
123
+
124
+ # store in session
125
  st.session_state.update(
126
  query_result=res,
127
  last_query=query,
 
129
  followup_input="",
130
  followup_response=None,
131
  )
132
+ st.success(f"Completed with **{res['llm_used'].title()}**")
133
 
134
+ res = st.session_state.query_result
135
  if not res:
136
+ st.info("Enter a biomedical question and press **Run Search πŸš€**")
137
  return
138
 
139
+ # ── Tabs ----------------------------------------------------------
140
+ tabs = st.tabs(["Results", "Genes", "Trials",
141
+ "Graph", "Metrics", "Visuals"])
 
 
 
142
 
143
+ # 1) Results -------------------------------------------------------
 
 
 
 
 
 
144
  with tabs[0]:
 
145
  for i, p in enumerate(res["papers"], 1):
146
+ st.markdown(
147
+ f"**{i}. [{p['title']}]({p['link']})** "
148
+ f"*{p['authors']}*"
149
+ )
150
  st.write(p["summary"])
151
+
152
+ c_csv, c_pdf = st.columns(2)
153
+ with c_csv:
154
  st.download_button(
155
  "CSV",
156
  pd.DataFrame(res["papers"]).to_csv(index=False),
157
  "papers.csv",
158
  "text/csv",
159
  )
160
+ with c_pdf:
161
  st.download_button("PDF", _pdf(res["papers"]),
162
  "papers.pdf", "application/pdf")
163
+
164
  if st.button("πŸ’Ύ Save"):
165
  save_query(st.session_state.last_query, res)
166
  st.success("Saved to workspace")
167
 
168
  st.subheader("UMLS concepts")
169
+ for c in (res["umls"] or []):
170
  if isinstance(c, dict) and c.get("cui"):
171
  st.write(f"- **{c['name']}** ({c['cui']})")
172
 
173
  st.subheader("OpenFDA safety signals")
174
+ for d in (res["drug_safety"] or []):
175
  st.json(d)
176
 
177
  st.subheader("AI summary")
178
  st.info(res["ai_summary"])
179
 
180
+ # 2) Genes ---------------------------------------------------------
181
  with tabs[1]:
182
  st.header("Gene / Variant signals")
183
+ genes_list = [
184
+ g for g in res["genes"]
185
+ if isinstance(g, dict) and (g.get("symbol") or g.get("name"))
186
+ ]
187
+ if not genes_list:
188
+ st.info("No gene hits (rate-limited or none found).")
189
+ for g in genes_list:
190
+ st.write(f"- **{g.get('symbol') or g.get('name')}** "
191
+ f"{g.get('description','')}")
192
  if res["gene_disease"]:
193
  st.markdown("### DisGeNET associations")
194
+ ok = [d for d in res["gene_disease"] if isinstance(d, dict)]
195
+ if ok:
196
+ st.json(ok[:15])
197
 
198
+ defs = [d for d in res["mesh_defs"] if isinstance(d, str) and d]
199
+ if defs:
200
  st.markdown("### MeSH definitions")
201
+ for d in defs:
202
+ st.write("-", d)
 
203
 
204
+ # 3) Trials --------------------------------------------------------
205
  with tabs[2]:
206
  st.header("Clinical trials")
207
+ ct = res["clinical_trials"]
208
+ if not ct:
209
  st.info("No trials (rate-limited or none found).")
210
+ for t in ct:
211
+ nct = t.get("NCTId", [""])[0]
212
+ bttl = t.get("BriefTitle", [""])[0]
213
+ phase= t.get("Phase", [""])[0]
214
+ stat = t.get("OverallStatus", [""])[0]
215
+ st.markdown(f"**{nct}** – {bttl}")
216
+ st.write(f"Phase {phase} | Status {stat}")
217
+
218
+ # 4) Graph ---------------------------------------------------------
219
  with tabs[3]:
 
 
 
 
 
 
 
 
220
  nodes, edges, cfg = build_agraph(
221
  res["papers"], res["umls"], res["drug_safety"]
222
  )
 
227
  n.color = "#f1c40f" if pat.search(n.label) else "#d3d3d3"
228
  agraph(nodes, edges, cfg)
229
 
230
+ # 5) Metrics -------------------------------------------------------
231
+ with tabs[4]:
 
 
 
 
 
 
 
 
232
  G = build_nx(
233
  [n.__dict__ for n in nodes],
234
+ [e.__dict__ for e in edges],
235
  )
236
  st.metric("Density", f"{get_density(G):.3f}")
237
  st.markdown("**Top hubs**")
238
+ for nid, sc in get_top_hubs(G, k=5):
239
+ label = next((n.label for n in nodes if n.id == nid), nid)
240
+ st.write(f"- {label} {sc:.3f}")
241
 
242
+ # 6) Visuals -------------------------------------------------------
243
+ with tabs[5]:
244
+ years = [
245
+ p["published"][:4] for p in res["papers"]
246
+ if p.get("published") and len(p["published"]) >= 4
247
+ ]
248
  if years:
249
+ st.plotly_chart(
250
+ px.histogram(
251
+ years, nbins=min(15, len(set(years))),
252
+ title="Publication Year"
253
+ )
254
+ )
255
 
256
+ # ── Follow-up Q-A -------------------------------------------------
257
  st.markdown("---")
258
+ st.text_input("Ask follow-up question:",
259
+ key="followup_input",
260
+ placeholder="e.g. Any Phase III trials recruiting now?")
261
 
262
  def _on_ask():
263
  q = st.session_state.followup_input.strip()
 
269
  answer_ai_question(
270
  q,
271
  context=st.session_state.last_query,
272
+ llm=st.session_state.last_llm)
273
+ )
274
+ st.session_state.followup_response = (
275
+ ans.get("answer") or "LLM unavailable or quota exceeded."
276
  )
 
277
 
278
  st.button("Ask AI", on_click=_on_ask)
279
 
 
281
  st.write(st.session_state.followup_response)
282
 
283
 
284
+ # ── entry-point ───────────────────────────────────────────────────────
285
  if __name__ == "__main__":
286
  render_ui()