mgbam committed on
Commit
f9a0bdb
·
verified ·
1 Parent(s): fe00e4d

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +201 -171
app.py CHANGED
@@ -1,63 +1,62 @@
1
- # app.py – Streamlit front‑end for MedGenesis
2
-
3
- """CPU‑only demo that can run on HF Spaces.
4
- Requirements (environment variables / HF πŸŽ™ secrets):
5
-
6
- OPENAI_API_KEY / GEMINI_KEY – LLMs
7
- PUB_KEY / UMLS_KEY / DISGENET_KEY ... – data APIs (optional)
8
- MYGENE_KEY / OT_KEY / CBIO_KEY – new APIs (optional)
9
-
10
- Run locally:
11
- streamlit run app.py --server.headless true --server.address 0.0.0.0
12
- """
13
-
14
- from __future__ import annotations
15
- import os, asyncio, re, pathlib
16
  from pathlib import Path
17
 
18
  import streamlit as st
19
  import pandas as pd
20
  import plotly.express as px
21
- from fpdf import FPDF
22
  from streamlit_agraph import agraph
 
23
 
24
- from mcp.orchestrator import orchestrate_search, answer_ai_question
25
- from mcp.workspace import get_workspace, save_query
26
  from mcp.knowledge_graph import build_agraph
27
- from mcp.graph_utils import build_nx, get_top_hubs, get_density
28
- from mcp.alerts import check_alerts
29
 
30
- # --- Streamlit telemetry dir fix ------------------------------------------
31
- os.environ.setdefault("STREAMLIT_DATA_DIR", "/tmp/.streamlit")
32
- os.environ.setdefault("XDG_STATE_HOME", "/tmp")
33
  os.environ["STREAMLIT_BROWSER_GATHERUSAGESTATS"] = "false"
34
- pathlib.Path(os.environ["STREAMLIT_DATA_DIR"]).mkdir(parents=True, exist_ok=True)
35
 
36
  ROOT = Path(__file__).parent
37
  LOGO = ROOT / "assets" / "logo.png"
38
 
39
- # --- helpers --------------------------------------------------------------
40
- LATIN1 = str.maketrans({**{chr(i): "?" for i in range(256, 0x110000)}})
 
41
 
42
- def _pdf(papers: list[dict]) -> bytes:
43
  pdf = FPDF()
44
  pdf.set_auto_page_break(auto=True, margin=15)
45
  pdf.add_page()
46
  pdf.set_font("Helvetica", size=11)
47
- pdf.multi_cell(0, 8, "MedGenesis AI – Results", align="C")
48
  pdf.ln(3)
49
 
50
  for i, p in enumerate(papers, 1):
51
  pdf.set_font("Helvetica", "B", 11)
52
- pdf.multi_cell(0, 7, f"{i}. {p['title']}".translate(LATIN1))
53
- pdf.set_font("Helvetica", size=9)
54
- body = f"{p['authors']}\n{p['summary']}\n{p['link']}\n"
55
- pdf.multi_cell(0, 6, body.translate(LATIN1))
 
 
 
 
56
  pdf.ln(1)
57
  return pdf.output(dest="S").encode("latin-1", "replace")
58
 
59
- # --- sidebar --------------------------------------------------------------
60
-
61
  def _workspace_sidebar():
62
  with st.sidebar:
63
  st.header("πŸ—‚οΈ Workspace")
@@ -69,159 +68,190 @@ def _workspace_sidebar():
69
  with st.expander(f"{i}. {item['query']}"):
70
  st.write(item["result"]["ai_summary"])
71
 
72
- # --- UI -------------------------------------------------------------------
73
-
74
  def render_ui():
75
  st.set_page_config("MedGenesis AI", layout="wide")
76
 
77
- # SessionΒ state --------------------------------------------------------
78
- for k, v in {"query_result": None, "followup_input": "", "followup_response": None,
79
- "last_query": "", "last_llm": "openai", "tab": 0}.items():
80
- st.session_state.setdefault(k, v)
 
 
 
 
 
 
81
 
82
  _workspace_sidebar()
83
 
 
84
  c1, c2 = st.columns([0.15, 0.85])
85
  with c1:
86
  if LOGO.exists():
87
  st.image(str(LOGO), width=105)
88
  with c2:
89
  st.markdown("## 🧬 **MedGenesis AI**")
90
- st.caption("Multi‑source biomedical assistant Β· OpenAI / Gemini")
91
-
92
- llm = st.radio("LLM engine", ["openai", "gemini"], horizontal=True, index=(0 if st.session_state.last_llm=="openai" else 1))
93
- query = st.text_input("Enter biomedical question", st.session_state.last_query or "e.g. CRISPR glioblastoma therapy")
94
-
95
- # alerts ---------------------------------------------------------------
96
- if work := get_workspace():
97
- try:
98
- news = asyncio.run(check_alerts([w["query"] for w in work]))
99
- if news:
100
- with st.sidebar:
101
- st.subheader("πŸ”” New papers")
102
- for q, lnks in news.items():
103
- st.write(f"**{q}** – {len(lnks)} new")
104
- except Exception:
105
- pass
106
-
107
- # run search -----------------------------------------------------------
108
- if st.button("Run Search πŸš€"):
109
- if not query.strip():
110
- st.warning("Please enter a biomedical question first.")
111
- else:
112
- with st.spinner("Collecting literature & biomedical data …"):
113
- res = asyncio.run(orchestrate_search(query, llm=llm))
114
- st.session_state.update({
115
- "query_result": res,
116
- "last_query": query,
117
- "last_llm": llm,
118
- "followup_input": "",
119
- "followup_response": None,
120
- })
121
- st.success(f"Completed with **{res['llm_used'].title()}**")
122
 
123
  res = st.session_state.query_result
124
- if not res:
125
- st.info("Enter a question and press **Run Search πŸš€**")
126
- return
127
-
128
- # --- tabs -------------------------------------------------------------
129
- tabs = st.tabs(["Results", "Genes", "Trials", "Graph", "Metrics", "Visuals"])
130
-
131
- # Results --------------------------------------------------------------
132
- with tabs[0]:
133
- for i, p in enumerate(res["papers"], 1):
134
- st.markdown(f"**{i}. [{p['title']}]({p['link']})** *{p['authors']}*")
135
- st.write(p["summary"])
136
- col1, col2 = st.columns(2)
137
- with col1:
138
- st.download_button("CSV", pd.DataFrame(res["papers"]).to_csv(index=False), "papers.csv", "text/csv")
139
- with col2:
140
- st.download_button("PDF", _pdf(res["papers"]), "papers.pdf", "application/pdf")
141
- if st.button("πŸ’Ύ Save"):
142
- save_query(st.session_state.last_query, res)
143
- st.success("Saved to workspace")
144
-
145
- st.subheader("UMLS concepts")
146
- for c in res["umls"]:
147
- if isinstance(c, dict) and c.get("cui"):
148
- st.write(f"- **{c['name']}** ({c['cui']})")
149
-
150
- st.subheader("OpenFDA safety")
151
- for d in res["drug_safety"]:
152
- st.json(d)
153
-
154
- st.subheader("AI summary")
155
- st.info(res["ai_summary"])
156
-
157
- # Genes ----------------------------------------------------------------
158
- with tabs[1]:
159
- st.header("Gene / Variant signals")
160
- genes = res.get("genes") or []
161
- if not genes:
162
- st.info("No gene hits (rate‑limited or none found).")
163
- for g in genes:
164
- sym = g.get("symbol") or g.get("approvedSymbol") or g.get("name", "")
165
- summ = g.get("summary") or g.get("description", "")
166
- st.write(f"- **{sym}** {summ}")
167
- if res["gene_disease"]:
168
- st.markdown("### DisGeNET links")
169
- st.json(res["gene_disease"][:15])
170
- if res["mesh_defs"]:
171
- st.markdown("### MeSH definitions")
172
- for d in res["mesh_defs"]:
173
- if d:
174
- st.write("-", d)
175
-
176
- # Trials ---------------------------------------------------------------
177
- with tabs[2]:
178
- st.header("Clinical trials")
179
- trials = res.get("clinical_trials") or []
180
- if not trials:
181
- st.info("No trials (rate‑limited or none found).")
182
- for t in trials:
183
- st.markdown(f"**{t['nctId']}** – {t['briefTitle']}")
184
- st.write(f"Phase {t.get('phase')} | Status {t.get('status')}")
185
-
186
- # Graph ---------------------------------------------------------------
187
- with tabs[3]:
188
- nodes, edges, cfg = build_agraph(res["papers"], res["umls"], res["drug_safety"])
189
- hl = st.text_input("Highlight node:", key="hl")
190
- if hl:
191
- pat = re.compile(re.escape(hl), re.I)
192
- for n in nodes:
193
- n.color = "#f1c40f" if pat.search(n.label) else "#d3d3d3"
194
- agraph(nodes, edges, cfg)
195
-
196
- # Metrics -------------------------------------------------------------
197
- with tabs[4]:
198
- nodes, edges, _ = build_agraph(res["papers"], res["umls"], res["drug_safety"])
199
- G = build_nx([n.__dict__ for n in nodes], [e.__dict__ for e in edges])
200
- st.metric("Density", f"{get_density(G):.3f}")
201
- st.markdown("**Top hubs**")
202
- for nid, sc in get_top_hubs(G):
203
- lab = next((n.label for n in nodes if n.id == nid), nid)
204
- st.write(f"- {lab} {sc:.3f}")
205
-
206
- # Visuals -------------------------------------------------------------
207
- with tabs[5]:
208
- years = [int(p["published"][:4]) for p in res["papers"] if p.get("published", "").isdigit()]
209
- if years:
210
- st.plotly_chart(px.histogram(years, nbins=12, title="Publication Year"))
211
-
212
- # Follow‑up QA --------------------------------------------------------
213
- st.markdown("---")
214
- st.text_input("Ask follow‑up question:", key="followup_input", placeholder="e.g. Any phase III trials recruiting now?")
215
- if st.button("Ask AI"):
216
- q = st.session_state.followup_input.strip()
217
- if not q:
218
- st.warning("Please type a question first.")
219
- else:
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
220
  with st.spinner("Querying LLM …"):
221
- ans = asyncio.run(answer_ai_question(q, context=st.session_state.last_query, llm=st.session_state.last_llm))
 
 
 
 
 
 
222
  st.session_state.followup_response = ans["answer"]
223
- if st.session_state.followup_response:
224
- st.write(st.session_state.followup_response)
 
 
 
 
 
 
 
225
 
226
  if __name__ == "__main__":
227
  render_ui()
 
1
+ #!/usr/bin/env python3
2
+ # app.py – MedGenesis AI · Streamlit front-end (v3)
3
+ # ---------------------------------------------------
4
+ # • Dual-LLM selector (OpenAI | Gemini)
5
+ # • Robust PDF export (all Unicode → Latin-1 safe)
6
+ # • Lazy session-state handling so a failed background
7
+ # request never kills the whole app.
8
+ # • New “Variants” tab (cBioPortal) + null-safe “Graph”
9
+ # and “Metrics” using the patched helpers.
10
+
11
+ import os, pathlib, asyncio, re
 
 
 
 
12
  from pathlib import Path
13
 
14
  import streamlit as st
15
  import pandas as pd
16
  import plotly.express as px
 
17
  from streamlit_agraph import agraph
18
+ from fpdf import FPDF
19
 
20
+ from mcp.orchestrator import orchestrate_search, answer_ai_question
21
+ from mcp.workspace import get_workspace, save_query
22
  from mcp.knowledge_graph import build_agraph
23
+ from mcp.graph_metrics import build_nx, get_top_hubs, get_density
 
24
 
25
# ── Streamlit telemetry dir fix ──────────────────────────────────────
# Point Streamlit's data/state directories at /tmp and switch off usage-stats
# gathering (presumably because the default location is not writable on the
# hosting platform — confirm), then make sure the data directory exists.
os.environ.update(
    {
        "STREAMLIT_DATA_DIR": "/tmp/.streamlit",
        "XDG_STATE_HOME": "/tmp",
        "STREAMLIT_BROWSER_GATHERUSAGESTATS": "false",
    }
)
pathlib.Path(os.environ["STREAMLIT_DATA_DIR"]).mkdir(parents=True, exist_ok=True)

# Directory containing this file, and the optional logo shown in the header.
ROOT = Path(__file__).parent
LOGO = ROOT.joinpath("assets", "logo.png")
33
 
34
+ # ── PDF export helper (robust to ALL Unicode) ───────────────────────
35
+ def _latin1_safe(txt: str) -> str:
36
+ return txt.encode("latin-1", "replace").decode("latin-1")
37
 
38
def _pdf(papers) -> bytes:
    """Render *papers* as a PDF report and return the file content.

    Args:
        papers: iterable of dicts. The keys ``title``, ``authors``,
            ``summary`` and ``link`` are read; a missing or empty value is
            rendered as an empty string. ``None`` is treated as no papers.

    Returns:
        The PDF document as ``bytes``.
    """
    pdf = FPDF()
    pdf.set_auto_page_break(auto=True, margin=15)
    pdf.add_page()
    pdf.set_font("Helvetica", size=11)
    # Width 0 lets the cell run to the right margin, so align="C" centres the
    # title on the printable area instead of on a fixed 200-unit box.
    pdf.cell(0, 8, _latin1_safe("MedGenesis AI – Results"), ln=True, align="C")
    pdf.ln(3)

    for i, p in enumerate(papers or [], 1):
        title = p.get("title") or ""
        pdf.set_font("Helvetica", "B", 11)
        pdf.multi_cell(0, 7, _latin1_safe(f"{i}. {title}"))

        pdf.set_font("Helvetica", "", 9)
        body = "".join(
            f"{p.get(field) or ''}\n" for field in ("authors", "summary", "link")
        )
        pdf.multi_cell(0, 6, _latin1_safe(body))
        pdf.ln(1)

    out = pdf.output(dest="S")
    # Classic PyFPDF hands back a str, fpdf2 a bytearray; accept either so the
    # export does not fail with "'bytearray' object has no attribute 'encode'".
    if isinstance(out, str):
        return out.encode("latin-1", "replace")
    return bytes(out)
58
 
59
+ # ── Sidebar workspace ───────────────────────────────────────────────
 
60
  def _workspace_sidebar():
61
  with st.sidebar:
62
  st.header("πŸ—‚οΈ Workspace")
 
68
  with st.expander(f"{i}. {item['query']}"):
69
  st.write(item["result"]["ai_summary"])
70
 
71
+ # ── UI main routine ─────────────────────────────────────────────────
 
72
# List-valued keys of a search result that the tabs iterate over. Each one is
# defaulted to [] before rendering so a partial result cannot raise KeyError.
_RESULT_LIST_KEYS = (
    "papers", "umls", "drug_safety", "genes", "mesh_defs",
    "gene_disease", "clinical_trials", "variants",
)


def _render_results_tab(res):
    """Show papers, CSV/PDF export, the save button, UMLS, OpenFDA and the AI summary."""
    st.subheader("Literature")
    for i, p in enumerate(res["papers"], 1):
        st.markdown(f"**{i}. [{p['title']}]({p['link']})** *{p['authors']}*")
        st.write(p["summary"])

    col1, col2 = st.columns(2)
    with col1:
        st.download_button(
            "CSV",
            pd.DataFrame(res["papers"]).to_csv(index=False),
            "papers.csv",
            "text/csv",
        )
    with col2:
        st.download_button(
            "PDF",
            _pdf(res["papers"]),
            "papers.pdf",
            "application/pdf",
        )
    if st.button("💾 Save"):
        save_query(st.session_state.last_query, res)
        st.success("Saved to workspace")

    st.subheader("UMLS concepts")
    for c in res["umls"]:
        # Entries that are not dicts are skipped rather than crashing on
        # ``.get`` (the previous revision of this file had the same guard).
        if isinstance(c, dict) and c.get("cui"):
            st.write(f"- **{c.get('name', '')}** ({c['cui']})")

    st.subheader("OpenFDA safety signals")
    for d in res["drug_safety"]:
        st.json(d)

    st.subheader("AI summary")
    # "ai_summary" is not one of the defaulted list keys, so read it defensively.
    st.info(res.get("ai_summary") or "No AI summary available.")


def _render_genes_tab(res):
    """Show gene names, DisGeNET associations and MeSH definitions."""
    st.header("Gene / Variant signals")
    if not res["genes"]:
        st.info("No gene hits (rate-limited or none found).")
    for g in res["genes"]:
        lab = g.get("name") or g.get("symbol") or g.get("geneid")
        st.write(f"- **{lab}**")
    if res["gene_disease"]:
        st.markdown("### DisGeNET associations")
        st.json(res["gene_disease"][:15])
    if res["mesh_defs"]:
        st.markdown("### MeSH definitions")
        for d in res["mesh_defs"]:
            if d:
                st.write("-", d)


def _render_trials_tab(res):
    """Show one entry per clinical trial, or a notice when there are none."""
    st.header("Clinical trials")
    if not res["clinical_trials"]:
        st.info("No trials (rate-limited or none found).")
    for t in res["clinical_trials"]:
        st.markdown(f"**{t.get('nctId', '')}** – {t.get('briefTitle', '')}")
        st.write(f"Phase {t.get('phase')} | Status {t.get('status')}")


def _render_variants_tab(res):
    """Show up to 50 variant records as JSON, or a notice when there are none."""
    st.header("Cancer variants (cBioPortal)")
    if not res["variants"]:
        st.info("No variant data.")
    else:
        st.json(res["variants"][:50])


def _render_graph_tab(res):
    """Draw the knowledge graph and return ``(nodes, edges)`` for the Metrics tab."""
    nodes, edges, cfg = build_agraph(
        res["papers"], res["umls"], res["drug_safety"]
    )
    hl = st.text_input("Highlight node:", key="hl")
    if hl:
        # Literal, case-insensitive match of the typed text against node labels.
        pat = re.compile(re.escape(hl), re.I)
        for n in nodes:
            n.color = "#f1c40f" if pat.search(n.label or "") else "#d3d3d3"
    agraph(nodes, edges, cfg)
    return nodes, edges


def _render_metrics_tab(nodes, edges):
    """Show graph density and the top hub nodes for the given graph."""
    G = build_nx(
        [n.__dict__ for n in nodes],
        [e.__dict__ for e in edges],
    )
    st.metric("Density", f"{get_density(G):.3f}")
    st.markdown("**Top hubs**")
    labels = {n.id: n.label for n in nodes}
    for nid, sc in get_top_hubs(G):
        st.write(f"- {labels.get(nid, nid)} {sc:.3f}")


def _render_visuals_tab(res):
    """Plot a histogram of publication years taken from ``published``."""
    years = []
    for p in res["papers"]:
        prefix = str(p.get("published") or "")[:4]
        # Keep only a full four-digit prefix so the axis is numeric and a
        # malformed date cannot end up as a bogus category.
        if len(prefix) == 4 and prefix.isdecimal():
            years.append(int(prefix))
    if years:
        st.plotly_chart(px.histogram(years, nbins=12,
                                     title="Publication Year"))


def _ask_followup():
    """``on_click`` handler: send the follow-up question to the LLM and store the answer."""
    q = st.session_state.followup_input.strip()
    if not q:
        st.warning("Please type a question first.")
        return
    with st.spinner("Querying LLM …"):
        try:
            ans = asyncio.run(
                answer_ai_question(
                    q,
                    context=st.session_state.last_query,
                    llm=st.session_state.last_llm,
                )
            )
        except Exception as exc:  # UI boundary: report the failure, keep the page alive
            st.error(f"Follow-up failed: {exc}")
            return
    st.session_state.followup_response = ans["answer"]


def render_ui():
    """Build the whole Streamlit page: header, search controls, result tabs, follow-up Q&A."""
    st.set_page_config("MedGenesis AI", layout="wide")

    # Session-state defaults (only for keys not set by an earlier run)
    for key, default in {
        "query_result": None,
        "last_query": "",
        "last_llm": "openai",
        "followup_input": "",
        "followup_response": None,
    }.items():
        if key not in st.session_state:
            st.session_state[key] = default

    _workspace_sidebar()

    # Header block
    c1, c2 = st.columns([0.15, 0.85])
    with c1:
        if LOGO.exists():
            st.image(str(LOGO), width=105)
    with c2:
        st.markdown("## 🧬 **MedGenesis AI**")
        st.caption("Multi-source biomedical assistant – OpenAI / Gemini")

    # Controls
    llm = st.radio("LLM engine", ["openai", "gemini"],
                   horizontal=True, index=0)
    query = st.text_input("Enter biomedical question",
                          placeholder="e.g. CRISPR glioblastoma therapy")

    # Run search
    if st.button("Run Search 🚀"):
        if not query.strip():
            st.warning("Please enter a biomedical question first.")
        else:
            with st.spinner("Collecting literature & biomedical data …"):
                try:
                    found = asyncio.run(orchestrate_search(query, llm=llm))
                except Exception as exc:  # UI boundary: report, keep earlier results
                    st.error(f"Search failed: {exc}")
                else:
                    st.session_state.query_result = found
                    st.session_state.last_query = query
                    st.session_state.last_llm = llm
                    st.session_state.followup_input = ""
                    st.session_state.followup_response = None

    res = st.session_state.query_result
    if not res:
        st.info("Enter a question and press **Run Search 🚀**")
        return

    # Guard against missing keys
    for key in _RESULT_LIST_KEYS:
        res.setdefault(key, [])

    tabs = st.tabs([
        "Results", "Genes", "Trials", "Variants",
        "Graph", "Metrics", "Visuals"
    ])
    with tabs[0]:
        _render_results_tab(res)
    with tabs[1]:
        _render_genes_tab(res)
    with tabs[2]:
        _render_trials_tab(res)
    with tabs[3]:
        _render_variants_tab(res)
    with tabs[4]:
        nodes, edges = _render_graph_tab(res)
    with tabs[5]:
        # Reuses the graph built for the Graph tab.
        _render_metrics_tab(nodes, edges)
    with tabs[6]:
        _render_visuals_tab(res)

    # Follow-up Q-A block
    st.markdown("---")
    st.text_input("Ask follow-up question:", key="followup_input")
    st.button("Ask AI", on_click=_ask_followup)

    if st.session_state.followup_response:
        st.write(st.session_state.followup_response)
254
+
255
 
256
# Script entry point: build the page when this file is executed directly.
if __name__ == "__main__":
    render_ui()