mgbam committed on
Commit
21a153e
·
verified ·
1 Parent(s): 402d6f1

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +174 -268
app.py CHANGED
@@ -1,27 +1,27 @@
1
  import streamlit as st
2
  import requests
3
- from bs4 import BeautifulSoup
 
4
  import pandas as pd
5
  import matplotlib.pyplot as plt
6
  import seaborn as sns
7
- from rdkit import Chem
8
- from rdkit.Chem import Draw
9
  from fpdf import FPDF
10
  import tempfile
11
  import logging
12
- from typing import Optional, Dict, List, Any
13
  import os
14
  import plotly.graph_objects as go
15
  import networkx as nx
 
16
 
17
- # --- IMPORTANT: st.set_page_config MUST be the very first Streamlit command ---
 
 
 
18
  st.set_page_config(page_title="Pharma Research Expert Platform", layout="wide")
19
-
20
- # Setup logging
21
  logging.basicConfig(level=logging.ERROR)
22
 
23
  # -----------------------------
24
- # API Endpoints (Centralized Configuration)
25
  # -----------------------------
26
  API_ENDPOINTS = {
27
  "clinical_trials": "https://clinicaltrials.gov/api/v2/studies", # no email required
@@ -29,7 +29,7 @@ API_ENDPOINTS = {
29
  "pubmed": "https://eutils.ncbi.nlm.nih.gov/entrez/eutils/esearch.fcgi",
30
  "fda_drug_approval": "https://api.fda.gov/drug/label.json",
31
  "faers_adverse_events": "https://api.fda.gov/drug/event.json",
32
- # PharmGKB endpoints (expecting a PharmGKB accession, e.g. PA1234)
33
  "pharmgkb_variant_clinical_annotations": "https://api.pharmgkb.org/v1/data/variant/{}/clinicalAnnotations",
34
  "pharmgkb_gene": "https://api.pharmgkb.org/v1/data/gene/{}",
35
  "pharmgkb_gene_variants": "https://api.pharmgkb.org/v1/data/gene/{}/variants",
@@ -37,37 +37,37 @@ API_ENDPOINTS = {
37
  # RxNorm endpoints
38
  "rxnorm_rxcui": "https://rxnav.nlm.nih.gov/REST/rxcui.json",
39
  "rxnorm_properties": "https://rxnav.nlm.nih.gov/REST/rxcui/{}/properties.json",
40
- # RxClass endpoint
41
  "rxclass_by_drug": "https://rxnav.nlm.nih.gov/REST/class/byDrugName.json"
42
  }
43
 
44
- # -----------------------------------
45
- # Retrieve Secrets from st.secrets
46
- # -----------------------------------
47
  OPENAI_API_KEY = st.secrets.get("OPENAI_API_KEY")
48
  BIOPORTAL_API_KEY = st.secrets.get("BIOPORTAL_API_KEY")
49
  PUB_EMAIL = st.secrets.get("PUB_EMAIL")
50
  OPENFDA_KEY = st.secrets.get("OPENFDA_KEY")
51
 
52
  if not PUB_EMAIL:
53
- st.error("PubMed email (PUB_EMAIL) is not configured in secrets.")
54
  if not BIOPORTAL_API_KEY:
55
- st.error("BioPortal API key (BIOPORTAL_API_KEY) is not configured in secrets.")
56
  if not OPENFDA_KEY:
57
- st.error("OpenFDA API key (OPENFDA_KEY) is not configured in secrets.")
58
  if not OPENAI_API_KEY:
59
- st.error("OpenAI API key (OPENAI_API_KEY) is not configured in secrets.")
60
 
61
- # -----------------------------------
62
- # Initialize OpenAI Client (Latest SDK, GPT4)
63
- # -----------------------------------
64
  from openai import OpenAI
65
- client = OpenAI(api_key=OPENAI_API_KEY)
66
 
67
  def generate_content(prompt: str) -> str:
68
- """Generate content using GPT4 via the new OpenAI SDK."""
69
  try:
70
- completion = client.chat.completions.create(
71
  model="gpt-4",
72
  messages=[{"role": "user", "content": prompt}],
73
  max_tokens=300
@@ -82,48 +82,41 @@ def generate_content(prompt: str) -> str:
82
  # Utility Functions
83
  # -----------------------------
84
  def _query_api(endpoint: str, params: Optional[Dict] = None, headers: Optional[Dict] = None) -> Optional[Dict]:
85
- """Handles API requests with robust error handling."""
86
  try:
87
  response = requests.get(endpoint, params=params, headers=headers, timeout=15)
88
  response.raise_for_status()
89
  return response.json()
90
- except requests.exceptions.HTTPError as http_err:
91
- st.error(f"HTTP error: {http_err} for endpoint {endpoint}.")
92
- logging.error(f"HTTP error: {http_err} for endpoint {endpoint}.")
93
- except requests.exceptions.RequestException as req_err:
94
- st.error(f"Request error: {req_err} for endpoint {endpoint}.")
95
- logging.error(f"Request error: {req_err} for endpoint {endpoint}.")
96
  except Exception as e:
97
- st.error(f"Unexpected error: {e} for endpoint {endpoint}.")
98
- logging.error(f"Unexpected error: {e} for endpoint {endpoint}.")
99
  return None
100
 
101
  def _get_pubchem_smiles(drug_name: str) -> Optional[str]:
102
- """Retrieves a drug's SMILES string from PubChem."""
103
  url = API_ENDPOINTS["pubchem"].format(drug_name)
104
  data = _query_api(url)
105
- if data and "PC_Compounds" in data and len(data["PC_Compounds"]) > 0:
106
  for prop in data["PC_Compounds"][0].get("props", []):
107
  if prop.get("name") == "Canonical SMILES":
108
  return prop["value"]["sval"]
109
  return None
110
 
111
  def _draw_molecule(smiles: str) -> Optional[Any]:
112
- """Generates a 2D image of a molecule from its SMILES string."""
113
  try:
114
  mol = Chem.MolFromSmiles(smiles)
115
  if mol:
116
  return Draw.MolToImage(mol)
117
  else:
118
- st.error("Invalid SMILES string.")
119
- return None
120
  except Exception as e:
121
  st.error(f"Error drawing molecule: {e}")
122
- logging.error(f"Molecule drawing error: {e}")
123
- return None
124
 
125
  def _get_pubchem_drug_details(drug_name: str) -> Optional[Dict[str, str]]:
126
- """Retrieves generic drug details (molecular formula, IUPAC name, and canonical SMILES) from PubChem."""
127
  url = API_ENDPOINTS["pubchem"].format(drug_name)
128
  data = _query_api(url)
129
  details = {}
@@ -141,7 +134,7 @@ def _get_pubchem_drug_details(drug_name: str) -> Optional[Dict[str, str]]:
141
  return None
142
 
143
  def _get_clinical_trials(query: str) -> Optional[Dict]:
144
- """Queries ClinicalTrials.gov using the 'query.term' parameter."""
145
  if query.upper().startswith("NCT") and query[3:].isdigit():
146
  params = {"id": query, "fmt": "json"}
147
  else:
@@ -149,12 +142,12 @@ def _get_clinical_trials(query: str) -> Optional[Dict]:
149
  return _query_api(API_ENDPOINTS["clinical_trials"], params)
150
 
151
  def _get_pubmed(query: str) -> Optional[Dict]:
152
- """Queries PubMed using E-utilities."""
153
  params = {"db": "pubmed", "term": query, "retmax": 10, "retmode": "json", "email": PUB_EMAIL}
154
  return _query_api(API_ENDPOINTS["pubmed"], params)
155
 
156
  def _get_fda_approval(drug_name: str) -> Optional[Dict]:
157
- """Retrieves FDA drug label approval info."""
158
  if not OPENFDA_KEY:
159
  st.error("OpenFDA API key not configured.")
160
  return None
@@ -166,7 +159,7 @@ def _get_fda_approval(drug_name: str) -> Optional[Dict]:
166
  return None
167
 
168
  def _analyze_adverse_events(drug_name: str, limit: int = 5) -> Optional[Dict]:
169
- """Fetches adverse event reports from FAERS."""
170
  if not OPENFDA_KEY:
171
  st.error("OpenFDA API key not configured.")
172
  return None
@@ -175,7 +168,7 @@ def _analyze_adverse_events(drug_name: str, limit: int = 5) -> Optional[Dict]:
175
  return _query_api(API_ENDPOINTS["faers_adverse_events"], params)
176
 
177
  def _get_pharmgkb_clinical_annotations(variant_id: str) -> Optional[Dict]:
178
- """Fetches clinical annotations for a given variant from PharmGKB."""
179
  endpoint = API_ENDPOINTS["pharmgkb_variant_clinical_annotations"].format(variant_id)
180
  data = _query_api(endpoint)
181
  if data and data.get("data"):
@@ -184,12 +177,9 @@ def _get_pharmgkb_clinical_annotations(variant_id: str) -> Optional[Dict]:
184
  return None
185
 
186
  def _get_pharmgkb_variants_for_gene(pharmgkb_gene_id: str) -> Optional[List[str]]:
187
- """
188
- Retrieves variant IDs for a gene using its PharmGKB accession.
189
- If the accession does not start with "PA", warn the user.
190
- """
191
  if not pharmgkb_gene_id.startswith("PA"):
192
- st.warning("Please provide a valid PharmGKB accession ID (e.g., PA1234).")
193
  return None
194
  endpoint = API_ENDPOINTS["pharmgkb_gene_variants"].format(pharmgkb_gene_id)
195
  data = _query_api(endpoint)
@@ -199,11 +189,9 @@ def _get_pharmgkb_variants_for_gene(pharmgkb_gene_id: str) -> Optional[List[str]
199
  return None
200
 
201
  def get_pharmgkb_gene_data(pharmgkb_gene_id: str) -> Optional[Dict]:
202
- """
203
- Retrieves PharmGKB gene data using a PharmGKB accession.
204
- """
205
  if not pharmgkb_gene_id.startswith("PA"):
206
- st.warning("Please enter a valid PharmGKB gene accession ID (e.g., PA1234).")
207
  return None
208
  endpoint = API_ENDPOINTS["pharmgkb_gene"].format(pharmgkb_gene_id)
209
  data = _query_api(endpoint)
@@ -213,23 +201,23 @@ def get_pharmgkb_gene_data(pharmgkb_gene_id: str) -> Optional[Dict]:
213
  return None
214
 
215
  def _get_bioportal_data(ontology: str, term: str) -> Optional[Dict]:
216
- """Fetches ontology data from BioPortal."""
217
  if not BIOPORTAL_API_KEY:
218
  st.error("BioPortal API key not configured.")
219
  return None
220
  if not term:
221
- st.error("Please provide a search term for ontology search.")
222
  return None
223
  headers = {"Authorization": f"apikey token={BIOPORTAL_API_KEY}"}
224
  params = {"q": term, "ontologies": ontology}
225
  data = _query_api(API_ENDPOINTS["bioportal_search"], params, headers)
226
  if data and data.get("collection"):
227
  return data
228
- st.warning("No results found for the BioPortal query.")
229
  return None
230
 
231
  def _save_pdf_report(report_content: str, filename: str):
232
- """Saves report content as a PDF file."""
233
  try:
234
  pdf = FPDF()
235
  pdf.add_page()
@@ -238,28 +226,28 @@ def _save_pdf_report(report_content: str, filename: str):
238
  pdf.output(filename)
239
  return filename
240
  except Exception as e:
241
- st.error(f"Error saving PDF report: {e}")
242
- logging.error(f"PDF save error: {e}")
243
- return None
244
 
245
  def _display_dataframe(data: List[Dict[str, Any]], columns: List[str]):
246
- """Displays a DataFrame in Streamlit."""
247
  if data:
248
  df = pd.DataFrame(data, columns=columns)
249
  st.dataframe(df)
250
  return df
251
- st.warning("No data available for display.")
252
  return None
253
 
254
  def _create_variant_network(gene: str, variants: List[str], annotations: Dict) -> go.Figure:
255
- """Creates an interactive network graph of gene-variant-drug relationships."""
256
  G = nx.Graph()
257
  G.add_node(gene, color="lightblue")
258
  for variant in variants:
259
  G.add_node(variant, color="lightgreen")
260
  G.add_edge(gene, variant)
261
  for drug in annotations.get(variant, []):
262
- if drug != "N/A":
263
  G.add_node(drug, color="lightcoral")
264
  G.add_edge(variant, drug)
265
  pos = nx.spring_layout(G)
@@ -270,11 +258,8 @@ def _create_variant_network(gene: str, variants: List[str], annotations: Dict) -
270
  edge_x.extend([x0, x1, None])
271
  edge_y.extend([y0, y1, None])
272
  edge_trace = go.Scatter(
273
- x=edge_x,
274
- y=edge_y,
275
- line=dict(width=0.5, color="#888"),
276
- hoverinfo="none",
277
- mode="lines"
278
  )
279
  node_x, node_y, node_text, node_color = [], [], [], []
280
  for node in G.nodes():
@@ -284,26 +269,16 @@ def _create_variant_network(gene: str, variants: List[str], annotations: Dict) -
284
  node_text.append(node)
285
  node_color.append(G.nodes[node]["color"])
286
  node_trace = go.Scatter(
287
- x=node_x,
288
- y=node_y,
289
- mode="markers+text",
290
- hoverinfo="text",
291
- text=node_text,
292
- textposition="bottom center",
293
- marker=dict(
294
- showscale=False,
295
- colorscale="YlGnBu",
296
- color=node_color,
297
- size=10,
298
- line_width=2
299
- )
300
  )
301
  fig = go.Figure(
302
  data=[edge_trace, node_trace],
303
  layout=go.Layout(
304
  title=dict(text="Gene-Variant-Drug Network", font=dict(size=16)),
305
- showlegend=False,
306
- hovermode="closest",
307
  margin=dict(b=20, l=5, r=5, t=40),
308
  xaxis=dict(showgrid=False, zeroline=False, showticklabels=False),
309
  yaxis=dict(showgrid=False, zeroline=False, showticklabels=False)
@@ -312,10 +287,10 @@ def _create_variant_network(gene: str, variants: List[str], annotations: Dict) -
312
  return fig
313
 
314
  # -----------------------------
315
- # New Functions: RxNorm and RxClass Integration
316
  # -----------------------------
317
  def get_rxnorm_rxcui(drug_name: str) -> Optional[str]:
318
- """Retrieves the RxNorm Concept Unique Identifier (RxCUI) for a drug name."""
319
  url = f"{API_ENDPOINTS['rxnorm_rxcui']}?name={drug_name}"
320
  data = _query_api(url)
321
  if data and "idGroup" in data and data["idGroup"].get("rxnormId"):
@@ -324,20 +299,23 @@ def get_rxnorm_rxcui(drug_name: str) -> Optional[str]:
324
  return None
325
 
326
  def get_rxnorm_properties(rxcui: str) -> Optional[Dict]:
327
- """Retrieves RxNorm properties for a given RxCUI."""
328
  url = API_ENDPOINTS["rxnorm_properties"].format(rxcui)
329
  return _query_api(url)
330
 
331
  def get_rxclass_by_drug_name(drug_name: str) -> Optional[Dict]:
332
- """Retrieves RxClass information for a drug by name."""
333
  url = f"{API_ENDPOINTS['rxclass_by_drug']}?drugName={drug_name}"
334
- return _query_api(url)
 
 
 
335
 
336
  # -----------------------------
337
- # New Function: Generate AI Insights for a Drug
338
  # -----------------------------
339
  def generate_drug_insights(drug_name: str) -> str:
340
- """Gathers FDA, PubChem, RxNorm, and RxClass data for a drug and uses GPT4 to generate innovative insights."""
341
  # FDA Data
342
  fda_info = _get_fda_approval(drug_name)
343
  fda_status = "Not Approved"
@@ -363,12 +341,9 @@ def generate_drug_insights(drug_name: str) -> str:
363
 
364
  # RxClass Data
365
  rxclass_data = get_rxclass_by_drug_name(drug_name)
366
- if rxclass_data and rxclass_data.get("classMember"):
367
- rxclass_info = f"RxClass: {rxclass_data}"
368
- else:
369
- rxclass_info = "No RxClass data available."
370
 
371
- # Construct prompt for GPT4 with all the gathered data
372
  prompt = (
373
  f"Drug Analysis Report for '{drug_name}':\n\n"
374
  f"**FDA Approval Status:** {fda_status}\n\n"
@@ -378,20 +353,16 @@ def generate_drug_insights(drug_name: str) -> str:
378
  f" - Canonical SMILES: {canonical_smiles}\n\n"
379
  f"**RxNorm Data:** {rxnorm_info}\n\n"
380
  f"**RxClass Data:** {rxclass_info}\n\n"
381
- f"As an advanced pharmacogenomics researcher and AI expert, please provide an innovative and comprehensive analysis of "
382
- f"the drug '{drug_name}'. In your response, include:\n"
383
- f"- Pharmacogenomic considerations\n"
384
- f"- Potential repurposing opportunities\n"
385
- f"- Regulatory insights and challenges\n"
386
- f"- Suggestions for further research and data integration\n\n"
387
- f"Present your answer in a clear, bullet-point format and feel free to add any novel ideas."
388
  )
389
 
390
  insights = generate_content(prompt)
391
  return insights
392
 
393
  # -----------------------------
394
- # Streamlit App Layout and Tabs
395
  # -----------------------------
396
  tabs = st.tabs([
397
  "💊 Drug Development",
@@ -404,68 +375,54 @@ tabs = st.tabs([
404
  "🤖 AI Insights"
405
  ])
406
 
407
- # -----------------------------
408
- # Tab 1: Drug Development
409
- # -----------------------------
410
  with tabs[0]:
411
  st.header("AI-Driven Drug Development Strategy")
412
  target = st.text_input("Target Disease/Pathway:", placeholder="Enter disease mechanism or target")
413
- target_gene = st.text_input("Target Gene (PharmGKB Accession):", placeholder="Enter PharmGKB accession (e.g., PA1234)")
414
  strategy = st.selectbox("Development Strategy:", ["First-in-class", "Me-too", "Repurposing", "Biologic"])
415
 
416
  if st.button("Generate Development Plan"):
417
  with st.spinner("Generating plan..."):
418
  plan_prompt = (
419
- f"Develop a comprehensive drug development plan for treating {target} "
420
- f"using a {strategy} strategy. Include sections on target validation, lead optimization, "
421
- f"preclinical testing, clinical trial design, regulatory submission strategy, market analysis, "
422
- f"and competitive landscape. Highlight key milestones and challenges."
423
  )
424
  plan = generate_content(plan_prompt)
425
  st.subheader("Comprehensive Development Plan")
426
  st.markdown(plan)
427
 
428
- # FDA Regulatory Insights
429
  if target:
430
  fda_info = _get_fda_approval(target.split()[0])
431
- st.subheader("FDA Regulatory Insights")
432
  if fda_info:
433
  st.json(fda_info)
434
  else:
435
  st.write("No FDA data found for the given target.")
436
 
437
- # Pharmacogenomic Considerations via PharmGKB
438
  st.subheader("Pharmacogenomic Considerations")
439
  if target_gene:
440
  if not target_gene.startswith("PA"):
441
- st.warning("Please provide a valid PharmGKB accession (e.g., PA1234) for the gene.")
442
  else:
443
  variant_ids = _get_pharmgkb_variants_for_gene(target_gene)
444
  if variant_ids:
445
  annotations = {}
446
- for variant_id in variant_ids[:5]:
447
- pgx_data = _get_pharmgkb_clinical_annotations(variant_id)
448
- if pgx_data and pgx_data.get("data"):
449
- annotations[variant_id] = [anno.get("obj2Name", "N/A") for anno in pgx_data["data"]]
450
- else:
451
- annotations[variant_id] = []
452
- st.write(f"### Clinical Annotations for Variant: {variant_id}")
453
- if pgx_data:
454
- st.json(pgx_data)
455
- else:
456
- st.write(f"No annotations for variant {variant_id}.")
457
  else:
458
  st.write("No variants found for the specified PharmGKB gene accession.")
459
  else:
460
- st.write("Please enter a PharmGKB accession for the target gene to retrieve pharmacogenomic data.")
461
 
462
- # -----------------------------
463
- # Tab 2: Clinical Trial Analytics
464
- # -----------------------------
465
  with tabs[1]:
466
  st.header("Clinical Trial Landscape Analytics")
467
  trial_query = st.text_input("Search Clinical Trials:", placeholder="Enter condition, intervention, or NCT number")
468
-
469
  if st.button("Analyze Trial Landscape"):
470
  with st.spinner("Fetching trial data..."):
471
  trials = _get_clinical_trials(trial_query)
@@ -476,15 +433,15 @@ with tabs[1]:
476
  "Title": study.get("protocolSection", {}).get("identificationModule", {}).get("briefTitle", "N/A"),
477
  "Status": study.get("protocolSection", {}).get("statusModule", {}).get("overallStatus", "N/A"),
478
  "Phase": study.get("protocolSection", {}).get("designModule", {}).get("phases", ["Not Available"])[0],
479
- "Enrollment": study.get("protocolSection", {}).get("designModule", {}).get("enrollmentInfo", {}).get("count", "Not Available")
480
  })
481
  _display_dataframe(trial_data, list(trial_data[0].keys()))
482
  else:
483
- st.warning("No clinical trials found for the given query.")
484
 
485
  ae_data = _analyze_adverse_events(trial_query)
486
  if ae_data and ae_data.get("results"):
487
- st.subheader("Adverse Event Profile (Top 5 Reports)")
488
  ae_results = ae_data["results"][:5]
489
  ae_df = pd.json_normalize(ae_results)
490
  st.dataframe(ae_df)
@@ -503,17 +460,12 @@ with tabs[1]:
503
  else:
504
  st.write("No adverse event data available.")
505
 
506
- # -----------------------------
507
- # Tab 3: Molecular Profiling
508
- # -----------------------------
509
  with tabs[2]:
510
  st.header("Advanced Molecular Profiling")
511
  compound_input = st.text_input("Compound Identifier:", placeholder="Enter drug name, SMILES, or INN")
512
-
513
  if st.button("Analyze Compound"):
514
  with st.spinner("Querying PubChem..."):
515
- smiles = None
516
- # If input is a valid SMILES, use it; otherwise attempt to retrieve from PubChem.
517
  if Chem.MolFromSmiles(compound_input):
518
  smiles = compound_input
519
  else:
@@ -521,10 +473,9 @@ with tabs[2]:
521
  if smiles:
522
  img = _draw_molecule(smiles)
523
  if img:
524
- st.image(img, caption="2D Structure")
525
  else:
526
- st.error("Compound structure not found in databases. Please provide a more specific compound name.")
527
-
528
  pubchem_data = _query_api(API_ENDPOINTS["pubchem"].format(compound_input))
529
  if pubchem_data and pubchem_data.get("PC_Compounds"):
530
  st.subheader("Physicochemical Properties")
@@ -535,32 +486,24 @@ with tabs[2]:
535
  else:
536
  st.error("Physicochemical properties not available.")
537
 
538
- # -----------------------------
539
- # Tab 4: Regulatory Intelligence
540
- # -----------------------------
541
  with tabs[3]:
542
  st.header("Global Regulatory Monitoring")
543
- st.markdown("**Note:** Due to persistent issues with EMA, WHO, and DailyMed APIs, this section now focuses on FDA data and generic drug details from PubChem.")
544
- drug_name = st.text_input("Drug Product:", placeholder="Enter generic or brand name")
545
-
546
  if st.button("Generate Regulatory Report"):
547
  with st.spinner("Compiling regulatory data..."):
548
- # FDA Data
549
- fda_info = _get_fda_approval(drug_name)
550
  fda_status = "Not Approved"
551
  if fda_info and fda_info.get("openfda", {}).get("brand_name"):
552
  fda_status = ", ".join(fda_info["openfda"]["brand_name"])
553
-
554
- # PubChem Drug Details for Generic/Formula Info
555
- pubchem_details = _get_pubchem_drug_details(drug_name)
556
  if pubchem_details:
557
  formula = pubchem_details.get("Molecular Formula", "N/A")
558
  iupac = pubchem_details.get("IUPAC Name", "N/A")
559
- canonical_smiles = pubchem_details.get("Canonical SMILES", "N/A")
560
  else:
561
- formula = iupac = canonical_smiles = "Not Available"
562
-
563
- st.subheader("Regulatory Status & Drug Details")
564
  col1, col2 = st.columns(2)
565
  with col1:
566
  st.markdown("**FDA Status**")
@@ -569,156 +512,119 @@ with tabs[3]:
569
  st.markdown("**Drug Details (PubChem)**")
570
  st.write(f"**Molecular Formula:** {formula}")
571
  st.write(f"**IUPAC Name:** {iupac}")
572
- st.write(f"**Canonical SMILES:** {canonical_smiles}")
573
-
574
- regulatory_content = (
575
- f"### Regulatory Report for {drug_name}\n\n"
576
  f"**FDA Status:** {fda_status}\n\n"
577
  f"**Molecular Formula:** {formula}\n\n"
578
  f"**IUPAC Name:** {iupac}\n\n"
579
- f"**Canonical SMILES:** {canonical_smiles}\n"
580
  )
581
- with tempfile.NamedTemporaryFile(delete=False, suffix=".pdf") as tmp_file:
582
- report_file = _save_pdf_report(regulatory_content, tmp_file.name)
583
- if report_file:
584
- with open(report_file, "rb") as file:
585
- st.download_button(
586
- label="Download Regulatory Report (PDF)",
587
- data=file,
588
- file_name=f"{drug_name}_regulatory_report.pdf",
589
- mime="application/pdf"
590
- )
591
- os.remove(report_file)
592
-
593
- # -----------------------------
594
- # Tab 5: Literature Search
595
- # -----------------------------
596
  with tabs[4]:
597
  st.header("Literature Search")
598
- search_term = st.text_input("Enter search query for PubMed:", placeholder="e.g., Alzheimer's disease genetics")
599
  if st.button("Search PubMed"):
600
  with st.spinner("Searching PubMed..."):
601
- pubmed_data = _get_pubmed(search_term)
602
- if pubmed_data and pubmed_data.get("esearchresult", {}).get("idlist"):
603
- idlist = pubmed_data["esearchresult"]["idlist"]
604
- st.subheader(f"Found {len(idlist)} PubMed Results")
605
- for article_id in idlist:
606
- st.markdown(f"- [PMID: {article_id}](https://pubmed.ncbi.nlm.nih.gov/{article_id}/)")
607
  else:
608
  st.write("No PubMed results found.")
609
-
610
  st.header("Ontology Search")
611
- ontology_search_term = st.text_input("Enter search query for Ontology:", placeholder="e.g., Alzheimer's disease")
612
- ontology_select = st.selectbox("Select Ontology", ["MESH", "NCIT", "GO", "SNOMEDCT"])
613
  if st.button("Search BioPortal"):
614
  with st.spinner("Searching BioPortal..."):
615
- bioportal_data = _get_bioportal_data(ontology_select, ontology_search_term)
616
- if bioportal_data and bioportal_data.get("collection"):
617
- st.subheader(f"BioPortal Results for {ontology_select}")
618
- for result in bioportal_data["collection"]:
619
- label = result.get("prefLabel", "N/A")
620
- ontology_id = result.get("@id", "N/A")
621
- st.markdown(f"- **{label}** ({ontology_id})")
622
  else:
623
  st.write("No ontology results found.")
624
 
625
- # -----------------------------
626
- # Tab 6: Dashboard
627
- # -----------------------------
628
  with tabs[5]:
629
  st.header("Comprehensive Dashboard")
630
-
631
- # Placeholder KPI counts (replace with real aggregated data if available)
632
- fda_count = 5000 # Example value
633
- trials_count = 12000 # Example value
634
- pub_count = 250000 # Example value
635
-
636
- def _create_kpi_dashboard(fda: int, trials: int, pubs: int):
637
- col1, col2, col3 = st.columns(3)
638
- col1.metric("FDA Approved Drugs", fda)
639
- col2.metric("Ongoing Trials", trials)
640
- col3.metric("Publications", pubs)
641
-
642
- _create_kpi_dashboard(fda_count, trials_count, pub_count)
643
-
644
  st.subheader("Trend Analysis")
645
  years = list(range(2000, 2026))
646
- approvals_per_year = [fda_count // len(years)] * len(years) # Placeholder example data
647
- fig, ax = plt.subplots(figsize=(10, 6))
648
- sns.lineplot(x=years, y=approvals_per_year, marker="o", ax=ax)
649
- ax.set_title("FDA Approvals Over Time")
650
- ax.set_xlabel("Year")
651
- ax.set_ylabel("Number of Approvals")
652
- st.pyplot(fig)
653
-
654
  st.subheader("Gene-Variant-Drug Network (Sample)")
655
  sample_gene = "CYP2C19"
656
  sample_variants = ["rs4244285", "rs12248560"]
657
- sample_annotations = {
658
- "rs4244285": ["Clopidogrel", "Omeprazole"],
659
- "rs12248560": ["Sertraline"]
660
- }
661
  try:
662
- network_fig = _create_variant_network(sample_gene, sample_variants, sample_annotations)
663
- st.plotly_chart(network_fig, use_container_width=True)
664
  except Exception as e:
665
- st.error(f"Error generating network graph: {e}")
666
 
667
- # -----------------------------
668
- # Tab 7: Drug Data Integration
669
- # -----------------------------
670
  with tabs[6]:
671
  st.header("🧪 Drug Data Integration")
672
- drug_query = st.text_input("Enter Drug Name for API Integration:", placeholder="e.g., aspirin")
673
-
674
  if st.button("Retrieve Drug Data"):
675
- with st.spinner("Fetching drug data from multiple sources..."):
676
- # RxNorm – Get RxCUI and then properties
677
- rxnorm_id = get_rxnorm_rxcui(drug_query)
678
  if rxnorm_id:
679
- rx_properties = get_rxnorm_properties(rxnorm_id)
680
  else:
681
- rx_properties = None
682
-
683
- # RxClass – Get classes related to the drug
684
- rxclass_data = get_rxclass_by_drug_name(drug_query)
685
-
686
  st.subheader("RxNorm Data")
687
  if rxnorm_id:
688
- st.write(f"RxCUI for {drug_query}: {rxnorm_id}")
689
- if rx_properties:
690
- st.json(rx_properties)
691
- else:
692
- st.write("No RxNorm properties found.")
693
  else:
694
- st.write("No RxCUI found for the given drug name.")
695
-
696
  st.subheader("RxClass Information")
697
- if rxclass_data and rxclass_data.get("classMember"):
698
- st.json(rxclass_data)
699
  else:
700
  st.write("No RxClass data found for the given drug.")
701
-
702
- # PubChem Drug Details for generic information
703
- pubchem_details = _get_pubchem_drug_details(drug_query)
704
  st.subheader("PubChem Drug Details")
705
- if pubchem_details:
706
- st.write(f"**Molecular Formula:** {pubchem_details.get('Molecular Formula', 'N/A')}")
707
- st.write(f"**IUPAC Name:** {pubchem_details.get('IUPAC Name', 'N/A')}")
708
- st.write(f"**Canonical SMILES:** {pubchem_details.get('Canonical SMILES', 'N/A')}")
709
  else:
710
- st.write("No PubChem details found for the given drug.")
711
 
712
- # -----------------------------
713
- # Tab 8: AI Insights
714
- # -----------------------------
715
  with tabs[7]:
716
  st.header("🤖 AI Insights")
717
- ai_drug_query = st.text_input("Enter Drug Name for AI-Driven Analysis:", placeholder="e.g., aspirin")
718
  if st.button("Generate AI Insights"):
719
- with st.spinner("Generating AI insights..."):
720
- insights = generate_drug_insights(ai_drug_query)
721
  st.subheader("AI-Driven Drug Analysis")
722
- st.markdown(insights)
723
-
724
 
 
1
  import streamlit as st
2
  import requests
3
+ from rdkit import Chem
4
+ from rdkit.Chem import Draw
5
  import pandas as pd
6
  import matplotlib.pyplot as plt
7
  import seaborn as sns
 
 
8
  from fpdf import FPDF
9
  import tempfile
10
  import logging
 
11
  import os
12
  import plotly.graph_objects as go
13
  import networkx as nx
14
+ from typing import Optional, Dict, List, Any
15
 
16
+ # -----------------------------
17
+ # SETUP
18
+ # -----------------------------
19
+ # Must be the very first Streamlit command
20
  st.set_page_config(page_title="Pharma Research Expert Platform", layout="wide")
 
 
21
  logging.basicConfig(level=logging.ERROR)
22
 
23
  # -----------------------------
24
+ # API ENDPOINTS
25
  # -----------------------------
26
  API_ENDPOINTS = {
27
  "clinical_trials": "https://clinicaltrials.gov/api/v2/studies", # no email required
 
29
  "pubmed": "https://eutils.ncbi.nlm.nih.gov/entrez/eutils/esearch.fcgi",
30
  "fda_drug_approval": "https://api.fda.gov/drug/label.json",
31
  "faers_adverse_events": "https://api.fda.gov/drug/event.json",
32
+ # PharmGKB endpoints expecting a PharmGKB accession (e.g., PA1234)
33
  "pharmgkb_variant_clinical_annotations": "https://api.pharmgkb.org/v1/data/variant/{}/clinicalAnnotations",
34
  "pharmgkb_gene": "https://api.pharmgkb.org/v1/data/gene/{}",
35
  "pharmgkb_gene_variants": "https://api.pharmgkb.org/v1/data/gene/{}/variants",
 
37
  # RxNorm endpoints
38
  "rxnorm_rxcui": "https://rxnav.nlm.nih.gov/REST/rxcui.json",
39
  "rxnorm_properties": "https://rxnav.nlm.nih.gov/REST/rxcui/{}/properties.json",
40
+ # RxClass endpoint – note: this endpoint sometimes returns 404 if no data are available.
41
  "rxclass_by_drug": "https://rxnav.nlm.nih.gov/REST/class/byDrugName.json"
42
  }
43
 
44
+ # -----------------------------
45
+ # Retrieve Secrets
46
+ # -----------------------------
47
  OPENAI_API_KEY = st.secrets.get("OPENAI_API_KEY")
48
  BIOPORTAL_API_KEY = st.secrets.get("BIOPORTAL_API_KEY")
49
  PUB_EMAIL = st.secrets.get("PUB_EMAIL")
50
  OPENFDA_KEY = st.secrets.get("OPENFDA_KEY")
51
 
52
  if not PUB_EMAIL:
53
+ st.error("PubMed email (PUB_EMAIL) is not configured.")
54
  if not BIOPORTAL_API_KEY:
55
+ st.error("BioPortal API key (BIOPORTAL_API_KEY) is not configured.")
56
  if not OPENFDA_KEY:
57
+ st.error("OpenFDA API key (OPENFDA_KEY) is not configured.")
58
  if not OPENAI_API_KEY:
59
+ st.error("OpenAI API key (OPENAI_API_KEY) is not configured.")
60
 
61
+ # -----------------------------
62
+ # Initialize OpenAI Client (GPT-4)
63
+ # -----------------------------
64
  from openai import OpenAI
65
+ openai_client = OpenAI(api_key=OPENAI_API_KEY)
66
 
67
  def generate_content(prompt: str) -> str:
68
+ """Generate content using GPT-4 via the OpenAI API."""
69
  try:
70
+ completion = openai_client.chat.completions.create(
71
  model="gpt-4",
72
  messages=[{"role": "user", "content": prompt}],
73
  max_tokens=300
 
82
  # Utility Functions
83
  # -----------------------------
84
  def _query_api(endpoint: str, params: Optional[Dict] = None, headers: Optional[Dict] = None) -> Optional[Dict]:
85
+ """Handles API requests with error handling."""
86
  try:
87
  response = requests.get(endpoint, params=params, headers=headers, timeout=15)
88
  response.raise_for_status()
89
  return response.json()
 
 
 
 
 
 
90
  except Exception as e:
91
+ st.error(f"API error for {endpoint}: {e}")
92
+ logging.error(f"Error for {endpoint}: {e}")
93
  return None
94
 
95
  def _get_pubchem_smiles(drug_name: str) -> Optional[str]:
96
+ """Retrieve canonical SMILES string from PubChem."""
97
  url = API_ENDPOINTS["pubchem"].format(drug_name)
98
  data = _query_api(url)
99
+ if data and data.get("PC_Compounds"):
100
  for prop in data["PC_Compounds"][0].get("props", []):
101
  if prop.get("name") == "Canonical SMILES":
102
  return prop["value"]["sval"]
103
  return None
104
 
105
def _draw_molecule(smiles: str) -> Optional[Any]:
    """Render a 2D image of the molecule described by a SMILES string.

    Args:
        smiles: SMILES string handed to ``Chem.MolFromSmiles``.

    Returns:
        The image produced by ``Draw.MolToImage``, or ``None`` when the
        SMILES cannot be parsed or drawing raises. Both failure paths are
        reported through ``st.error``.
    """
    try:
        parsed = Chem.MolFromSmiles(smiles)
        if not parsed:
            st.error("Invalid SMILES provided.")
        else:
            return Draw.MolToImage(parsed)
    except Exception as exc:
        st.error(f"Error drawing molecule: {exc}")
        logging.error(exc)
    # Reached for an unparseable SMILES and after any handled exception.
    return None
117
 
118
  def _get_pubchem_drug_details(drug_name: str) -> Optional[Dict[str, str]]:
119
+ """Retrieve drug details (molecular formula, IUPAC name, canonical SMILES) from PubChem."""
120
  url = API_ENDPOINTS["pubchem"].format(drug_name)
121
  data = _query_api(url)
122
  details = {}
 
134
  return None
135
 
136
  def _get_clinical_trials(query: str) -> Optional[Dict]:
137
+ """Query ClinicalTrials.gov (no email parameter needed)."""
138
  if query.upper().startswith("NCT") and query[3:].isdigit():
139
  params = {"id": query, "fmt": "json"}
140
  else:
 
142
  return _query_api(API_ENDPOINTS["clinical_trials"], params)
143
 
144
def _get_pubmed(query: str) -> Optional[Dict]:
    """Search PubMed through the E-utilities endpoint.

    Args:
        query: Search term sent as the ``term`` parameter.

    Returns:
        The JSON response from ``_query_api`` (at most 10 IDs are requested),
        or ``None`` when the request fails.
    """
    search_params = dict(
        db="pubmed",
        term=query,
        retmax=10,
        retmode="json",
        email=PUB_EMAIL,
    )
    return _query_api(API_ENDPOINTS["pubmed"], search_params)
148
 
149
  def _get_fda_approval(drug_name: str) -> Optional[Dict]:
150
+ """Retrieve FDA drug approval info using openFDA."""
151
  if not OPENFDA_KEY:
152
  st.error("OpenFDA API key not configured.")
153
  return None
 
159
  return None
160
 
161
  def _analyze_adverse_events(drug_name: str, limit: int = 5) -> Optional[Dict]:
162
+ """Fetch adverse events from FAERS."""
163
  if not OPENFDA_KEY:
164
  st.error("OpenFDA API key not configured.")
165
  return None
 
168
  return _query_api(API_ENDPOINTS["faers_adverse_events"], params)
169
 
170
  def _get_pharmgkb_clinical_annotations(variant_id: str) -> Optional[Dict]:
171
+ """Get clinical annotations for a PharmGKB variant."""
172
  endpoint = API_ENDPOINTS["pharmgkb_variant_clinical_annotations"].format(variant_id)
173
  data = _query_api(endpoint)
174
  if data and data.get("data"):
 
177
  return None
178
 
179
  def _get_pharmgkb_variants_for_gene(pharmgkb_gene_id: str) -> Optional[List[str]]:
180
+ """Retrieve variant IDs for a PharmGKB gene accession (e.g., PA1234)."""
 
 
 
181
  if not pharmgkb_gene_id.startswith("PA"):
182
+ st.warning("Please provide a valid PharmGKB accession (e.g., PA1234).")
183
  return None
184
  endpoint = API_ENDPOINTS["pharmgkb_gene_variants"].format(pharmgkb_gene_id)
185
  data = _query_api(endpoint)
 
189
  return None
190
 
191
  def get_pharmgkb_gene_data(pharmgkb_gene_id: str) -> Optional[Dict]:
192
+ """Retrieve PharmGKB gene data."""
 
 
193
  if not pharmgkb_gene_id.startswith("PA"):
194
+ st.warning("Please enter a valid PharmGKB gene accession (e.g., PA1234).")
195
  return None
196
  endpoint = API_ENDPOINTS["pharmgkb_gene"].format(pharmgkb_gene_id)
197
  data = _query_api(endpoint)
 
201
  return None
202
 
203
def _get_bioportal_data(ontology: str, term: str) -> Optional[Dict]:
    """Search a BioPortal ontology for a term.

    Args:
        ontology: Ontology acronym sent as the ``ontologies`` parameter.
        term: Free-text search term sent as the ``q`` parameter.

    Returns:
        The response dict when it contains a non-empty ``"collection"``;
        otherwise ``None``. A missing API key or empty term is reported with
        ``st.error``; an empty result set with ``st.warning``.
    """
    # Validate preconditions first; the API key check takes priority.
    problem = None
    if not BIOPORTAL_API_KEY:
        problem = "BioPortal API key not configured."
    elif not term:
        problem = "Please provide a term for ontology search."
    if problem is not None:
        st.error(problem)
        return None

    result = _query_api(
        API_ENDPOINTS["bioportal_search"],
        {"q": term, "ontologies": ontology},
        {"Authorization": f"apikey token={BIOPORTAL_API_KEY}"},
    )
    if not (result and result.get("collection")):
        st.warning("No BioPortal results found.")
        return None
    return result
218
 
219
  def _save_pdf_report(report_content: str, filename: str):
220
+ """Save report content as a PDF."""
221
  try:
222
  pdf = FPDF()
223
  pdf.add_page()
 
226
  pdf.output(filename)
227
  return filename
228
  except Exception as e:
229
+ st.error(f"Error saving PDF: {e}")
230
+ logging.error(e)
231
+ return None
232
 
233
def _display_dataframe(data: List[Dict[str, Any]], columns: List[str]):
    """Render a list of records as a Streamlit dataframe.

    Args:
        data: Records to display, one dict per row.
        columns: Column names passed to ``pd.DataFrame``.

    Returns:
        The ``DataFrame`` that was displayed, or ``None`` (after a
        ``st.warning``) when ``data`` is empty.
    """
    if not data:
        st.warning("No data available.")
        return None
    frame = pd.DataFrame(data, columns=columns)
    st.dataframe(frame)
    return frame
241
 
242
  def _create_variant_network(gene: str, variants: List[str], annotations: Dict) -> go.Figure:
243
+ """Create a network graph (gene-variant-drug)."""
244
  G = nx.Graph()
245
  G.add_node(gene, color="lightblue")
246
  for variant in variants:
247
  G.add_node(variant, color="lightgreen")
248
  G.add_edge(gene, variant)
249
  for drug in annotations.get(variant, []):
250
+ if drug and drug != "N/A":
251
  G.add_node(drug, color="lightcoral")
252
  G.add_edge(variant, drug)
253
  pos = nx.spring_layout(G)
 
258
  edge_x.extend([x0, x1, None])
259
  edge_y.extend([y0, y1, None])
260
  edge_trace = go.Scatter(
261
+ x=edge_x, y=edge_y, line=dict(width=0.5, color="#888"),
262
+ hoverinfo="none", mode="lines"
 
 
 
263
  )
264
  node_x, node_y, node_text, node_color = [], [], [], []
265
  for node in G.nodes():
 
269
  node_text.append(node)
270
  node_color.append(G.nodes[node]["color"])
271
  node_trace = go.Scatter(
272
+ x=node_x, y=node_y, mode="markers+text", hoverinfo="text",
273
+ text=node_text, textposition="bottom center",
274
+ marker=dict(showscale=False, colorscale="YlGnBu",
275
+ color=node_color, size=10, line_width=2)
 
 
 
 
 
 
 
 
 
276
  )
277
  fig = go.Figure(
278
  data=[edge_trace, node_trace],
279
  layout=go.Layout(
280
  title=dict(text="Gene-Variant-Drug Network", font=dict(size=16)),
281
+ showlegend=False, hovermode="closest",
 
282
  margin=dict(b=20, l=5, r=5, t=40),
283
  xaxis=dict(showgrid=False, zeroline=False, showticklabels=False),
284
  yaxis=dict(showgrid=False, zeroline=False, showticklabels=False)
 
287
  return fig
288
 
289
  # -----------------------------
290
+ # RxNorm & RxClass Functions
291
  # -----------------------------
292
  def get_rxnorm_rxcui(drug_name: str) -> Optional[str]:
293
+ """Retrieve the RxCUI for a drug name."""
294
  url = f"{API_ENDPOINTS['rxnorm_rxcui']}?name={drug_name}"
295
  data = _query_api(url)
296
  if data and "idGroup" in data and data["idGroup"].get("rxnormId"):
 
299
  return None
300
 
301
def get_rxnorm_properties(rxcui: str) -> Optional[Dict]:
    """Fetch the RxNorm properties record for an RxCUI.

    Args:
        rxcui: RxNorm concept identifier substituted into the endpoint URL.

    Returns:
        The JSON response from ``_query_api``, or ``None`` on failure.
    """
    properties_endpoint = API_ENDPOINTS["rxnorm_properties"]
    return _query_api(properties_endpoint.format(rxcui))
305
 
306
def get_rxclass_by_drug_name(drug_name: str) -> Optional[Dict]:
    """Retrieve RxClass information for a drug by name.

    Args:
        drug_name: Drug name sent as the ``drugName`` query parameter. It is
            passed through ``params`` so the HTTP layer encodes spaces and
            reserved characters instead of splicing raw text into the URL.

    Returns:
        The response dict when it contains a ``"classMember"`` key;
        ``None`` when the name is blank, the request fails, or the key is
        absent.
    """
    name = (drug_name or "").strip()
    if not name:
        # Nothing to look up; skip the request for blank input.
        return None

    data = _query_api(API_ENDPOINTS["rxclass_by_drug"], {"drugName": name})
    # NOTE(review): confirm the RxClass response really exposes a top-level
    # "classMember" key for this endpoint; the check is kept as-is.
    if data and "classMember" in data:
        return data
    return None
313
 
314
  # -----------------------------
315
+ # AI-Driven Drug Insights
316
  # -----------------------------
317
  def generate_drug_insights(drug_name: str) -> str:
318
+ """Gathers data from FDA, PubChem, RxNorm, and RxClass then uses GPT-4 to generate an innovative analysis."""
319
  # FDA Data
320
  fda_info = _get_fda_approval(drug_name)
321
  fda_status = "Not Approved"
 
341
 
342
  # RxClass Data
343
  rxclass_data = get_rxclass_by_drug_name(drug_name)
344
+ rxclass_info = rxclass_data if rxclass_data else "No RxClass data available."
 
 
 
345
 
346
+ # Construct a prompt for GPT-4
347
  prompt = (
348
  f"Drug Analysis Report for '{drug_name}':\n\n"
349
  f"**FDA Approval Status:** {fda_status}\n\n"
 
353
  f" - Canonical SMILES: {canonical_smiles}\n\n"
354
  f"**RxNorm Data:** {rxnorm_info}\n\n"
355
  f"**RxClass Data:** {rxclass_info}\n\n"
356
+ f"As an innovative pharmacogenomics researcher and AI expert, please provide a comprehensive analysis of '{drug_name}', "
357
+ f"including pharmacogenomic considerations, potential repurposing opportunities, regulatory insights, and suggestions for further research. "
358
+ f"Present your answer in bullet points."
 
 
 
 
359
  )
360
 
361
  insights = generate_content(prompt)
362
  return insights
363
 
364
  # -----------------------------
365
+ # STREAMLIT APP TABS
366
  # -----------------------------
367
  tabs = st.tabs([
368
  "💊 Drug Development",
 
375
  "🤖 AI Insights"
376
  ])
377
 
378
+ # ----- Tab 1: Drug Development -----
 
 
379
with tabs[0]:
    # Tab 1: collect a target, an optional PharmGKB gene accession and a
    # strategy, then generate a GPT plan plus FDA and PharmGKB lookups.
    st.header("AI-Driven Drug Development Strategy")
    target = st.text_input("Target Disease/Pathway:", placeholder="Enter disease mechanism or target")
    target_gene = st.text_input("Target Gene (PharmGKB Accession):", placeholder="e.g., PA1234")
    strategy = st.selectbox("Development Strategy:", ["First-in-class", "Me-too", "Repurposing", "Biologic"])

    if st.button("Generate Development Plan"):
        with st.spinner("Generating plan..."):
            plan_prompt = (
                f"Develop a comprehensive drug development plan for treating {target} using a {strategy} strategy. "
                "Include sections on target validation, lead optimization, preclinical testing, clinical trial design, "
                "regulatory submission strategy, market analysis, and competitive landscape. Highlight key milestones and challenges."
            )
            plan = generate_content(plan_prompt)
            st.subheader("Comprehensive Development Plan")
            st.markdown(plan)

            st.subheader("FDA Regulatory Insights")
            # Split once and test the result: a whitespace-only target is
            # truthy as a string but yields no words, which previously made
            # target.split()[0] raise IndexError.
            target_terms = target.split()
            if target_terms:
                # Only the first word of the target is used for the FDA lookup.
                fda_info = _get_fda_approval(target_terms[0])
                if fda_info:
                    st.json(fda_info)
                else:
                    st.write("No FDA data found for the given target.")

            st.subheader("Pharmacogenomic Considerations")
            if target_gene:
                if not target_gene.startswith("PA"):
                    st.warning("Please provide a valid PharmGKB accession (e.g., PA1234).")
                else:
                    variant_ids = _get_pharmgkb_variants_for_gene(target_gene)
                    if variant_ids:
                        annotations = {}
                        # Cap at five variants to bound the number of API calls.
                        for vid in variant_ids[:5]:
                            pgx = _get_pharmgkb_clinical_annotations(vid)
                            annotations[vid] = [anno.get("obj2Name", "N/A") for anno in pgx.get("data", [])] if pgx else []
                            st.write(f"### Annotations for Variant: {vid}")
                            st.json(pgx if pgx else {"message": "No annotations found."})
                    else:
                        st.write("No variants found for the specified PharmGKB gene accession.")
            else:
                st.write("Enter a PharmGKB gene accession to retrieve pharmacogenomic data.")
421
 
422
+ # ----- Tab 2: Clinical Trial Analytics -----
 
 
423
  with tabs[1]:
424
  st.header("Clinical Trial Landscape Analytics")
425
  trial_query = st.text_input("Search Clinical Trials:", placeholder="Enter condition, intervention, or NCT number")
 
426
  if st.button("Analyze Trial Landscape"):
427
  with st.spinner("Fetching trial data..."):
428
  trials = _get_clinical_trials(trial_query)
 
433
  "Title": study.get("protocolSection", {}).get("identificationModule", {}).get("briefTitle", "N/A"),
434
  "Status": study.get("protocolSection", {}).get("statusModule", {}).get("overallStatus", "N/A"),
435
  "Phase": study.get("protocolSection", {}).get("designModule", {}).get("phases", ["Not Available"])[0],
436
+ "Enrollment": study.get("protocolSection", {}).get("designModule", {}).get("enrollmentInfo", {}).get("count", "N/A")
437
  })
438
  _display_dataframe(trial_data, list(trial_data[0].keys()))
439
  else:
440
+ st.warning("No clinical trials found for the query.")
441
 
442
  ae_data = _analyze_adverse_events(trial_query)
443
  if ae_data and ae_data.get("results"):
444
+ st.subheader("Adverse Event Profile (Top 5)")
445
  ae_results = ae_data["results"][:5]
446
  ae_df = pd.json_normalize(ae_results)
447
  st.dataframe(ae_df)
 
460
  else:
461
  st.write("No adverse event data available.")
462
 
463
+ # ----- Tab 3: Molecular Profiling -----
 
 
464
  with tabs[2]:
465
  st.header("Advanced Molecular Profiling")
466
  compound_input = st.text_input("Compound Identifier:", placeholder="Enter drug name, SMILES, or INN")
 
467
  if st.button("Analyze Compound"):
468
  with st.spinner("Querying PubChem..."):
 
 
469
  if Chem.MolFromSmiles(compound_input):
470
  smiles = compound_input
471
  else:
 
473
  if smiles:
474
  img = _draw_molecule(smiles)
475
  if img:
476
+ st.image(img, caption="2D Molecular Structure")
477
  else:
478
+ st.error("Structure not found. Please try a more specific compound name.")
 
479
  pubchem_data = _query_api(API_ENDPOINTS["pubchem"].format(compound_input))
480
  if pubchem_data and pubchem_data.get("PC_Compounds"):
481
  st.subheader("Physicochemical Properties")
 
486
  else:
487
  st.error("Physicochemical properties not available.")
488
 
489
+ # ----- Tab 4: Regulatory Intelligence -----
 
 
490
  with tabs[3]:
491
  st.header("Global Regulatory Monitoring")
492
+ st.markdown("**Note:** This section now focuses on FDA data and generic drug details from PubChem.")
493
+ drug_prod = st.text_input("Drug Product:", placeholder="Enter generic or brand name")
 
494
  if st.button("Generate Regulatory Report"):
495
  with st.spinner("Compiling regulatory data..."):
496
+ fda_info = _get_fda_approval(drug_prod)
 
497
  fda_status = "Not Approved"
498
  if fda_info and fda_info.get("openfda", {}).get("brand_name"):
499
  fda_status = ", ".join(fda_info["openfda"]["brand_name"])
500
+ pubchem_details = _get_pubchem_drug_details(drug_prod)
 
 
501
  if pubchem_details:
502
  formula = pubchem_details.get("Molecular Formula", "N/A")
503
  iupac = pubchem_details.get("IUPAC Name", "N/A")
504
+ canon_smiles = pubchem_details.get("Canonical SMILES", "N/A")
505
  else:
506
+ formula = iupac = canon_smiles = "Not Available"
 
 
507
  col1, col2 = st.columns(2)
508
  with col1:
509
  st.markdown("**FDA Status**")
 
512
  st.markdown("**Drug Details (PubChem)**")
513
  st.write(f"**Molecular Formula:** {formula}")
514
  st.write(f"**IUPAC Name:** {iupac}")
515
+ st.write(f"**Canonical SMILES:** {canon_smiles}")
516
+ report_text = (
517
+ f"### Regulatory Report for {drug_prod}\n\n"
 
518
  f"**FDA Status:** {fda_status}\n\n"
519
  f"**Molecular Formula:** {formula}\n\n"
520
  f"**IUPAC Name:** {iupac}\n\n"
521
+ f"**Canonical SMILES:** {canon_smiles}\n"
522
  )
523
+ with tempfile.NamedTemporaryFile(delete=False, suffix=".pdf") as tmp:
524
+ pdf_file = _save_pdf_report(report_text, tmp.name)
525
+ if pdf_file:
526
+ with open(pdf_file, "rb") as f:
527
+ st.download_button("Download Regulatory Report (PDF)", data=f, file_name=f"{drug_prod}_report.pdf", mime="application/pdf")
528
+ os.remove(pdf_file)
529
+
530
+ # ----- Tab 5: Literature Search -----
 
 
 
 
 
 
 
531
with tabs[4]:
    # --- PubMed search: list matching PMIDs as links ---
    st.header("Literature Search")
    lit_query = st.text_input("Enter search query for PubMed:", placeholder="e.g., Alzheimer's disease genetics")
    if st.button("Search PubMed"):
        with st.spinner("Searching PubMed..."):
            pubmed_results = _get_pubmed(lit_query)
            esearch = pubmed_results.get("esearchresult", {}) if pubmed_results else {}
            if not esearch.get("idlist"):
                st.write("No PubMed results found.")
            else:
                id_list = esearch["idlist"]
                st.subheader(f"Found {len(id_list)} PubMed Results")
                for pmid in id_list:
                    st.markdown(f"- [PMID: {pmid}](https://pubmed.ncbi.nlm.nih.gov/{pmid}/)")

    # --- BioPortal search: list preferred labels with their identifiers ---
    st.header("Ontology Search")
    ont_query = st.text_input("Enter search query for Ontology:", placeholder="e.g., Alzheimer's disease")
    ont_select = st.selectbox("Select Ontology", ["MESH", "NCIT", "GO", "SNOMEDCT"])
    if st.button("Search BioPortal"):
        with st.spinner("Searching BioPortal..."):
            bioportal_results = _get_bioportal_data(ont_select, ont_query)
            if not (bioportal_results and bioportal_results.get("collection")):
                st.write("No ontology results found.")
            else:
                st.subheader(f"BioPortal Results for {ont_select}")
                for item in bioportal_results["collection"]:
                    label = item.get("prefLabel", "N/A")
                    ont_id = item.get("@id", "N/A")
                    st.markdown(f"- **{label}** ({ont_id})")
558
 
559
+ # ----- Tab 6: Dashboard -----
 
 
560
with tabs[5]:
    # Tab 6: static KPI tiles, a sample trend chart and a sample network.
    st.header("Comprehensive Dashboard")
    # Placeholder KPIs – replace with real-time aggregated data if available
    kpi_fda = 5000
    kpi_trials = 12000
    kpi_pubs = 250000
    col1, col2, col3 = st.columns(3)
    col1.metric("FDA Approved Drugs", kpi_fda)
    col2.metric("Ongoing Trials", kpi_trials)
    col3.metric("Publications", kpi_pubs)

    st.subheader("Trend Analysis")
    years = list(range(2000, 2026))
    approvals = [kpi_fda // len(years)] * len(years)  # Sample static data
    fig_trend, ax_trend = plt.subplots(figsize=(10, 6))
    sns.lineplot(x=years, y=approvals, marker="o", ax=ax_trend)
    ax_trend.set_title("FDA Approvals Over Time")
    ax_trend.set_xlabel("Year")
    ax_trend.set_ylabel("Number of Approvals")
    st.pyplot(fig_trend)
    # The script reruns on every interaction; close the figure so pyplot
    # does not keep one more open figure alive per rerun.
    plt.close(fig_trend)

    st.subheader("Gene-Variant-Drug Network (Sample)")
    sample_gene = "CYP2C19"
    sample_variants = ["rs4244285", "rs12248560"]
    sample_annots = {"rs4244285": ["Clopidogrel", "Omeprazole"], "rs12248560": ["Sertraline"]}
    try:
        net_fig = _create_variant_network(sample_gene, sample_variants, sample_annots)
        st.plotly_chart(net_fig, use_container_width=True)
    except Exception as e:
        # Keep the rest of the dashboard usable if the graph cannot be built.
        st.error(f"Network graph error: {e}")
588
 
589
+ # ----- Tab 7: Drug Data Integration -----
 
 
590
with tabs[6]:
    # Tab 7: show RxNorm, RxClass and PubChem data for one drug name.
    st.header("🧪 Drug Data Integration")
    drug_integration = st.text_input("Enter Drug Name for API Integration:", placeholder="e.g., aspirin")
    if st.button("Retrieve Drug Data"):
        with st.spinner("Fetching drug data..."):
            # Properties can only be requested once an RxCUI is known.
            rxnorm_id = get_rxnorm_rxcui(drug_integration)
            rx_props = get_rxnorm_properties(rxnorm_id) if rxnorm_id else None
            rxclass_info = get_rxclass_by_drug_name(drug_integration)

            st.subheader("RxNorm Data")
            if not rxnorm_id:
                st.write("No RxCUI found.")
            else:
                st.write(f"RxCUI for {drug_integration}: {rxnorm_id}")
                st.json(rx_props if rx_props else {"message": "No RxNorm properties found."})

            st.subheader("RxClass Information")
            if not rxclass_info:
                st.write("No RxClass data found for the given drug.")
            else:
                st.json(rxclass_info)

            pubchem_info = _get_pubchem_drug_details(drug_integration)
            st.subheader("PubChem Drug Details")
            if not pubchem_info:
                st.write("No PubChem details found.")
            else:
                st.write(f"**Molecular Formula:** {pubchem_info.get('Molecular Formula', 'N/A')}")
                st.write(f"**IUPAC Name:** {pubchem_info.get('IUPAC Name', 'N/A')}")
                st.write(f"**Canonical SMILES:** {pubchem_info.get('Canonical SMILES', 'N/A')}")
620
 
621
+ # ----- Tab 8: AI Insights -----
 
 
622
with tabs[7]:
    # Tab 8: generate a GPT-written analysis for a single drug name.
    st.header("🤖 AI Insights")
    ai_drug = st.text_input("Enter Drug Name for AI-Driven Analysis:", placeholder="e.g., tylenol")
    if st.button("Generate AI Insights"):
        if not ai_drug.strip():
            # Skip the lookups and the GPT call when there is nothing to
            # analyse.
            st.warning("Please enter a drug name to analyze.")
        else:
            with st.spinner("Generating insights..."):
                insights_text = generate_drug_insights(ai_drug)
                st.subheader("AI-Driven Drug Analysis")
                st.markdown(insights_text)
 
630