mgbam committed on
Commit
3cc71e0
·
verified ·
1 Parent(s): 443a674

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +71 -62
app.py CHANGED
@@ -14,7 +14,7 @@ import os
14
  import plotly.graph_objects as go
15
  import networkx as nx
16
 
17
- # --- IMPORTANT: set_page_config MUST be the very first Streamlit command ---
18
  st.set_page_config(page_title="Pharma Research Expert Platform", layout="wide")
19
 
20
  # Setup logging
@@ -30,7 +30,9 @@ API_ENDPOINTS = {
30
  "who_drugs": "https://health-products.canada.ca/api/drug/product",
31
  "fda_drug_approval": "https://api.fda.gov/drug/label.json",
32
  "faers_adverse_events": "https://api.fda.gov/drug/event.json",
 
33
  "pharmgkb_variant_clinical_annotations": "https://api.pharmgkb.org/v1/data/variant/{}/clinicalAnnotations",
 
34
  "pharmgkb_gene_variants": "https://api.pharmgkb.org/v1/data/gene/{}/variants",
35
  "bioportal_search": "https://data.bioontology.org/search",
36
  "dailymed": "https://dailymed.nlm.nih.gov/dailymed/services/v2/spls.json",
@@ -60,10 +62,7 @@ from openai import OpenAI
60
  client = OpenAI(api_key=OPENAI_API_KEY)
61
 
62
  def generate_content(prompt: str) -> str:
63
- """
64
- Generates content using GPT‑4 via the new OpenAI SDK.
65
- Uses the chat completions endpoint.
66
- """
67
  try:
68
  completion = client.chat.completions.create(
69
  model="gpt-4",
@@ -169,18 +168,34 @@ def _get_pharmgkb_clinical_annotations(variant_id: str) -> Optional[Dict]:
169
  st.write(f"No clinical annotations found for variant {variant_id}.")
170
  return None
171
 
172
- def _get_pharmgkb_variants_for_gene(gene_symbol: str) -> Optional[List[str]]:
173
- """Retrieves variant IDs for a gene from PharmGKB."""
174
- gene_search_endpoint = "https://api.pharmgkb.org/v1/data/gene"
175
- params = {"name": gene_symbol}
176
- gene_data = _query_api(gene_search_endpoint, params)
177
- if gene_data and gene_data.get("data") and len(gene_data["data"]) > 0:
178
- gene_id = gene_data["data"][0]["id"]
179
- variants_endpoint = API_ENDPOINTS["pharmgkb_gene_variants"].format(gene_id)
180
- variants_data = _query_api(variants_endpoint)
181
- if variants_data and variants_data.get("data"):
182
- return [variant["id"] for variant in variants_data["data"]]
183
- st.warning(f"No variants found for gene {gene_symbol}.")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
184
  return None
185
 
186
  def scrape_ema_drug_info(drug_name: str) -> Optional[Dict]:
@@ -188,7 +203,9 @@ def scrape_ema_drug_info(drug_name: str) -> Optional[Dict]:
188
  try:
189
  search_url = f"https://www.ema.europa.eu/en/search?text={drug_name.replace(' ', '+')}&type=Product"
190
  headers = {
191
- "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/90.0.4430.93 Safari/537.36"
 
 
192
  }
193
  response = requests.get(search_url, headers=headers, timeout=10)
194
  response.raise_for_status()
@@ -291,10 +308,13 @@ def _create_variant_network(gene: str, variants: List[str], annotations: Dict) -
291
  x1, y1 = pos[edge[1]]
292
  edge_x.extend([x0, x1, None])
293
  edge_y.extend([y0, y1, None])
294
- edge_trace = go.Scatter(x=edge_x, y=edge_y,
295
- line=dict(width=0.5, color="#888"),
296
- hoverinfo="none",
297
- mode="lines")
 
 
 
298
  node_x, node_y, node_text, node_color = [], [], [], []
299
  for node in G.nodes():
300
  x, y = pos[node]
@@ -351,7 +371,7 @@ tabs = st.tabs([
351
  with tabs[0]:
352
  st.header("AI-Driven Drug Development Strategy")
353
  target = st.text_input("Target Disease/Pathway:", placeholder="Enter disease mechanism or target")
354
- target_gene = st.text_input("Target Gene (for Pharmacogenomics):", placeholder="Enter the gene symbol")
355
  strategy = st.selectbox("Development Strategy:", ["First-in-class", "Me-too", "Repurposing", "Biologic"])
356
 
357
  if st.button("Generate Development Plan"):
@@ -378,25 +398,27 @@ with tabs[0]:
378
  # Pharmacogenomic Considerations via PharmGKB
379
  st.subheader("Pharmacogenomic Considerations")
380
  if target_gene:
381
- variant_ids = _get_pharmgkb_variants_for_gene(target_gene)
382
- if variant_ids:
383
- annotations = {}
384
- for variant_id in variant_ids[:5]:
385
- pgx_data = _get_pharmgkb_clinical_annotations(variant_id)
386
- if pgx_data and pgx_data.get("data"):
387
- # Collect a list of drugs (or object names) from the annotations
388
- annotations[variant_id] = [anno.get("obj2Name", "N/A") for anno in pgx_data["data"]]
389
- else:
390
- annotations[variant_id] = []
391
- st.write(f"### Clinical Annotations for Variant: {variant_id}")
392
- if pgx_data:
393
- st.json(pgx_data)
394
- else:
395
- st.write(f"No annotations for variant {variant_id}.")
396
  else:
397
- st.write("No variants found for the specified gene.")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
398
  else:
399
- st.write("Please enter a target gene for pharmacogenomic data.")
400
 
401
  # -----------------------------
402
  # Tab 2: Clinical Trial Analytics
@@ -412,10 +434,10 @@ with tabs[1]:
412
  trial_data = []
413
  for study in trials["studies"][:5]:
414
  trial_data.append({
415
- "Title": study.get("briefTitle", "N/A"),
416
- "Status": study.get("overallStatus", "N/A"),
417
- "Phase": study.get("phase", "Not Available"),
418
- "Enrollment": study.get("enrollmentCount", "Not Available")
419
  })
420
  _display_dataframe(trial_data, list(trial_data[0].keys()))
421
  else:
@@ -452,7 +474,7 @@ with tabs[2]:
452
  if st.button("Analyze Compound"):
453
  with st.spinner("Querying PubChem..."):
454
  smiles = None
455
- # Check if the input is a valid SMILES string
456
  if Chem.MolFromSmiles(compound_input):
457
  smiles = compound_input
458
  else:
@@ -493,7 +515,7 @@ with tabs[3]:
493
  ema_info = scrape_ema_drug_info(drug_name)
494
  ema_status = ema_info.get("EMA Approval Status") if ema_info else "Not Available"
495
 
496
- # WHO Data from Canada Health Products API (may return 404)
497
  who = _query_api(API_ENDPOINTS["who_drugs"], params={"name": drug_name})
498
  who_status = "Yes" if who else "No"
499
 
@@ -543,7 +565,7 @@ with tabs[3]:
543
  os.remove(report_file)
544
 
545
  # -----------------------------
546
- # Tab 5: Literature Search (PubMed and BioPortal)
547
  # -----------------------------
548
  with tabs[4]:
549
  st.header("Literature Search")
@@ -580,7 +602,7 @@ with tabs[4]:
580
  with tabs[5]:
581
  st.header("Comprehensive Dashboard")
582
 
583
- # Placeholder KPI counts (replace with actual aggregated data if available)
584
  fda_count = 5000 # Example value
585
  ema_count = 3000 # Example value
586
  who_count = 1500 # Example value
@@ -619,17 +641,4 @@ with tabs[5]:
619
  network_fig = _create_variant_network(sample_gene, sample_variants, sample_annotations)
620
  st.plotly_chart(network_fig, use_container_width=True)
621
  except Exception as e:
622
- st.error(f"Error generating network graph: {e}")
623
-
624
- # -----------------------------
625
- # Sidebar Information
626
- # -----------------------------
627
- st.sidebar.header("About")
628
- st.sidebar.info("""
629
- **Pharma Research Expert Platform**
630
-
631
- An integrated tool for drug discovery, clinical research, and regulatory affairs.
632
-
633
- **Developed by:** Your Name
634
- **Contact:** [[email protected]](mailto:[email protected])
635
- """)
 
14
  import plotly.graph_objects as go
15
  import networkx as nx
16
 
17
+ # --- IMPORTANT: st.set_page_config MUST be the very first Streamlit command ---
18
  st.set_page_config(page_title="Pharma Research Expert Platform", layout="wide")
19
 
20
  # Setup logging
 
30
  "who_drugs": "https://health-products.canada.ca/api/drug/product",
31
  "fda_drug_approval": "https://api.fda.gov/drug/label.json",
32
  "faers_adverse_events": "https://api.fda.gov/drug/event.json",
33
+ # PharmGKB endpoints expect a PharmGKB accession (e.g., PA1234)
34
  "pharmgkb_variant_clinical_annotations": "https://api.pharmgkb.org/v1/data/variant/{}/clinicalAnnotations",
35
+ "pharmgkb_gene": "https://api.pharmgkb.org/v1/data/gene/{}", # expects PharmGKB accession
36
  "pharmgkb_gene_variants": "https://api.pharmgkb.org/v1/data/gene/{}/variants",
37
  "bioportal_search": "https://data.bioontology.org/search",
38
  "dailymed": "https://dailymed.nlm.nih.gov/dailymed/services/v2/spls.json",
 
62
  client = OpenAI(api_key=OPENAI_API_KEY)
63
 
64
  def generate_content(prompt: str) -> str:
65
+ """Generate content using GPT-4 via the new OpenAI SDK."""
 
 
 
66
  try:
67
  completion = client.chat.completions.create(
68
  model="gpt-4",
 
168
  st.write(f"No clinical annotations found for variant {variant_id}.")
169
  return None
170
 
171
def _get_pharmgkb_variants_for_gene(pharmgkb_gene_id: str) -> Optional[List[str]]:
    """
    Retrieve the variant IDs that PharmGKB lists for a gene.

    Args:
        pharmgkb_gene_id: PharmGKB gene accession (e.g., "PA1234").
            Surrounding whitespace is ignored; None or an empty value is
            treated as an invalid accession.

    Returns:
        A list of variant IDs, or None when the accession does not start
        with "PA" or the lookup yields no usable variants. A Streamlit
        warning is shown in both None cases.
    """
    # Tolerate None and copy-pasted whitespace instead of raising or
    # rejecting an otherwise valid accession.
    accession = (pharmgkb_gene_id or "").strip()
    if not accession.startswith("PA"):
        st.warning("Please provide a valid PharmGKB accession ID for the gene (e.g., PA1234).")
        return None

    endpoint = API_ENDPOINTS["pharmgkb_gene_variants"].format(accession)
    payload = _query_api(endpoint)
    # NOTE(review): assumes _query_api returns a dict (or None) — confirm.
    records = payload.get("data") if isinstance(payload, dict) else None
    if isinstance(records, list):
        # Skip malformed entries rather than failing on a missing "id" key.
        variant_ids = [
            record["id"]
            for record in records
            if isinstance(record, dict) and record.get("id")
        ]
        if variant_ids:
            return variant_ids

    st.warning(f"No variants found for PharmGKB gene {accession}.")
    return None
185
+
186
def get_pharmgkb_gene_data(pharmgkb_gene_id: str) -> Optional[Dict]:
    """
    Retrieve the PharmGKB record for a gene accession.

    Args:
        pharmgkb_gene_id: PharmGKB gene accession (e.g., "PA1234").
            Surrounding whitespace is ignored; None or an empty value is
            treated as an invalid accession.

    Returns:
        The gene record as a dict, or None when the accession does not start
        with "PA" (a Streamlit warning is shown) or no record is returned
        (a Streamlit message is written).
    """
    # Tolerate None and copy-pasted whitespace instead of raising or
    # rejecting an otherwise valid accession.
    accession = (pharmgkb_gene_id or "").strip()
    if not accession.startswith("PA"):
        st.warning("Please enter a valid PharmGKB gene accession ID (e.g., PA1234).")
        return None

    endpoint = API_ENDPOINTS["pharmgkb_gene"].format(accession)
    payload = _query_api(endpoint)
    # NOTE(review): assumes _query_api returns a dict (or None) — confirm.
    record = payload.get("data") if isinstance(payload, dict) else None

    # A by-accession lookup may return a single object under "data" rather
    # than a list; indexing a dict with [0] would raise KeyError, so accept
    # both shapes.
    if isinstance(record, list):
        record = record[0] if record else None
    if isinstance(record, dict) and record:
        return record

    st.write(f"No data found for PharmGKB gene {accession}.")
    return None
200
 
201
  def scrape_ema_drug_info(drug_name: str) -> Optional[Dict]:
 
203
  try:
204
  search_url = f"https://www.ema.europa.eu/en/search?text={drug_name.replace(' ', '+')}&type=Product"
205
  headers = {
206
+ "User-Agent": ("Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
207
+ "AppleWebKit/537.36 (KHTML, like Gecko) "
208
+ "Chrome/90.0.4430.93 Safari/537.36")
209
  }
210
  response = requests.get(search_url, headers=headers, timeout=10)
211
  response.raise_for_status()
 
308
  x1, y1 = pos[edge[1]]
309
  edge_x.extend([x0, x1, None])
310
  edge_y.extend([y0, y1, None])
311
+ edge_trace = go.Scatter(
312
+ x=edge_x,
313
+ y=edge_y,
314
+ line=dict(width=0.5, color="#888"),
315
+ hoverinfo="none",
316
+ mode="lines"
317
+ )
318
  node_x, node_y, node_text, node_color = [], [], [], []
319
  for node in G.nodes():
320
  x, y = pos[node]
 
371
  with tabs[0]:
372
  st.header("AI-Driven Drug Development Strategy")
373
  target = st.text_input("Target Disease/Pathway:", placeholder="Enter disease mechanism or target")
374
+ target_gene = st.text_input("Target Gene (PharmGKB Accession):", placeholder="Enter PharmGKB accession (e.g., PA1234)")
375
  strategy = st.selectbox("Development Strategy:", ["First-in-class", "Me-too", "Repurposing", "Biologic"])
376
 
377
  if st.button("Generate Development Plan"):
 
398
  # Pharmacogenomic Considerations via PharmGKB
399
  st.subheader("Pharmacogenomic Considerations")
400
  if target_gene:
401
+ if not target_gene.startswith("PA"):
402
+ st.warning("Please provide a valid PharmGKB accession (e.g., PA1234) for the gene.")
 
 
 
 
 
 
 
 
 
 
 
 
 
403
  else:
404
+ variant_ids = _get_pharmgkb_variants_for_gene(target_gene)
405
+ if variant_ids:
406
+ annotations = {}
407
+ for variant_id in variant_ids[:5]:
408
+ pgx_data = _get_pharmgkb_clinical_annotations(variant_id)
409
+ if pgx_data and pgx_data.get("data"):
410
+ annotations[variant_id] = [anno.get("obj2Name", "N/A") for anno in pgx_data["data"]]
411
+ else:
412
+ annotations[variant_id] = []
413
+ st.write(f"### Clinical Annotations for Variant: {variant_id}")
414
+ if pgx_data:
415
+ st.json(pgx_data)
416
+ else:
417
+ st.write(f"No annotations for variant {variant_id}.")
418
+ else:
419
+ st.write("No variants found for the specified PharmGKB gene accession.")
420
  else:
421
+ st.write("Please enter a PharmGKB accession for the target gene to retrieve pharmacogenomic data.")
422
 
423
  # -----------------------------
424
  # Tab 2: Clinical Trial Analytics
 
434
  trial_data = []
435
  for study in trials["studies"][:5]:
436
  trial_data.append({
437
+ "Title": study.get("protocolSection", {}).get("identificationModule", {}).get("briefTitle", "N/A"),
438
+ "Status": study.get("protocolSection", {}).get("statusModule", {}).get("overallStatus", "N/A"),
439
+ "Phase": study.get("protocolSection", {}).get("designModule", {}).get("phases", ["Not Available"])[0],
440
+ "Enrollment": study.get("protocolSection", {}).get("designModule", {}).get("enrollmentInfo", {}).get("count", "Not Available")
441
  })
442
  _display_dataframe(trial_data, list(trial_data[0].keys()))
443
  else:
 
474
  if st.button("Analyze Compound"):
475
  with st.spinner("Querying PubChem..."):
476
  smiles = None
477
+ # If the input is already a valid SMILES, use it; otherwise query PubChem
478
  if Chem.MolFromSmiles(compound_input):
479
  smiles = compound_input
480
  else:
 
515
  ema_info = scrape_ema_drug_info(drug_name)
516
  ema_status = ema_info.get("EMA Approval Status") if ema_info else "Not Available"
517
 
518
+ # WHO Data from Canada Health Products API (this endpoint may return 404)
519
  who = _query_api(API_ENDPOINTS["who_drugs"], params={"name": drug_name})
520
  who_status = "Yes" if who else "No"
521
 
 
565
  os.remove(report_file)
566
 
567
  # -----------------------------
568
+ # Tab 5: Literature Search
569
  # -----------------------------
570
  with tabs[4]:
571
  st.header("Literature Search")
 
602
  with tabs[5]:
603
  st.header("Comprehensive Dashboard")
604
 
605
+ # Placeholder KPI counts (replace with real aggregated data if available)
606
  fda_count = 5000 # Example value
607
  ema_count = 3000 # Example value
608
  who_count = 1500 # Example value
 
641
  network_fig = _create_variant_network(sample_gene, sample_variants, sample_annotations)
642
  st.plotly_chart(network_fig, use_container_width=True)
643
  except Exception as e:
644
+ st.error(f"Error generating network graph: {e}")