Update app.py
Browse files
app.py
CHANGED
@@ -14,7 +14,7 @@ import os
|
|
14 |
import plotly.graph_objects as go
|
15 |
import networkx as nx
|
16 |
|
17 |
-
# --- IMPORTANT: set_page_config MUST be the very first Streamlit command ---
|
18 |
st.set_page_config(page_title="Pharma Research Expert Platform", layout="wide")
|
19 |
|
20 |
# Setup logging
|
@@ -30,7 +30,9 @@ API_ENDPOINTS = {
|
|
30 |
"who_drugs": "https://health-products.canada.ca/api/drug/product",
|
31 |
"fda_drug_approval": "https://api.fda.gov/drug/label.json",
|
32 |
"faers_adverse_events": "https://api.fda.gov/drug/event.json",
|
|
|
33 |
"pharmgkb_variant_clinical_annotations": "https://api.pharmgkb.org/v1/data/variant/{}/clinicalAnnotations",
|
|
|
34 |
"pharmgkb_gene_variants": "https://api.pharmgkb.org/v1/data/gene/{}/variants",
|
35 |
"bioportal_search": "https://data.bioontology.org/search",
|
36 |
"dailymed": "https://dailymed.nlm.nih.gov/dailymed/services/v2/spls.json",
|
@@ -60,10 +62,7 @@ from openai import OpenAI
|
|
60 |
client = OpenAI(api_key=OPENAI_API_KEY)
|
61 |
|
62 |
def generate_content(prompt: str) -> str:
|
63 |
-
"""
|
64 |
-
Generates content using GPT‑4 via the new OpenAI SDK.
|
65 |
-
Uses the chat completions endpoint.
|
66 |
-
"""
|
67 |
try:
|
68 |
completion = client.chat.completions.create(
|
69 |
model="gpt-4",
|
@@ -169,18 +168,34 @@ def _get_pharmgkb_clinical_annotations(variant_id: str) -> Optional[Dict]:
|
|
169 |
st.write(f"No clinical annotations found for variant {variant_id}.")
|
170 |
return None
|
171 |
|
172 |
-
def _get_pharmgkb_variants_for_gene(
|
173 |
-
"""
|
174 |
-
|
175 |
-
|
176 |
-
|
177 |
-
if
|
178 |
-
|
179 |
-
|
180 |
-
|
181 |
-
|
182 |
-
|
183 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
184 |
return None
|
185 |
|
186 |
def scrape_ema_drug_info(drug_name: str) -> Optional[Dict]:
|
@@ -188,7 +203,9 @@ def scrape_ema_drug_info(drug_name: str) -> Optional[Dict]:
|
|
188 |
try:
|
189 |
search_url = f"https://www.ema.europa.eu/en/search?text={drug_name.replace(' ', '+')}&type=Product"
|
190 |
headers = {
|
191 |
-
"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64)
|
|
|
|
|
192 |
}
|
193 |
response = requests.get(search_url, headers=headers, timeout=10)
|
194 |
response.raise_for_status()
|
@@ -291,10 +308,13 @@ def _create_variant_network(gene: str, variants: List[str], annotations: Dict) -
|
|
291 |
x1, y1 = pos[edge[1]]
|
292 |
edge_x.extend([x0, x1, None])
|
293 |
edge_y.extend([y0, y1, None])
|
294 |
-
edge_trace = go.Scatter(
|
295 |
-
|
296 |
-
|
297 |
-
|
|
|
|
|
|
|
298 |
node_x, node_y, node_text, node_color = [], [], [], []
|
299 |
for node in G.nodes():
|
300 |
x, y = pos[node]
|
@@ -351,7 +371,7 @@ tabs = st.tabs([
|
|
351 |
with tabs[0]:
|
352 |
st.header("AI-Driven Drug Development Strategy")
|
353 |
target = st.text_input("Target Disease/Pathway:", placeholder="Enter disease mechanism or target")
|
354 |
-
target_gene = st.text_input("Target Gene (
|
355 |
strategy = st.selectbox("Development Strategy:", ["First-in-class", "Me-too", "Repurposing", "Biologic"])
|
356 |
|
357 |
if st.button("Generate Development Plan"):
|
@@ -378,25 +398,27 @@ with tabs[0]:
|
|
378 |
# Pharmacogenomic Considerations via PharmGKB
|
379 |
st.subheader("Pharmacogenomic Considerations")
|
380 |
if target_gene:
|
381 |
-
|
382 |
-
|
383 |
-
annotations = {}
|
384 |
-
for variant_id in variant_ids[:5]:
|
385 |
-
pgx_data = _get_pharmgkb_clinical_annotations(variant_id)
|
386 |
-
if pgx_data and pgx_data.get("data"):
|
387 |
-
# Collect a list of drugs (or object names) from the annotations
|
388 |
-
annotations[variant_id] = [anno.get("obj2Name", "N/A") for anno in pgx_data["data"]]
|
389 |
-
else:
|
390 |
-
annotations[variant_id] = []
|
391 |
-
st.write(f"### Clinical Annotations for Variant: {variant_id}")
|
392 |
-
if pgx_data:
|
393 |
-
st.json(pgx_data)
|
394 |
-
else:
|
395 |
-
st.write(f"No annotations for variant {variant_id}.")
|
396 |
else:
|
397 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
398 |
else:
|
399 |
-
st.write("Please enter a target gene
|
400 |
|
401 |
# -----------------------------
|
402 |
# Tab 2: Clinical Trial Analytics
|
@@ -412,10 +434,10 @@ with tabs[1]:
|
|
412 |
trial_data = []
|
413 |
for study in trials["studies"][:5]:
|
414 |
trial_data.append({
|
415 |
-
"Title": study.get("briefTitle", "N/A"),
|
416 |
-
"Status": study.get("overallStatus", "N/A"),
|
417 |
-
"Phase": study.get("
|
418 |
-
"Enrollment": study.get("
|
419 |
})
|
420 |
_display_dataframe(trial_data, list(trial_data[0].keys()))
|
421 |
else:
|
@@ -452,7 +474,7 @@ with tabs[2]:
|
|
452 |
if st.button("Analyze Compound"):
|
453 |
with st.spinner("Querying PubChem..."):
|
454 |
smiles = None
|
455 |
-
#
|
456 |
if Chem.MolFromSmiles(compound_input):
|
457 |
smiles = compound_input
|
458 |
else:
|
@@ -493,7 +515,7 @@ with tabs[3]:
|
|
493 |
ema_info = scrape_ema_drug_info(drug_name)
|
494 |
ema_status = ema_info.get("EMA Approval Status") if ema_info else "Not Available"
|
495 |
|
496 |
-
# WHO Data from Canada Health Products API (may return 404)
|
497 |
who = _query_api(API_ENDPOINTS["who_drugs"], params={"name": drug_name})
|
498 |
who_status = "Yes" if who else "No"
|
499 |
|
@@ -543,7 +565,7 @@ with tabs[3]:
|
|
543 |
os.remove(report_file)
|
544 |
|
545 |
# -----------------------------
|
546 |
-
# Tab 5: Literature Search
|
547 |
# -----------------------------
|
548 |
with tabs[4]:
|
549 |
st.header("Literature Search")
|
@@ -580,7 +602,7 @@ with tabs[4]:
|
|
580 |
with tabs[5]:
|
581 |
st.header("Comprehensive Dashboard")
|
582 |
|
583 |
-
# Placeholder KPI counts (replace with
|
584 |
fda_count = 5000 # Example value
|
585 |
ema_count = 3000 # Example value
|
586 |
who_count = 1500 # Example value
|
@@ -619,17 +641,4 @@ with tabs[5]:
|
|
619 |
network_fig = _create_variant_network(sample_gene, sample_variants, sample_annotations)
|
620 |
st.plotly_chart(network_fig, use_container_width=True)
|
621 |
except Exception as e:
|
622 |
-
st.error(f"Error generating network graph: {e}")
|
623 |
-
|
624 |
-
# -----------------------------
|
625 |
-
# Sidebar Information
|
626 |
-
# -----------------------------
|
627 |
-
st.sidebar.header("About")
|
628 |
-
st.sidebar.info("""
|
629 |
-
**Pharma Research Expert Platform**
|
630 |
-
|
631 |
-
An integrated tool for drug discovery, clinical research, and regulatory affairs.
|
632 |
-
|
633 |
-
**Developed by:** Your Name
|
634 |
-
**Contact:** [[email protected]](mailto:[email protected])
|
635 |
-
""")
|
|
|
14 |
import plotly.graph_objects as go
|
15 |
import networkx as nx
|
16 |
|
17 |
+
# --- IMPORTANT: st.set_page_config MUST be the very first Streamlit command ---
|
18 |
st.set_page_config(page_title="Pharma Research Expert Platform", layout="wide")
|
19 |
|
20 |
# Setup logging
|
|
|
30 |
"who_drugs": "https://health-products.canada.ca/api/drug/product",
|
31 |
"fda_drug_approval": "https://api.fda.gov/drug/label.json",
|
32 |
"faers_adverse_events": "https://api.fda.gov/drug/event.json",
|
33 |
+
# PharmGKB endpoints expect a PharmGKB accession (e.g., PA1234)
|
34 |
"pharmgkb_variant_clinical_annotations": "https://api.pharmgkb.org/v1/data/variant/{}/clinicalAnnotations",
|
35 |
+
"pharmgkb_gene": "https://api.pharmgkb.org/v1/data/gene/{}", # expects PharmGKB accession
|
36 |
"pharmgkb_gene_variants": "https://api.pharmgkb.org/v1/data/gene/{}/variants",
|
37 |
"bioportal_search": "https://data.bioontology.org/search",
|
38 |
"dailymed": "https://dailymed.nlm.nih.gov/dailymed/services/v2/spls.json",
|
|
|
62 |
client = OpenAI(api_key=OPENAI_API_KEY)
|
63 |
|
64 |
def generate_content(prompt: str) -> str:
|
65 |
+
"""Generate content using GPT-4 via the new OpenAI SDK."""
|
|
|
|
|
|
|
66 |
try:
|
67 |
completion = client.chat.completions.create(
|
68 |
model="gpt-4",
|
|
|
168 |
st.write(f"No clinical annotations found for variant {variant_id}.")
|
169 |
return None
|
170 |
|
171 |
+
def _get_pharmgkb_variants_for_gene(pharmgkb_gene_id: str) -> Optional[List[str]]:
    """
    Look up the variant IDs listed for a gene, keyed by PharmGKB accession.

    The accession is only accepted when it begins with "PA"; anything else
    produces a Streamlit warning and no request is made. The accession is
    substituted into the "pharmgkb_gene_variants" endpoint template and the
    resulting URL is passed to _query_api.

    Returns the "id" of every entry under the response's "data" key, or None
    when the accession is rejected or the response carries no data (a warning
    is shown in both cases).
    """
    if pharmgkb_gene_id.startswith("PA"):
        url = API_ENDPOINTS["pharmgkb_gene_variants"].format(pharmgkb_gene_id)
        response = _query_api(url)
        # Treat a falsy response and a missing/empty "data" entry the same way.
        records = response.get("data") if response else None
        if records:
            return [record["id"] for record in records]
        st.warning(f"No variants found for PharmGKB gene {pharmgkb_gene_id}.")
    else:
        st.warning("Please provide a valid PharmGKB accession ID for the gene (e.g., PA1234).")
    return None
|
185 |
+
|
186 |
+
def get_pharmgkb_gene_data(pharmgkb_gene_id: str) -> Optional[Dict]:
    """
    Retrieve the PharmGKB record for a gene identified by its accession.

    Args:
        pharmgkb_gene_id: PharmGKB accession for the gene. It must start with
            "PA"; otherwise a Streamlit warning is shown and no request is made.

    Returns:
        The gene record, or None when the accession is rejected or the
        response has no usable "data" payload (a message is written to the
        page in that case).
    """
    if not pharmgkb_gene_id.startswith("PA"):
        st.warning("Please enter a valid PharmGKB gene accession ID (e.g., PA1234).")
        return None
    endpoint = API_ENDPOINTS["pharmgkb_gene"].format(pharmgkb_gene_id)
    data = _query_api(endpoint)
    # NOTE(review): assumes _query_api returns a dict-like object or a falsy
    # value, as the other PharmGKB helpers in this file do -- confirm.
    payload = data.get("data") if data else None
    # A lookup by accession is expected to return a single object under
    # "data" rather than a list; indexing a dict with [0] would raise
    # KeyError, so accept both shapes.
    if isinstance(payload, dict) and payload:
        return payload
    if isinstance(payload, list) and payload:
        return payload[0]
    st.write(f"No data found for PharmGKB gene {pharmgkb_gene_id}.")
    return None
|
200 |
|
201 |
def scrape_ema_drug_info(drug_name: str) -> Optional[Dict]:
|
|
|
203 |
try:
|
204 |
search_url = f"https://www.ema.europa.eu/en/search?text={drug_name.replace(' ', '+')}&type=Product"
|
205 |
headers = {
|
206 |
+
"User-Agent": ("Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
|
207 |
+
"AppleWebKit/537.36 (KHTML, like Gecko) "
|
208 |
+
"Chrome/90.0.4430.93 Safari/537.36")
|
209 |
}
|
210 |
response = requests.get(search_url, headers=headers, timeout=10)
|
211 |
response.raise_for_status()
|
|
|
308 |
x1, y1 = pos[edge[1]]
|
309 |
edge_x.extend([x0, x1, None])
|
310 |
edge_y.extend([y0, y1, None])
|
311 |
+
edge_trace = go.Scatter(
|
312 |
+
x=edge_x,
|
313 |
+
y=edge_y,
|
314 |
+
line=dict(width=0.5, color="#888"),
|
315 |
+
hoverinfo="none",
|
316 |
+
mode="lines"
|
317 |
+
)
|
318 |
node_x, node_y, node_text, node_color = [], [], [], []
|
319 |
for node in G.nodes():
|
320 |
x, y = pos[node]
|
|
|
371 |
with tabs[0]:
|
372 |
st.header("AI-Driven Drug Development Strategy")
|
373 |
target = st.text_input("Target Disease/Pathway:", placeholder="Enter disease mechanism or target")
|
374 |
+
target_gene = st.text_input("Target Gene (PharmGKB Accession):", placeholder="Enter PharmGKB accession (e.g., PA1234)")
|
375 |
strategy = st.selectbox("Development Strategy:", ["First-in-class", "Me-too", "Repurposing", "Biologic"])
|
376 |
|
377 |
if st.button("Generate Development Plan"):
|
|
|
398 |
# Pharmacogenomic Considerations via PharmGKB
|
399 |
st.subheader("Pharmacogenomic Considerations")
|
400 |
if target_gene:
|
401 |
+
if not target_gene.startswith("PA"):
|
402 |
+
st.warning("Please provide a valid PharmGKB accession (e.g., PA1234) for the gene.")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
403 |
else:
|
404 |
+
variant_ids = _get_pharmgkb_variants_for_gene(target_gene)
|
405 |
+
if variant_ids:
|
406 |
+
annotations = {}
|
407 |
+
for variant_id in variant_ids[:5]:
|
408 |
+
pgx_data = _get_pharmgkb_clinical_annotations(variant_id)
|
409 |
+
if pgx_data and pgx_data.get("data"):
|
410 |
+
annotations[variant_id] = [anno.get("obj2Name", "N/A") for anno in pgx_data["data"]]
|
411 |
+
else:
|
412 |
+
annotations[variant_id] = []
|
413 |
+
st.write(f"### Clinical Annotations for Variant: {variant_id}")
|
414 |
+
if pgx_data:
|
415 |
+
st.json(pgx_data)
|
416 |
+
else:
|
417 |
+
st.write(f"No annotations for variant {variant_id}.")
|
418 |
+
else:
|
419 |
+
st.write("No variants found for the specified PharmGKB gene accession.")
|
420 |
else:
|
421 |
+
st.write("Please enter a PharmGKB accession for the target gene to retrieve pharmacogenomic data.")
|
422 |
|
423 |
# -----------------------------
|
424 |
# Tab 2: Clinical Trial Analytics
|
|
|
434 |
trial_data = []
|
435 |
for study in trials["studies"][:5]:
|
436 |
trial_data.append({
|
437 |
+
"Title": study.get("protocolSection", {}).get("identificationModule", {}).get("briefTitle", "N/A"),
|
438 |
+
"Status": study.get("protocolSection", {}).get("statusModule", {}).get("overallStatus", "N/A"),
|
439 |
+
"Phase": study.get("protocolSection", {}).get("designModule", {}).get("phases", ["Not Available"])[0],
|
440 |
+
"Enrollment": study.get("protocolSection", {}).get("designModule", {}).get("enrollmentInfo", {}).get("count", "Not Available")
|
441 |
})
|
442 |
_display_dataframe(trial_data, list(trial_data[0].keys()))
|
443 |
else:
|
|
|
474 |
if st.button("Analyze Compound"):
|
475 |
with st.spinner("Querying PubChem..."):
|
476 |
smiles = None
|
477 |
+
# If the input is already a valid SMILES, use it; otherwise query PubChem
|
478 |
if Chem.MolFromSmiles(compound_input):
|
479 |
smiles = compound_input
|
480 |
else:
|
|
|
515 |
ema_info = scrape_ema_drug_info(drug_name)
|
516 |
ema_status = ema_info.get("EMA Approval Status") if ema_info else "Not Available"
|
517 |
|
518 |
+
# WHO Data from Canada Health Products API (this endpoint may return 404)
|
519 |
who = _query_api(API_ENDPOINTS["who_drugs"], params={"name": drug_name})
|
520 |
who_status = "Yes" if who else "No"
|
521 |
|
|
|
565 |
os.remove(report_file)
|
566 |
|
567 |
# -----------------------------
|
568 |
+
# Tab 5: Literature Search
|
569 |
# -----------------------------
|
570 |
with tabs[4]:
|
571 |
st.header("Literature Search")
|
|
|
602 |
with tabs[5]:
|
603 |
st.header("Comprehensive Dashboard")
|
604 |
|
605 |
+
# Placeholder KPI counts (replace with real aggregated data if available)
|
606 |
fda_count = 5000 # Example value
|
607 |
ema_count = 3000 # Example value
|
608 |
who_count = 1500 # Example value
|
|
|
641 |
network_fig = _create_variant_network(sample_gene, sample_variants, sample_annotations)
|
642 |
st.plotly_chart(network_fig, use_container_width=True)
|
643 |
except Exception as e:
|
644 |
+
st.error(f"Error generating network graph: {e}")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|