# drugapp / app.py
# Web-viewer residue preserved as a comment: author "mgbam", commit 48811dc
# ("Update app.py", verified), file size 26.9 kB.
import streamlit as st
import requests
from bs4 import BeautifulSoup
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from rdkit import Chem
from rdkit.Chem import Draw
from fpdf import FPDF
import tempfile
import logging
from typing import Optional, Dict, List, Any
import os
# --- IMPORTANT: set_page_config MUST be the very first Streamlit command ---
st.set_page_config(
    page_title="Pharma Research Expert Platform",
    layout="wide",
)

# Setup logging: only ERROR and above are emitted.
logging.basicConfig(level=logging.ERROR)

# -----------------------------
# API Endpoints (Centralized Configuration)
# -----------------------------
# Values containing "{}" are templates that callers fill in with str.format().
# NOTE(review): "who_drugs" points at a health-products.canada.ca host despite
# its key name -- confirm this is the intended data source.
API_ENDPOINTS = dict(
    clinical_trials="https://clinicaltrials.gov/api/v2/studies",
    pubchem="https://pubchem.ncbi.nlm.nih.gov/rest/pug/compound/name/{}/JSON",
    pubmed="https://eutils.ncbi.nlm.nih.gov/entrez/eutils/esearch.fcgi",
    who_drugs="https://health-products.canada.ca/api/drug/product",
    fda_drug_approval="https://api.fda.gov/drug/label.json",
    faers_adverse_events="https://api.fda.gov/drug/event.json",
    pharmgkb_variant_clinical_annotations="https://api.pharmgkb.org/v1/data/variant/{}/clinicalAnnotations",
    pharmgkb_gene_variants="https://api.pharmgkb.org/v1/data/gene/{}/variants",
    bioportal_search="https://data.bioontology.org/search",
    dailymed="https://dailymed.nlm.nih.gov/dailymed/services/v2/spls.json",
)
# -----------------------------------
# Retrieve Secrets from st.secrets
# -----------------------------------
# Each value is None when the corresponding entry is absent from st.secrets.
OPENAI_API_KEY = st.secrets.get("OPENAI_API_KEY")
BIOPORTAL_API_KEY = st.secrets.get("BIOPORTAL_API_KEY")
PUB_EMAIL = st.secrets.get("PUB_EMAIL")
OPENFDA_KEY = st.secrets.get("OPENFDA_KEY")

# Show one error banner per missing secret; the app keeps running so that the
# features which do not need the missing secret stay usable.
for _secret_value, _problem in (
    (PUB_EMAIL, "PubMed email (PUB_EMAIL) is not configured in secrets."),
    (BIOPORTAL_API_KEY, "BioPortal API key (BIOPORTAL_API_KEY) is not configured in secrets."),
    (OPENFDA_KEY, "OpenFDA API key (OPENFDA_KEY) is not configured in secrets."),
    (OPENAI_API_KEY, "OpenAI API key (OPENAI_API_KEY) is not configured in secrets."),
):
    if not _secret_value:
        st.error(_problem)
# -----------------------------------
# Initialize OpenAI Client (Latest SDK, GPT‑4)
# -----------------------------------
from openai import OpenAI

# Build the shared OpenAI client. Construction is wrapped because a failure
# here (presumably when no API key can be resolved -- confirm against the
# installed SDK) would otherwise abort the whole script and take down every
# tab, including the ones that never call the model. On failure `client` is
# None and only content generation is unavailable.
try:
    client = OpenAI(api_key=OPENAI_API_KEY)
except Exception as e:
    logging.error(f"OpenAI client initialization error: {e}")
    client = None
def generate_content(prompt: str) -> str:
    """
    Generate text for *prompt* with GPT-4 via the chat completions endpoint.

    The prompt is sent as a single user message with ``max_tokens=300``.

    Returns:
        The stripped text of the first choice, or the literal string
        "Content generation failed." when the client is unavailable, the
        request raises, or the reply carries no text.
    """
    failure_message = "Content generation failed."

    # The module-level client may be None when it could not be constructed.
    if client is None:
        st.error("Error generating content: OpenAI client is not configured.")
        logging.error("OpenAI generation error: client is not configured.")
        return failure_message

    try:
        completion = client.chat.completions.create(
            model="gpt-4",
            messages=[{"role": "user", "content": prompt}],
            max_tokens=300,
        )
        content = completion.choices[0].message.content
    except Exception as e:
        # Boundary handler: any SDK/network failure is reported, not raised.
        st.error(f"Error generating content: {e}")
        logging.error(f"OpenAI generation error: {e}")
        return failure_message

    # Guard explicitly so a missing text body yields a clear message rather
    # than an AttributeError from calling .strip() on None.
    if content is None:
        st.error("Error generating content: the model returned no text.")
        logging.error("OpenAI generation error: empty message content.")
        return failure_message
    return content.strip()
# -----------------------------
# Utility Functions
# -----------------------------
def _query_api(endpoint: str, params: Optional[Dict] = None, headers: Optional[Dict] = None) -> Optional[Dict]:
    """
    Issue a GET request and return the decoded JSON body.

    Uses a 15 second timeout. Any failure (HTTP error status, transport
    error, or anything else raised while fetching/decoding) is shown in the
    UI, logged, and turned into a ``None`` return value.
    """
    try:
        reply = requests.get(endpoint, params=params, headers=headers, timeout=15)
        reply.raise_for_status()
        payload = reply.json()
    # HTTPError is listed before RequestException so that it gets its own label.
    except requests.exceptions.HTTPError as http_err:
        failure = f"HTTP error: {http_err} for endpoint {endpoint}."
    except requests.exceptions.RequestException as req_err:
        failure = f"Request error: {req_err} for endpoint {endpoint}."
    except Exception as e:
        failure = f"Unexpected error: {e} for endpoint {endpoint}."
    else:
        return payload

    # Only reached when one of the handlers above ran.
    st.error(failure)
    logging.error(failure)
    return None
def _get_pubchem_smiles(drug_name: str) -> Optional[str]:
    """
    Retrieve a drug's SMILES string from PubChem by compound name.

    Args:
        drug_name: Compound name; it is URL-quoted before being placed in
            the request path.

    Returns:
        A SMILES string from the first returned compound, or ``None`` when
        the name is blank, the lookup fails, or no SMILES property exists.
    """
    from urllib.parse import quote

    name = drug_name.strip() if drug_name else ""
    if not name:
        return None

    # The name becomes a URL path segment, so characters such as "/" or "?"
    # must be escaped.
    url = API_ENDPOINTS["pubchem"].format(quote(name, safe=""))
    data = _query_api(url)
    compounds = data.get('PC_Compounds') if data else None
    if not compounds:
        return None

    # PubChem compound records describe each property under prop["urn"]
    # (a "label" plus an optional qualifying "name") with the payload under
    # prop["value"]; the flat prop["name"] check is kept as a legacy fallback.
    smiles_by_kind: Dict[str, str] = {}
    for prop in compounds[0].get('props', []):
        sval = (prop.get('value') or {}).get('sval')
        if not sval:
            continue
        if prop.get('name') == 'Canonical SMILES':
            return sval
        urn = prop.get('urn') or {}
        if urn.get('label') == 'SMILES':
            smiles_by_kind.setdefault(urn.get('name', ''), sval)

    # Prefer the canonical form; otherwise take whichever SMILES flavour the
    # record provides.
    for kind in ('Canonical', 'Absolute', 'Isomeric', 'Connectivity'):
        if kind in smiles_by_kind:
            return smiles_by_kind[kind]
    return next(iter(smiles_by_kind.values()), None)
def _draw_molecule(smiles: str) -> Optional[Any]:
    """
    Render a 2D depiction of the molecule described by *smiles*.

    Returns the image produced by ``Draw.MolToImage``, or ``None`` (after
    reporting the problem in the UI) when the SMILES cannot be parsed or
    drawing raises.
    """
    try:
        parsed = Chem.MolFromSmiles(smiles)
        if not parsed:
            st.error("Invalid SMILES string.")
            return None
        return Draw.MolToImage(parsed)
    except Exception as e:
        st.error(f"Error drawing molecule: {e}")
        logging.error(f"Molecule drawing error: {e}")
        return None
def _get_clinical_trials(query: str, email: Optional[str] = PUB_EMAIL) -> Optional[Dict]:
    """
    Query the ClinicalTrials.gov v2 studies endpoint.

    Args:
        query: Free-text search term, or an NCT identifier ("NCT" followed
            by digits) to fetch one specific study.
        email: Accepted for backward compatibility only. It is a PubMed
            E-utilities concept and is not sent to ClinicalTrials.gov.

    Returns:
        The decoded JSON response, or ``None`` when the query is blank or
        the request fails.
    """
    term = (query or "").strip()
    if not term:
        st.error("Please provide a search term for clinical trials.")
        return None

    # The v2 API uses its own parameter names; the E-utilities style names
    # (term/retmax/retmode/email/id/fmt) do not belong to it.
    if term.upper().startswith("NCT") and term[3:].isdigit():
        params = {"filter.ids": term.upper(), "format": "json"}
    else:
        params = {"query.term": term, "pageSize": 10, "format": "json"}
    return _query_api(API_ENDPOINTS["clinical_trials"], params)
def _get_pubmed(query: str, email: Optional[str] = PUB_EMAIL) -> Optional[Dict]:
    """
    Search PubMed through the E-utilities esearch endpoint.

    Requests up to 10 results as JSON. Returns the decoded response, or
    ``None`` when no email is configured or the request fails.
    """
    if email:
        search_params = dict(db="pubmed", term=query, retmax=10, retmode="json", email=email)
        return _query_api(API_ENDPOINTS["pubmed"], search_params)

    st.error("PubMed email not configured.")
    return None
def _get_fda_approval(drug_name: str, api_key: Optional[str] = OPENFDA_KEY) -> Optional[Dict]:
    """
    Look up the first openFDA drug-label record whose brand name matches.

    Returns the first entry of the response's ``results`` list, or ``None``
    when no API key is configured, the request fails, or nothing matches.
    """
    if not api_key:
        st.error("OpenFDA API key not configured.")
        return None

    request_params = {
        "api_key": api_key,
        "search": f'openfda.brand_name:"{drug_name}"',
        "limit": 1,
    }
    response = _query_api(API_ENDPOINTS["fda_drug_approval"], request_params)
    matches = response.get('results') if response else None
    return matches[0] if matches else None
def _analyze_adverse_events(drug_name: str, api_key: Optional[str] = OPENFDA_KEY, limit: int = 5) -> Optional[Dict]:
    """
    Fetch adverse event reports for a medicinal product from openFDA.

    Returns the decoded response for up to *limit* reports, or ``None``
    when no API key is configured or the request fails.
    """
    if api_key:
        search_expression = f'patient.drug.medicinalproduct:"{drug_name}"'
        return _query_api(
            API_ENDPOINTS["faers_adverse_events"],
            dict(api_key=api_key, search=search_expression, limit=limit),
        )

    st.error("OpenFDA API key not configured.")
    return None
def _get_pharmgkb_clinical_annotations(variant_id: str) -> Optional[Dict]:
    """
    Fetch PharmGKB clinical annotations for one variant.

    Returns the whole decoded response when its ``data`` entry is
    non-empty; otherwise writes a notice to the page and returns ``None``.
    """
    url = API_ENDPOINTS["pharmgkb_variant_clinical_annotations"].format(variant_id)
    payload = _query_api(url)
    if not (payload and payload.get('data')):
        st.write(f"No clinical annotations found for variant {variant_id}.")
        return None
    return payload
def _get_pharmgkb_variants_for_gene(gene_symbol: str) -> Optional[List[str]]:
    """
    Resolve a gene symbol to the IDs of its PharmGKB variants.

    Two requests are made: a gene search by name, then a variants lookup
    using the ID of the first gene hit. Returns the list of variant IDs, or
    ``None`` (after a warning) when either step yields no data.
    """
    def _nothing_found() -> None:
        st.warning(f"No variants found for gene {gene_symbol}.")
        return None

    # Step 1: find the gene record by symbol.
    gene_hits = _query_api("https://api.pharmgkb.org/v1/data/gene", {"name": gene_symbol})
    gene_records = gene_hits.get('data') if gene_hits else None
    if not gene_records:
        return _nothing_found()

    # Step 2: list the variants of the first matching gene.
    first_gene_id = gene_records[0]['id']
    variant_hits = _query_api(API_ENDPOINTS["pharmgkb_gene_variants"].format(first_gene_id))
    variant_records = variant_hits.get('data') if variant_hits else None
    if not variant_records:
        return _nothing_found()

    return [record['id'] for record in variant_records]
def scrape_ema_drug_info(drug_name: str) -> Optional[Dict]:
    """
    Scrape the EMA website for basic information about a drug.

    Searches the EMA site for *drug_name*, follows the first result link,
    and reads the approval status and indications from that page.

    Returns:
        A dict with the keys "Drug Name", "EMA Approval Status",
        "Indications" and "EMA Drug Page"; fields missing from the page are
        reported as "Not Available". Returns ``None`` when no result link
        is found or any request/parsing step raises.
    """
    from urllib.parse import urljoin

    base_url = "https://www.ema.europa.eu"

    def _text_or_default(node) -> str:
        # A missing element is reported as "Not Available" instead of raising.
        return node.get_text(strip=True) if node else "Not Available"

    try:
        # Let requests build the query string so that characters such as
        # "&" or "#" in the drug name are escaped correctly.
        response = requests.get(
            f"{base_url}/en/search",
            params={"text": drug_name, "type": "Product"},
            timeout=10,
        )
        response.raise_for_status()
        soup = BeautifulSoup(response.text, 'html.parser')

        result = soup.find('a', class_='search-result__title')
        href = result.get('href') if result else None
        if not href:
            st.warning(f"No EMA data found for '{drug_name}'.")
            return None

        # urljoin copes with both site-relative and absolute hrefs; plain
        # concatenation would mangle an absolute link.
        drug_page_url = urljoin(base_url + "/", href)
        drug_response = requests.get(drug_page_url, timeout=10)
        drug_response.raise_for_status()
        drug_soup = BeautifulSoup(drug_response.text, 'html.parser')

        return {
            "Drug Name": drug_name,
            "EMA Approval Status": _text_or_default(drug_soup.find('span', class_='product-status')),
            "Indications": _text_or_default(drug_soup.find('div', class_='indications')),
            "EMA Drug Page": drug_page_url,
        }
    except Exception as e:
        st.error(f"Error scraping EMA data: {e}")
        logging.error(f"EMA scraping error: {e}")
        return None
def _get_dailymed_label(drug_name: str) -> Optional[Dict]:
    """
    Retrieve a link to the first DailyMed label matching *drug_name*.

    Returns:
        ``{"Label URL": <url>}`` for the first record, or ``None`` (after a
        warning) when nothing usable is found or the lookup raises.
    """
    try:
        params = {"drug_name": drug_name, "page": 1, "pagesize": 1}
        data = _query_api(API_ENDPOINTS["dailymed"], params)
        records = data.get('data') if data else None
        if records:
            first = records[0]
            label_url = first.get('url')
            # NOTE(review): DailyMed SPL list entries appear to identify a
            # label by "setid" rather than carrying a "url" field -- confirm
            # against the service docs. Indexing ['url'] directly turned
            # every successful lookup into a KeyError, so fall back to
            # building the public label link from the set ID.
            if not label_url and first.get('setid'):
                label_url = f"https://dailymed.nlm.nih.gov/dailymed/drugInfo.cfm?setid={first['setid']}"
            if label_url:
                return {"Label URL": label_url}
        st.warning(f"No DailyMed label found for '{drug_name}'.")
        return None
    except Exception as e:
        st.error(f"Error fetching DailyMed data: {e}")
        logging.error(f"DailyMed error: {e}")
        return None
def _get_bioportal_data(ontology: str, term: str) -> Optional[Dict]:
    """
    Search one BioPortal ontology for *term*.

    Returns the decoded response when its ``collection`` entry is
    non-empty. Returns ``None`` when the API key or the term is missing
    (error shown) or when the search yields nothing (warning shown).
    """
    # Preconditions, checked in this order: API key first, then the term.
    if not BIOPORTAL_API_KEY:
        st.error("BioPortal API key not configured.")
        return None
    if not term:
        st.error("Please provide a search term for ontology search.")
        return None

    search_results = _query_api(
        API_ENDPOINTS["bioportal_search"],
        {"q": term, "ontologies": ontology},
        {"Authorization": f"apikey token={BIOPORTAL_API_KEY}"},
    )
    if not (search_results and search_results.get('collection')):
        st.warning("No results found for the BioPortal query.")
        return None
    return search_results
def _save_pdf_report(report_content: str, filename: str) -> Optional[str]:
    """
    Write *report_content* to a single-page-flow PDF at *filename*.

    The text is rendered as-is (no markdown interpretation) in 12pt Arial.

    Returns:
        *filename* on success, or ``None`` after reporting the error.
    """
    try:
        # The built-in Arial font only covers Latin-1. Scraped text can
        # contain other characters (dashes, quotes, symbols) that would make
        # PDF generation fail, so unsupported characters become "?".
        printable = (report_content or "").encode("latin-1", "replace").decode("latin-1")

        pdf = FPDF()
        pdf.add_page()
        pdf.set_font("Arial", size=12)
        pdf.multi_cell(0, 10, printable)
        pdf.output(filename)
        return filename
    except Exception as e:
        st.error(f"Error saving PDF report: {e}")
        logging.error(f"PDF save error: {e}")
        return None
def _display_dataframe(data: List[Dict[str, Any]], columns: List[str]):
    """
    Render *data* as a table restricted to *columns*.

    Returns the DataFrame that was displayed, or ``None`` (after a warning)
    when *data* is empty.
    """
    if not data:
        st.warning("No data available for display.")
        return None

    frame = pd.DataFrame(data, columns=columns)
    st.dataframe(frame)
    return frame
# -----------------------------
# Streamlit App Layout and Tabs
# -----------------------------
st.title("🔬 Pharma Research Expert Platform")
st.markdown("An integrated platform for drug discovery, clinical research, and regulatory affairs.")

# Tab captions in display order; the sections below address them by index
# (tabs[0] .. tabs[5]).
_TAB_LABELS = [
    "💊 Drug Development",
    "📊 Trial Analytics",
    "🧬 Molecular Profiling",
    "📜 Regulatory Intelligence",
    "📚 Literature Search",
    "📈 Dashboard",
]
tabs = st.tabs(_TAB_LABELS)
# -----------------------------
# Tab 1: Drug Development
# -----------------------------
# Tab 1 collects a target, an optional gene and a strategy, then on demand
# shows a generated development plan, an FDA label lookup and PharmGKB
# clinical annotations.
with tabs[0]:
    st.header("AI-Driven Drug Development Strategy")
    target = st.text_input("Target Disease/Pathway:", placeholder="Enter disease mechanism or target")
    target_gene = st.text_input("Target Gene (for Pharmacogenomics):", placeholder="Enter the gene symbol")
    strategy = st.selectbox("Development Strategy:", ["First-in-class", "Me-too", "Repurposing", "Biologic"])

    if st.button("Generate Development Plan"):
        with st.spinner("Generating plan..."):
            plan_prompt = (
                f"Develop a comprehensive drug development plan for treating {target} "
                f"using a {strategy} strategy. Include sections on target validation, lead optimization, "
                f"preclinical testing, clinical trial design, regulatory submission strategy, market analysis, "
                f"and competitive landscape. Highlight key milestones and challenges."
            )
            plan = generate_content(plan_prompt)
            st.subheader("Comprehensive Development Plan")
            st.markdown(plan)

            # FDA Regulatory Insights
            # Only the first word of the target is used for the label search.
            # Splitting first (instead of indexing target.split()[0] behind a
            # plain truthiness check) avoids an IndexError when the input is
            # whitespace only.
            target_terms = target.split()
            if target_terms:
                fda_info = _get_fda_approval(target_terms[0])
                st.subheader("FDA Regulatory Insights")
                if fda_info:
                    st.json(fda_info)
                else:
                    st.write("No FDA data found for the given target.")

            # Pharmacogenomic Considerations via PharmGKB
            st.subheader("Pharmacogenomic Considerations")
            gene_symbol = target_gene.strip()
            if gene_symbol:
                variant_ids = _get_pharmgkb_variants_for_gene(gene_symbol)
                if variant_ids:
                    # Cap at five variants to bound the number of API calls.
                    for variant_id in variant_ids[:5]:
                        st.write(f"### Clinical Annotations for Variant: {variant_id}")
                        pgx_data = _get_pharmgkb_clinical_annotations(variant_id)
                        if pgx_data:
                            st.json(pgx_data)
                        else:
                            st.write(f"No annotations for variant {variant_id}.")
                else:
                    st.write("No variants found for the specified gene.")
            else:
                st.write("Please enter a target gene for pharmacogenomic data.")
# -----------------------------
# Tab 2: Clinical Trial Analytics
# -----------------------------
# Tab 2 lists up to five matching clinical trials and summarises up to five
# adverse event reports for the same query.
with tabs[1]:
    st.header("Clinical Trial Landscape Analytics")
    trial_query = st.text_input("Search Clinical Trials:", placeholder="Enter condition, intervention, or NCT number")

    if st.button("Analyze Trial Landscape"):
        with st.spinner("Fetching trial data..."):
            trials = _get_clinical_trials(trial_query)
            studies = trials.get('studies') if trials else None
            if studies:
                trial_data = []
                for study in studies[:5]:
                    # ClinicalTrials.gov v2 nests study fields under
                    # protocolSection.<module>; the flat keys are kept as a
                    # fallback for any other record shape.
                    protocol = study.get('protocolSection') or {}
                    identification = protocol.get('identificationModule') or {}
                    status = protocol.get('statusModule') or {}
                    design = protocol.get('designModule') or {}
                    enrollment = design.get('enrollmentInfo') or {}
                    phases = design.get('phases')
                    trial_data.append({
                        "Title": identification.get('briefTitle') or study.get('briefTitle', 'N/A'),
                        "Status": status.get('overallStatus') or study.get('overallStatus', 'N/A'),
                        "Phase": ', '.join(phases) if phases else study.get('phase', 'Not Available'),
                        # .get with a default (not "or") so an enrollment of 0 is kept.
                        "Enrollment": enrollment.get('count', study.get('enrollmentCount', 'Not Available')),
                    })
                _display_dataframe(trial_data, list(trial_data[0].keys()))
            else:
                st.warning("No clinical trials found for the given query.")

            ae_data = _analyze_adverse_events(trial_query)
            ae_results = ae_data['results'][:5] if ae_data and ae_data.get('results') else []
            if ae_results:
                st.subheader("Adverse Event Profile (Top 5 Reports)")
                st.dataframe(pd.json_normalize(ae_results))

                # Each report holds a *list* of reactions under
                # patient.reaction, which json_normalize does not expand into
                # a "patient.reaction.reactionmeddrapt" column; collect the
                # reaction terms from the raw records instead.
                reactions = [
                    reaction['reactionmeddrapt']
                    for report in ae_results
                    for reaction in ((report.get('patient') or {}).get('reaction') or [])
                    if isinstance(reaction, dict) and reaction.get('reactionmeddrapt')
                ]
                if reactions:
                    fig = None
                    try:
                        top_reactions = pd.Series(reactions).value_counts().nlargest(10)
                        fig, ax = plt.subplots(figsize=(10, 6))
                        sns.barplot(x=top_reactions.values, y=top_reactions.index, ax=ax)
                        ax.set_title('Top Adverse Reactions')
                        ax.set_xlabel('Frequency')
                        ax.set_ylabel('Reaction')
                        st.pyplot(fig)
                    except Exception as e:
                        st.error(f"Error visualizing adverse events: {e}")
                    finally:
                        # The script reruns on every interaction; close the
                        # figure so figures do not pile up in memory.
                        if fig is not None:
                            plt.close(fig)
            else:
                st.write("No adverse event data available.")
# -----------------------------
# Tab 3: Molecular Profiling
# -----------------------------
def _pubchem_property(props: List[Dict[str, Any]], labels: List[str], default: Any = 'N/A') -> Any:
    """Return the value of the first PubChem property whose label is in *labels*."""
    for prop in props:
        # Records describe a property under prop["urn"]["label"]; the flat
        # prop["name"] form is accepted as a legacy fallback.
        label = (prop.get('urn') or {}).get('label', prop.get('name'))
        if label in labels:
            value = prop.get('value') or {}
            # The payload is typed: string, float or integer.
            for key in ('sval', 'fval', 'ival'):
                if key in value:
                    return value[key]
    return default


# Tab 3 draws the 2D structure of a compound and lists two physicochemical
# properties fetched from PubChem.
with tabs[2]:
    st.header("Advanced Molecular Profiling")
    compound_input = st.text_input("Compound Identifier:", placeholder="Enter drug name, SMILES, or INN")

    if st.button("Analyze Compound"):
        from urllib.parse import quote

        compound = compound_input.strip()
        if not compound:
            # Blank input would otherwise be passed to the SMILES parser and
            # to PubChem as an empty name.
            st.warning("Please enter a compound identifier.")
        else:
            with st.spinner("Querying PubChem..."):
                # Treat the input as SMILES when RDKit can parse it;
                # otherwise resolve it as a name through PubChem.
                if Chem.MolFromSmiles(compound):
                    smiles = compound
                else:
                    smiles = _get_pubchem_smiles(compound)

                if smiles:
                    img = _draw_molecule(smiles)
                    if img:
                        st.image(img, caption="2D Structure")
                else:
                    st.error("Compound structure not found in databases.")

                # NOTE(review): this lookup is by *name* even when the input
                # was a SMILES string -- confirm whether a structure-based
                # lookup is wanted for that case.
                pubchem_data = _query_api(API_ENDPOINTS["pubchem"].format(quote(compound, safe="")))
                if pubchem_data and pubchem_data.get('PC_Compounds'):
                    st.subheader("Physicochemical Properties")
                    props = pubchem_data['PC_Compounds'][0].get('props', [])
                    mw = _pubchem_property(props, ['Molecular Weight'])
                    logp = _pubchem_property(props, ['Log P', 'LogP'])
                    st.write(f"**Molecular Weight:** {mw} \n**LogP:** {logp}")
                else:
                    st.error("Physicochemical properties not available.")
# -----------------------------
# Tab 4: Regulatory Intelligence
# -----------------------------
# Tab 4 gathers regulatory information for one drug from several sources,
# shows it in four columns and offers the summary as a PDF download.
with tabs[3]:
    st.header("Global Regulatory Monitoring")
    drug_name = st.text_input("Drug Product:", placeholder="Enter generic or brand name")

    if st.button("Generate Regulatory Report"):
        if not drug_name.strip():
            # Avoid firing four lookups (and building a report) for nothing.
            st.warning("Please enter a drug product name.")
        else:
            with st.spinner("Compiling regulatory data..."):
                fda_info = _get_fda_approval(drug_name)
                fda_status = "Not Approved"
                if fda_info and fda_info.get('openfda', {}).get('brand_name'):
                    fda_status = ', '.join(fda_info['openfda']['brand_name'])

                ema_info = scrape_ema_drug_info(drug_name)
                ema_status = ema_info.get('EMA Approval Status') if ema_info else "Not Available"

                # Any non-empty response counts as "Yes".
                who = _query_api(API_ENDPOINTS["who_drugs"], params={"name": drug_name})
                who_status = "Yes" if who else "No"

                dailymed_info = _get_dailymed_label(drug_name)
                dailymed_status = dailymed_info.get("Label URL") if dailymed_info else "Not Available"

                st.subheader("Regulatory Status")
                col1, col2, col3, col4 = st.columns(4)
                with col1:
                    st.markdown("**FDA Status**")
                    st.write(fda_status)
                with col2:
                    st.markdown("**EMA Status**")
                    st.write(ema_status)
                with col3:
                    st.markdown("**WHO Essential Medicine**")
                    st.write(who_status)
                with col4:
                    st.markdown("**DailyMed Label**")
                    st.write(dailymed_status)

                if dailymed_info and dailymed_info.get("Label URL"):
                    st.markdown(f"[View DailyMed Label]({dailymed_info['Label URL']})")
                if ema_info and ema_info.get("EMA Drug Page"):
                    st.markdown(f"[View EMA Drug Page]({ema_info['EMA Drug Page']})")

                regulatory_content = (
                    f"### Regulatory Report\n\n"
                    f"**FDA Status:** {fda_status}\n\n"
                    f"**EMA Status:** {ema_status}\n\n"
                    f"**WHO Essential Medicine:** {who_status}\n\n"
                    f"**DailyMed Label:** {dailymed_status}\n"
                )
                if ema_info and ema_info.get("EMA Drug Page"):
                    regulatory_content += f"\n**EMA Drug Page:** {ema_info['EMA Drug Page']}\n"

                # Reserve a temp path, then close the handle before the PDF
                # writer opens the same path (an open handle blocks that on
                # some platforms). The file is read into memory and removed
                # in `finally`, so it is cleaned up even when PDF generation
                # fails.
                tmp_file = tempfile.NamedTemporaryFile(delete=False, suffix=".pdf")
                tmp_file.close()
                pdf_bytes = None
                try:
                    report_file = _save_pdf_report(regulatory_content, tmp_file.name)
                    if report_file:
                        with open(report_file, "rb") as file:
                            pdf_bytes = file.read()
                finally:
                    if os.path.exists(tmp_file.name):
                        os.remove(tmp_file.name)

                if pdf_bytes:
                    st.download_button(
                        label="Download Regulatory Report (PDF)",
                        data=pdf_bytes,
                        file_name=f"{drug_name}_regulatory_report.pdf",
                        mime="application/pdf"
                    )
# -----------------------------
# Tab 5: Literature Search (PubMed and BioPortal)
# -----------------------------
# Tab 5 offers two independent searches: PubMed article IDs and BioPortal
# ontology terms.
with tabs[4]:
    # --- PubMed ---
    st.header("Literature Search")
    search_term = st.text_input("Enter search query for PubMed:", placeholder="e.g., Alzheimer's disease genetics")
    if st.button("Search PubMed"):
        with st.spinner("Searching PubMed..."):
            pubmed_data = _get_pubmed(search_term)
            pmids = pubmed_data.get('esearchresult', {}).get('idlist') if pubmed_data else None
            if not pmids:
                st.write("No PubMed results found.")
            else:
                st.subheader(f"Found {len(pmids)} PubMed Results")
                # One bullet per article, linking to its PubMed page.
                for pmid in pmids:
                    st.markdown(f"- [PMID: {pmid}](https://pubmed.ncbi.nlm.nih.gov/{pmid}/)")

    # --- BioPortal ---
    st.header("Ontology Search")
    ontology_search_term = st.text_input("Enter search query for Ontology:", placeholder="e.g., Alzheimer's disease")
    ontology_select = st.selectbox("Select Ontology", ["MESH", "NCIT", "GO", "SNOMEDCT"])
    if st.button("Search BioPortal"):
        with st.spinner("Searching BioPortal..."):
            bioportal_data = _get_bioportal_data(ontology_select, ontology_search_term)
            matches = bioportal_data.get('collection') if bioportal_data else None
            if not matches:
                st.write("No ontology results found.")
            else:
                st.subheader(f"BioPortal Results for {ontology_select}")
                for match in matches:
                    term_label = match.get('prefLabel', 'N/A')
                    term_id = match.get('@id', 'N/A')
                    st.markdown(f"- **{term_label}** ({term_id})")
# -----------------------------
# Tab 6: Dashboard
# -----------------------------
# Tab 6 shows placeholder KPIs, a placeholder approvals trend and a sample
# gene-variant-drug network. None of the figures here come from live data.
with tabs[5]:
    st.header("Comprehensive Dashboard")

    # Placeholder KPI counts (replace with actual aggregated data as available)
    fda_count = 5000      # Example value
    ema_count = 3000      # Example value
    who_count = 1500      # Example value
    trials_count = 12000  # Example value
    pub_count = 250000    # Example value

    def _create_kpi_dashboard(fda: int, ema: int, who: int, trials: int, pubs: int):
        """Render the five KPI counters side by side."""
        col1, col2, col3, col4, col5 = st.columns(5)
        col1.metric("FDA Approved Drugs", fda)
        col2.metric("EMA Approved Drugs", ema)
        col3.metric("WHO Essential Medicines", who)
        col4.metric("Ongoing Trials", trials)
        col5.metric("Publications", pubs)

    _create_kpi_dashboard(fda_count, ema_count, who_count, trials_count, pub_count)

    st.subheader("Trend Analysis")
    years = list(range(2000, 2026))
    # Placeholder example data: the FDA total spread evenly over the years.
    approvals_per_year = [fda_count // len(years)] * len(years)
    fig, ax = plt.subplots(figsize=(10, 6))
    try:
        sns.lineplot(x=years, y=approvals_per_year, marker="o", ax=ax)
        ax.set_title("FDA Approvals Over Time")
        ax.set_xlabel("Year")
        ax.set_ylabel("Number of Approvals")
        st.pyplot(fig)
    finally:
        # The script reruns on every interaction; close the figure so
        # figures do not accumulate in memory.
        plt.close(fig)

    st.subheader("Gene-Variant-Drug Network (Sample)")
    # Sample network demonstration
    sample_gene = "CYP2C19"
    sample_variants = ["rs4244285", "rs12248560"]
    sample_annotations = {
        "rs4244285": ["Clopidogrel", "Omeprazole"],
        "rs12248560": ["Sertraline"]
    }

    try:
        # Imported here so that a missing optional package only disables the
        # network graph (the except below reports it) rather than the app.
        import networkx as nx
        import plotly.graph_objects as go

        def _create_variant_network(gene: str, variants: List[str], annotations: Dict) -> go.Figure:
            """Build a gene -> variant -> drug graph as a Plotly figure."""
            G = nx.Graph()
            G.add_node(gene, color='lightblue')
            for variant in variants:
                G.add_node(variant, color='lightgreen')
                G.add_edge(gene, variant)
                for drug in annotations.get(variant, []):
                    if drug != 'N/A':
                        G.add_node(drug, color='lightcoral')
                        G.add_edge(variant, drug)

            # Fixed seed: without it the layout is re-randomised on every
            # rerun and the graph jumps around.
            pos = nx.spring_layout(G, seed=42)

            # Edges are drawn as one trace; None breaks the line between
            # consecutive segments.
            edge_x, edge_y = [], []
            for edge in G.edges():
                x0, y0 = pos[edge[0]]
                x1, y1 = pos[edge[1]]
                edge_x.extend([x0, x1, None])
                edge_y.extend([y0, y1, None])
            edge_trace = go.Scatter(x=edge_x, y=edge_y, line=dict(width=0.5, color='#888'),
                                    hoverinfo='none', mode='lines')

            node_x, node_y, node_text, node_color = [], [], [], []
            for node in G.nodes():
                x, y = pos[node]
                node_x.append(x)
                node_y.append(y)
                node_text.append(node)
                node_color.append(G.nodes[node]['color'])
            node_trace = go.Scatter(
                x=node_x, y=node_y, mode='markers+text', hoverinfo='text',
                text=node_text, textposition="bottom center",
                marker=dict(showscale=False, colorscale='YlGnBu', color=node_color, size=10, line_width=2)
            )

            fig = go.Figure(data=[edge_trace, node_trace],
                            layout=go.Layout(
                                # Use the nested title.font form; the older
                                # `titlefont_size` shortcut is deprecated and
                                # rejected by newer Plotly releases.
                                title=dict(text='<br>Gene-Variant-Drug Network', font=dict(size=16)),
                                showlegend=False,
                                hovermode='closest',
                                margin=dict(b=20, l=5, r=5, t=40),
                                xaxis=dict(showgrid=False, zeroline=False, showticklabels=False),
                                yaxis=dict(showgrid=False, zeroline=False, showticklabels=False)
                            ))
            return fig

        network_fig = _create_variant_network(sample_gene, sample_variants, sample_annotations)
        st.plotly_chart(network_fig, use_container_width=True)
    except Exception as e:
        st.error(f"Error generating network graph: {e}")
# -----------------------------
# Sidebar Information
# -----------------------------
# Markdown shown in the sidebar "About" box.
_ABOUT_TEXT = """
**Pharma Research Expert Platform**
An integrated tool for drug discovery, clinical research, and regulatory affairs.
**Developed by:** Your Name
**Contact:** [[email protected]](mailto:[email protected])
"""

st.sidebar.header("About")
st.sidebar.info(_ABOUT_TEXT)