|
import json
import logging
import os
import tempfile
import time
import xml.etree.ElementTree as ET
from typing import Optional, Dict, List, Any
from urllib.parse import quote, urljoin

import matplotlib.pyplot as plt
import pandas as pd
import requests
import seaborn as sns
import streamlit as st
from fpdf import FPDF
from rdkit import Chem
from rdkit.Chem import Draw
from smolagents import CodeAgent, DuckDuckGoSearchTool, HfApiModel
|
|
|
|
|
# Root logger only emits ERROR and above.
logging.basicConfig(level=logging.ERROR)

# URLs of the external services used by this app. Entries containing "{}" are
# templates that callers complete with str.format().
API_ENDPOINTS = {
    "clinical_trials": "https://clinicaltrials.gov/api/v2/studies",
    "pubchem": "https://pubchem.ncbi.nlm.nih.gov/rest/pug/compound/name/{}/JSON",
    "pubmed": "https://eutils.ncbi.nlm.nih.gov/entrez/eutils/esearch.fcgi",
    # NOTE(review): the key says "who" but the host is health-products.canada.ca;
    # confirm which registry is intended.
    "who_drugs": "https://health-products.canada.ca/api/drug/product",
    "fda_drug_approval": "https://api.fda.gov/drug/label.json",
    "faers_adverse_events": "https://api.fda.gov/drug/event.json",
    "pharmgkb": "https://api.pharmgkb.org/v1/data/variant/{}/clinicalAnnotations",
    "bioportal": "https://data.bioontology.org/ontologies",
}
|
|
|
|
|
|
|
def _load_secret(key: str, missing_message: str) -> Optional[Any]:
    """Return ``st.secrets[key]``; if absent, show ``missing_message`` and return None."""
    if key not in st.secrets:
        st.error(missing_message)
        return None
    return st.secrets[key]


# NOTE(review): a missing secret calls st.error() while this module is still
# loading, i.e. before any later st.set_page_config() call in this script —
# confirm the installed Streamlit version allows that ordering.
PUBMED_EMAIL = _load_secret(
    "PUB_EMAIL",
    "PubMed email not found in secrets. Please add the PUB_EMAIL to secrets.",
)
# The ClinicalTrials.gov helper reuses the PubMed contact address.
CLINICALTRIALS_EMAIL = PUBMED_EMAIL

BIOPORTAL_API_KEY = _load_secret(
    "BIOPORTAL_API_KEY",
    "BioPortal API key not found in secrets. Please add the BIOPORTAL_API_KEY to secrets.",
)

OPENFDA_KEY = _load_secret(
    "OPENFDA_KEY",
    "OpenFDA API key not found in secrets. Please add the OPENFDA_KEY to secrets.",
)

# Agent with a DuckDuckGo search tool, used to draft development plans.
content_agent = CodeAgent(tools=[DuckDuckGoSearchTool()], model=HfApiModel())
|
|
|
|
|
def _query_api(endpoint: str, params: Optional[Dict] = None) -> Optional[Dict]:
    """Send a GET request to ``endpoint`` and return the decoded JSON body.

    Args:
        endpoint: Full URL to request.
        params: Optional query-string parameters passed to ``requests.get``.

    Returns:
        The parsed JSON payload, or ``None`` when the request fails, the server
        answers with an HTTP error status, or the body is not valid JSON. Every
        failure is shown in the UI and logged.
    """
    try:
        response = requests.get(endpoint, params=params, timeout=15)
        response.raise_for_status()
        return response.json()
    # ValueError covers a non-JSON body: response.json() raises a ValueError
    # subclass that older requests releases do not wrap as RequestException.
    except (requests.exceptions.RequestException, ValueError) as e:
        st.error(f"API request failed: {e} for endpoint {endpoint}. Please check connectivity and the endpoint.")
        logging.error(f"API request failed: {e} for endpoint {endpoint}.")
        return None
|
|
|
def _query_pubmed(query: str, email: Optional[str] = PUBMED_EMAIL) -> Optional[Dict]:
    """Run a PubMed search through the configured esearch endpoint.

    Args:
        query: Search term sent as ``term``.
        email: Contact address sent as ``email``; defaults to the configured
            PubMed e-mail.

    Returns:
        The decoded JSON response (at most 10 IDs are requested), or ``None``
        when no e-mail is configured or the request fails.
    """
    if email:
        search_params = dict(db="pubmed", term=query, retmax=10, retmode="json", email=email)
        return _query_api(API_ENDPOINTS["pubmed"], search_params)

    st.error("PubMed email not configured.")
    return None
|
|
|
|
|
def _get_pubchem_smiles(drug_name: str) -> Optional[str]:
    """Look up a compound by name on PubChem and return a SMILES string.

    Args:
        drug_name: Compound name; it is URL-encoded before being placed in the
            request path.

    Returns:
        The canonical SMILES when PubChem reports one, otherwise the first
        SMILES-labelled property, or ``None`` when the name is blank, the
        request fails, or no SMILES property is present.
    """
    name = drug_name.strip() if drug_name else ""
    if not name:
        return None

    # safe="" also encodes "/" so the name cannot add path segments.
    data = _query_api(API_ENDPOINTS["pubchem"].format(quote(name, safe="")))
    compounds = data.get("PC_Compounds") if isinstance(data, dict) else None
    if not compounds:
        return None

    first_smiles = None
    for prop in compounds[0].get("props") or []:
        if not isinstance(prop, dict):
            continue
        sval = (prop.get("value") or {}).get("sval")
        if not sval:
            continue
        # Flat shape the original code expected; kept for compatibility.
        if prop.get("name") == "Canonical SMILES":
            return sval
        # PubChem's PC_Compounds records nest the label under "urn",
        # e.g. {"urn": {"label": "SMILES", "name": "Canonical"}, ...}.
        urn = prop.get("urn") or {}
        if urn.get("label") == "SMILES":
            if urn.get("name") == "Canonical":
                return sval
            if first_smiles is None:
                first_smiles = sval
    return first_smiles
|
|
|
|
|
def _draw_molecule(smiles: str) -> Optional[Any]:
    """Render a 2D depiction of a molecule from its SMILES string.

    Args:
        smiles: SMILES string to parse with RDKit.

    Returns:
        The image object produced by ``Draw.MolToImage``, or ``None`` when the
        SMILES cannot be parsed or rendering raises. Failures are shown in the
        UI; rendering exceptions are also logged.
    """
    try:
        mol = Chem.MolFromSmiles(smiles)
        if not mol:
            st.error("Invalid SMILES string.")
            return None
        return Draw.MolToImage(mol)
    # Deliberately broad: a drawing failure should not break the page.
    except Exception as e:
        st.error(f"Error generating molecule image: {str(e)}")
        logging.error(f"Error generating molecule image: {str(e)}")
        return None
|
|
|
|
|
def _get_clinical_trials(query: str, email: Optional[str] = CLINICALTRIALS_EMAIL) -> Optional[Dict]:
    """Search ClinicalTrials.gov (API v2) by NCT number or free text.

    Args:
        query: An NCT identifier (``NCT`` followed by digits, any case) or a
            free-text search term.
        email: Contact address; the lookup is refused when it is not
            configured. It is not sent with the request because it is not among
            the documented ``/studies`` parameters.

    Returns:
        The decoded JSON response, or ``None`` when no e-mail is configured or
        the request fails.
    """
    if not email:
        st.error("Clinical Trials email not configured.")
        return None

    term = query.strip()
    if term.upper().startswith("NCT") and term[3:].isdigit():
        # v2 selects studies by NCT number through "filter.ids".
        params = {"filter.ids": term.upper(), "format": "json"}
    else:
        params = {"query.term": term, "format": "json"}
    return _query_api(API_ENDPOINTS["clinical_trials"], params)
|
|
|
|
|
def _get_fda_approval(drug_name: str, api_key: Optional[str] = OPENFDA_KEY) -> Optional[Dict]:
    """Fetch the first openFDA drug-label record matching a brand name.

    Args:
        drug_name: Brand name searched in ``openfda.brand_name``.
        api_key: openFDA API key; defaults to the configured key.

    Returns:
        The first entry of ``results``, or ``None`` when the key is missing, the
        name is blank, the request fails, or there are no results.
    """
    if not api_key:
        st.error("OpenFDA key not configured.")
        return None

    # Drop embedded quotes so the name cannot break out of the quoted phrase.
    name = drug_name.replace('"', "").strip() if drug_name else ""
    if not name:
        return None

    # Passing params lets requests URL-encode the key and the search phrase
    # (names containing "&", "#" or spaces broke the hand-built URL).
    params = {
        "api_key": api_key,
        "search": f'openfda.brand_name:"{name}"',
    }
    data = _query_api(API_ENDPOINTS["fda_drug_approval"], params)
    results = data.get("results") if isinstance(data, dict) else None
    return results[0] if results else None
|
|
|
def _analyze_adverse_events(drug_name: str, api_key: Optional[str] = OPENFDA_KEY, limit: int = 5) -> Optional[Dict]:
    """Fetch adverse-event reports for a drug from the openFDA event endpoint.

    Args:
        drug_name: Product name searched in ``patient.drug.medicinalproduct``.
        api_key: openFDA API key; defaults to the configured key.
        limit: Maximum number of reports to request.

    Returns:
        The full decoded response when it contains a ``results`` key, otherwise
        ``None`` (missing key, blank name, failed request, or no results key).
    """
    if not api_key:
        st.error("OpenFDA key not configured.")
        return None

    # Drop embedded quotes so the name cannot break out of the quoted phrase.
    name = drug_name.replace('"', "").strip() if drug_name else ""
    if not name:
        return None

    # Passing params lets requests URL-encode every value.
    params = {
        "api_key": api_key,
        "search": f'patient.drug.medicinalproduct:"{name}"',
        "limit": limit,
    }
    data = _query_api(API_ENDPOINTS["faers_adverse_events"], params)
    if data and "results" in data:
        return data
    return None
|
|
|
|
|
def _get_pharmgkb_data(gene: str) -> Optional[Dict]:
    """Fetch clinical annotations from the configured PharmGKB endpoint.

    Args:
        gene: Identifier substituted into the PharmGKB URL template.
            NOTE(review): the template path is ``/data/variant/{}/...`` —
            confirm that a gene symbol is what this endpoint expects.

    Returns:
        The decoded response when it contains ``clinicalAnnotations``, otherwise
        ``None`` (blank identifier, failed request, or key absent).
    """
    identifier = gene.strip() if gene else ""
    if not identifier:
        # A blank identifier would yield ".../variant//clinicalAnnotations".
        return None

    url = API_ENDPOINTS["pharmgkb"].format(quote(identifier, safe=""))
    data = _query_api(url)
    if data and "clinicalAnnotations" in data:
        return data
    return None
|
|
|
def _get_bioportal_data(ontology: str, term: str) -> Optional[Dict]:
    """Search BioPortal for ``term`` within one ontology.

    Args:
        ontology: Ontology acronym sent as the ``ontologies`` filter.
        term: Search text sent as ``q``.

    Returns:
        The decoded response when it contains a ``collection`` key, otherwise
        ``None`` (missing API key, empty term, failed request, or no
        collection). Each failure is reported in the UI.
    """
    if not BIOPORTAL_API_KEY:
        st.error("BioPortal API key not found. Please add the BIOPORTAL_API_KEY to secrets.")
        return None
    if not term:
        st.error("Please provide a search term.")
        return None

    headers = {"Authorization": f"apikey token={BIOPORTAL_API_KEY}"}
    params = {"q": term, "ontologies": ontology}

    # Search is served from "/search" at the API root. The configured endpoint
    # ends in "/ontologies", so appending "/search" would address an ontology
    # named "search"; urljoin with an absolute path replaces the path instead.
    url = urljoin(API_ENDPOINTS["bioportal"], "/search")
    try:
        response = requests.get(url, headers=headers, params=params, timeout=15)
        response.raise_for_status()
        data = response.json()
    # ValueError covers a non-JSON body on older requests releases.
    except (requests.exceptions.RequestException, ValueError) as e:
        st.error(f"BioPortal API request failed: {e} Please check connectivity and ensure you have the correct API Key.")
        logging.error(f"BioPortal API request failed: {e}")
        return None

    if data and "collection" in data:
        return data
    st.warning("No results found for the BioPortal query.")
    return None
|
|
|
def _save_pdf_report(report_content: str, filename: str) -> str:
    """Write ``report_content`` to a single-page-flow PDF at ``filename``.

    Args:
        report_content: Plain text to place in the document.
        filename: Destination path of the PDF.

    Returns:
        ``filename``, unchanged, so callers can open the written file.
    """
    # The built-in "Arial" font can only encode Latin-1. Replace anything else
    # with "?" so that a name containing other characters does not abort the
    # export with an encoding error.
    printable = report_content.encode("latin-1", "replace").decode("latin-1")

    pdf = FPDF()
    pdf.add_page()
    pdf.set_font("Arial", size=12)
    pdf.multi_cell(0, 10, printable)
    pdf.output(filename)
    return filename
|
|
|
def _display_dataframe(data: list, columns: list):
    """Render ``data`` as a Streamlit table and hand back the DataFrame.

    Args:
        data: Rows to display.
        columns: Column labels passed to ``pd.DataFrame``.

    Returns:
        The DataFrame that was displayed, or ``None`` (after a warning) when
        ``data`` is empty.
    """
    if not data:
        st.warning("No data found for dataframe creation.")
        return None

    frame = pd.DataFrame(data, columns=columns)
    st.dataframe(frame)
    return frame
|
|
|
|
|
# Page chrome: configuration, title and tagline.
st.set_page_config(page_title="Pharma Research Expert Platform", layout="wide")
st.title("🔬 Pharma Research Expert Platform")
st.markdown("An integrated platform for drug discovery, clinical research, and regulatory affairs.")

# One tab per workflow; the sections below index into `tabs` by position.
_TAB_LABELS = [
    "💊 Drug Development",
    "📊 Trial Analytics",
    "🧬 Molecular Profiling",
    "📜 Regulatory Intelligence",
    "📚 Literature Search",
]
tabs = st.tabs(_TAB_LABELS)
|
|
|
|
|
# Tab 0: AI-generated development plan, FDA label lookup and pharmacogenomics.
with tabs[0]:
    st.header("AI-Driven Drug Development Strategy")
    target = st.text_input("Target Disease/Pathway:", placeholder="Enter biological target or disease mechanism")
    target_gene = st.text_input("Target Gene (for pharmacogenomics)", placeholder="Enter the gene associated with target")
    strategy = st.selectbox("Development Strategy:", ["First-in-class", "Me-too", "Repurposing", "Biologic"])

    if st.button("Generate Development Plan"):
        with st.spinner("Analyzing target and competitive landscape..."):
            # split() yields [] for blank or whitespace-only input, which the
            # original indexed with [0] and crashed on.
            target_terms = target.split()

            if target_terms:
                plan_prompt = (
                    f"Develop a comprehensive drug development plan for the treatment of {target.strip()} "
                    f"using a {strategy} strategy.\n"
                    "Include sections on target validation, lead optimization, preclinical testing, "
                    "clinical trial design, regulatory submission strategy, market analysis, and "
                    "competitive landscape. Highlight key milestones and potential challenges. "
                )
                try:
                    plan = content_agent.run(plan_prompt)
                except Exception as e:
                    # UI boundary: an agent failure must not stop the rest of
                    # the page from rendering.
                    logging.error(f"Development plan generation failed: {e}")
                    st.error(f"Development plan generation failed: {e}")
                else:
                    st.subheader("Comprehensive Development Plan")
                    st.markdown(str(plan))

                # The FDA lookup uses only the first word of the target.
                fda_info = _get_fda_approval(target_terms[0])
                if fda_info:
                    st.subheader("FDA Regulatory Insights")
                    st.json(fda_info)
                else:
                    st.write("No relevant FDA data found.")
            else:
                # No target: skip the agent call instead of prompting it with
                # an empty disease name.
                st.warning("Please enter a target disease or pathway to generate a development plan.")

            st.subheader("Pharmacogenomic Considerations")
            gene = target_gene.strip()
            # Only query PharmGKB when a gene was actually entered.
            pgx_data = _get_pharmgkb_data(gene) if gene else None
            if pgx_data:
                st.write(pgx_data)
            elif gene:
                st.write("No relevant pharmacogenomic data found.")
            else:
                st.write("Enter a target gene to look up pharmacogenomic data.")
|
|
|
|
|
|
|
def _summarize_study(study: Dict[str, Any]) -> Dict[str, Any]:
    """Flatten one ClinicalTrials.gov study record into the table columns.

    API v2 nests the fields under ``protocolSection``; flat top-level keys are
    still honoured as a fallback. Missing values become ``'Not Available'``.
    """
    missing = "Not Available"
    protocol = study.get("protocolSection") or {}
    identification = protocol.get("identificationModule") or {}
    status = protocol.get("statusModule") or {}
    design = protocol.get("designModule") or {}
    phases = design.get("phases") or []
    enrollment = (design.get("enrollmentInfo") or {}).get("count")
    return {
        "Title": identification.get("briefTitle") or study.get("briefTitle", missing),
        "Status": status.get("overallStatus") or study.get("overallStatus", missing),
        "Phase": ", ".join(phases) if phases else study.get("phase", missing),
        "Enrollment": enrollment if enrollment is not None else study.get("enrollmentCount", missing),
    }


def _collect_drug_reactions(reports: List[Any]) -> List[tuple]:
    """Return (drug, reaction) pairs for every drug/reaction combination per report."""
    pairs = []
    for report in reports:
        patient = report.get("patient") if isinstance(report, dict) else None
        if not isinstance(patient, dict) or "drug" not in patient or "reaction" not in patient:
            continue
        reaction_names = [
            reaction.get("reactionmeddrapt", "")
            for reaction in patient["reaction"]
            if isinstance(reaction, dict)
        ]
        for drug in patient["drug"]:
            if isinstance(drug, dict) and "medicinalproduct" in drug:
                # Unnamed reactions are skipped so they do not form a blank bar.
                pairs.extend((drug["medicinalproduct"], name) for name in reaction_names if name)
    return pairs


# Tab 1: clinical-trial search plus an adverse-event profile for the same term.
with tabs[1]:
    st.header("Clinical Trial Landscape Analytics")
    trial_query = st.text_input("Search Clinical Trials:", placeholder="Enter condition, intervention, or NCT number")

    if st.button("Analyze Trial Landscape"):
        with st.spinner("Aggregating global trial data..."):
            trials = _get_clinical_trials(trial_query)
            studies = trials.get("studies") if isinstance(trials, dict) else None

            if studies:
                st.subheader("Recent Clinical Trials")
                trial_data = [_summarize_study(study) for study in studies[:5]]
                st.markdown("### Clinical Trial Summary (First 5 trials)")
                # _display_dataframe renders the table itself, so it is not
                # passed to st.dataframe a second time.
                _display_dataframe(trial_data, list(trial_data[0].keys()))

                ae_data = _analyze_adverse_events(trial_query)
                ae_results = (ae_data.get("results") or [])[:5] if isinstance(ae_data, dict) else []
                if ae_results:
                    st.subheader("Adverse Event Profile (Top 5 Reports)")
                    st.dataframe(pd.DataFrame(ae_results))

                    try:
                        drug_events = _collect_drug_reactions(ae_results)
                        if drug_events:
                            df_drug_events = pd.DataFrame(drug_events, columns=['Drug', 'Reaction'])
                            top_reactions = df_drug_events['Reaction'].value_counts().nlargest(10)

                            fig, ax = plt.subplots(figsize=(10, 6))
                            try:
                                sns.barplot(x=top_reactions.index, y=top_reactions.values, ax=ax)
                                plt.setp(ax.get_xticklabels(), rotation=45, ha="right")
                                ax.set_title('Top Adverse Reactions')
                                ax.set_xlabel('Adverse Reaction')
                                ax.set_ylabel('Frequency')
                                st.pyplot(fig)
                            finally:
                                # pyplot keeps figures alive until closed;
                                # without this each rerun leaks one.
                                plt.close(fig)

                            st.markdown("### Top 10 Adverse Reaction Summary")
                            st.dataframe(pd.DataFrame({'Reaction': top_reactions.index, 'Frequency': top_reactions.values}))
                    # Best-effort section: a malformed report must not break the page.
                    except Exception as e:
                        st.error(f"Error processing adverse events data: {e}")
            else:
                st.warning("No clinical trials found for the given search term.")
|
|
|
|
|
|
|
def _pubchem_property(props: List[Any], labels: tuple) -> Any:
    """Return the first value stored under any of ``labels``, or ``'N/A'``.

    PubChem nests the label under ``urn`` and stores the value as ``sval``,
    ``fval`` or ``ival``; a flat ``name`` key is also accepted.
    """
    for prop in props:
        if not isinstance(prop, dict):
            continue
        urn = prop.get("urn") or {}
        if urn.get("label") in labels or prop.get("name") in labels:
            value = prop.get("value") or {}
            for key in ("sval", "fval", "ival"):
                if key in value:
                    return value[key]
    return "N/A"


# Tab 2: 2D structure and basic physicochemical properties of a compound.
with tabs[2]:
    st.header("Advanced Molecular Profiling")
    compound_input = st.text_input("Compound Identifier:",
                                   placeholder="Enter drug name, SMILES, or INN")

    if st.button("Analyze Compound"):
        compound = compound_input.strip()
        if not compound:
            st.warning("Please enter a compound identifier.")
        else:
            with st.spinner("Querying global databases..."):
                # Input that parses as SMILES is used directly; anything else
                # is treated as a name and resolved through PubChem.
                smiles = compound if Chem.MolFromSmiles(compound) else _get_pubchem_smiles(compound)

                if smiles:
                    img = _draw_molecule(smiles)
                    if img:
                        st.image(img, caption="2D Structure")
                else:
                    st.error("Compound structure not found in databases.")

                # NOTE(review): this is a by-name lookup, so a raw SMILES input
                # is sent as if it were a name — confirm that is intended.
                pubchem_data = _query_api(API_ENDPOINTS["pubchem"].format(quote(compound, safe="")))
                compounds = pubchem_data.get("PC_Compounds") if isinstance(pubchem_data, dict) else None
                if compounds:
                    st.subheader("Physicochemical Properties")
                    props = compounds[0].get("props") or []
                    mw = _pubchem_property(props, ("Molecular Weight",))
                    logp = _pubchem_property(props, ("Log P", "LogP"))
                    st.write(f"Molecular Weight: {mw}\n\nLogP: {logp}")
                else:
                    st.error("Physicochemical properties not found.")
|
|
|
|
|
|
|
# Tab 3: regulatory status summary with a downloadable PDF report.
with tabs[3]:
    st.header("Global Regulatory Monitoring")
    drug_name = st.text_input("Drug Product:", placeholder="Enter generic or brand name")

    if st.button("Generate Regulatory Report"):
        product = drug_name.strip()
        if not product:
            st.warning("Please enter a drug product name.")
        else:
            with st.spinner("Compiling global regulatory status..."):
                fda = _get_fda_approval(product)
                # NOTE(review): the "who_drugs" endpoint is hosted on
                # health-products.canada.ca; confirm it reflects WHO Essential
                # Medicines status and accepts a "name" parameter.
                who = _query_api(API_ENDPOINTS["who_drugs"], {"name": product})

                # Computed once so the screen and the PDF cannot disagree; an
                # empty brand_name list no longer raises IndexError.
                openfda = fda.get("openfda") if isinstance(fda, dict) else None
                brand_names = openfda.get("brand_name") if isinstance(openfda, dict) else None
                fda_status = brand_names[0] if brand_names else "Not approved"
                ema_status = "Not Available"
                who_status = "Yes" if who else "No"

                st.subheader("Regulatory Status")
                col1, col2, col3 = st.columns(3)
                with col1:
                    st.markdown("**FDA Status**")
                    st.write(fda_status)
                with col2:
                    st.markdown("**EMA Status**")
                    st.write(ema_status)
                with col3:
                    st.markdown("**WHO Essential Medicine**")
                    st.write(who_status)

                regulatory_content = (
                    "### Regulatory Report\n\n"
                    f"FDA Status: {fda_status}\n\n"
                    f"EMA Status: {ema_status}\n\n"
                    f"WHO Essential Medicine: {who_status}"
                )

                # The name is user input: keep only filename-safe characters so
                # it cannot contain path separators.
                safe_stem = "".join(ch if ch.isalnum() or ch in "-_" else "_" for ch in product)
                download_name = f"{safe_stem}_regulatory_report.pdf"

                # Build the PDF in a throwaway directory so reports do not pile
                # up in the working directory.
                with tempfile.TemporaryDirectory() as tmp_dir:
                    report_file = _save_pdf_report(regulatory_content, os.path.join(tmp_dir, download_name))
                    with open(report_file, "rb") as file:
                        pdf_bytes = file.read()

                st.download_button(
                    label="Download Regulatory Report (PDF)",
                    data=pdf_bytes,
                    file_name=download_name,
                    mime="application/pdf")
|
|
|
|
|
# Tab 4: PubMed literature search and BioPortal ontology search.
with tabs[4]:
    st.header("Literature Search")
    search_term = st.text_input("Enter search query for PubMed:", placeholder="e.g., Alzheimer's disease genetics")
    if st.button("Search PubMed"):
        with st.spinner("Searching PubMed..."):
            pubmed_data = _query_pubmed(search_term)
            search_result = pubmed_data.get("esearchresult") if isinstance(pubmed_data, dict) else None
            id_list = (search_result or {}).get("idlist") or []
            if id_list:
                st.subheader("PubMed Search Results")
                # "count" is the total number of hits; idlist holds only the
                # returned page, so the two are reported separately.
                total = search_result.get("count", len(id_list))
                st.write(f"Showing {len(id_list)} of {total} results for '{search_term}':")
                for article_id in id_list:
                    st.write(f"- PMID: {article_id}")
            else:
                st.write("No results found for that term.")

    st.header("Ontology Search")
    ontology_search_term = st.text_input("Enter Search query for Ontology:", placeholder="Enter disease or ontology")
    ontology_select = st.selectbox("Select Ontology", ["MESH", "NCIT", "GO", "SNOMEDCT"])
    if st.button("Search BioPortal"):
        with st.spinner("Searching Ontology..."):
            bioportal_data = _get_bioportal_data(ontology_select, ontology_search_term)
            matches = bioportal_data.get("collection") if isinstance(bioportal_data, dict) else None
            # An empty collection falls through to "No results found" instead
            # of showing a header with nothing under it.
            if matches:
                st.subheader(f"BioPortal Search Results for {ontology_select}")
                for result in matches:
                    # .get() keeps one incomplete record from raising KeyError.
                    label = result.get("prefLabel", "Unnamed term")
                    identifier = result.get("@id", "no identifier")
                    st.write(f"- {label} ({identifier})")
            else:
                st.write("No results found")