Update app.py
Browse files
app.py
CHANGED
|
@@ -1,27 +1,27 @@
|
|
| 1 |
import streamlit as st
|
| 2 |
import requests
|
| 3 |
-
from
|
|
|
|
| 4 |
import pandas as pd
|
| 5 |
import matplotlib.pyplot as plt
|
| 6 |
import seaborn as sns
|
| 7 |
-
from rdkit import Chem
|
| 8 |
-
from rdkit.Chem import Draw
|
| 9 |
from fpdf import FPDF
|
| 10 |
import tempfile
|
| 11 |
import logging
|
| 12 |
-
from typing import Optional, Dict, List, Any
|
| 13 |
import os
|
| 14 |
import plotly.graph_objects as go
|
| 15 |
import networkx as nx
|
|
|
|
| 16 |
|
| 17 |
-
#
|
|
|
|
|
|
|
|
|
|
| 18 |
st.set_page_config(page_title="Pharma Research Expert Platform", layout="wide")
|
| 19 |
-
|
| 20 |
-
# Setup logging
|
| 21 |
logging.basicConfig(level=logging.ERROR)
|
| 22 |
|
| 23 |
# -----------------------------
|
| 24 |
-
# API
|
| 25 |
# -----------------------------
|
| 26 |
API_ENDPOINTS = {
|
| 27 |
"clinical_trials": "https://clinicaltrials.gov/api/v2/studies", # no email required
|
|
@@ -29,7 +29,7 @@ API_ENDPOINTS = {
|
|
| 29 |
"pubmed": "https://eutils.ncbi.nlm.nih.gov/entrez/eutils/esearch.fcgi",
|
| 30 |
"fda_drug_approval": "https://api.fda.gov/drug/label.json",
|
| 31 |
"faers_adverse_events": "https://api.fda.gov/drug/event.json",
|
| 32 |
-
# PharmGKB endpoints
|
| 33 |
"pharmgkb_variant_clinical_annotations": "https://api.pharmgkb.org/v1/data/variant/{}/clinicalAnnotations",
|
| 34 |
"pharmgkb_gene": "https://api.pharmgkb.org/v1/data/gene/{}",
|
| 35 |
"pharmgkb_gene_variants": "https://api.pharmgkb.org/v1/data/gene/{}/variants",
|
|
@@ -37,37 +37,37 @@ API_ENDPOINTS = {
|
|
| 37 |
# RxNorm endpoints
|
| 38 |
"rxnorm_rxcui": "https://rxnav.nlm.nih.gov/REST/rxcui.json",
|
| 39 |
"rxnorm_properties": "https://rxnav.nlm.nih.gov/REST/rxcui/{}/properties.json",
|
| 40 |
-
# RxClass endpoint
|
| 41 |
"rxclass_by_drug": "https://rxnav.nlm.nih.gov/REST/class/byDrugName.json"
|
| 42 |
}
|
| 43 |
|
| 44 |
-
#
|
| 45 |
-
# Retrieve Secrets
|
| 46 |
-
#
|
| 47 |
OPENAI_API_KEY = st.secrets.get("OPENAI_API_KEY")
|
| 48 |
BIOPORTAL_API_KEY = st.secrets.get("BIOPORTAL_API_KEY")
|
| 49 |
PUB_EMAIL = st.secrets.get("PUB_EMAIL")
|
| 50 |
OPENFDA_KEY = st.secrets.get("OPENFDA_KEY")
|
| 51 |
|
| 52 |
if not PUB_EMAIL:
|
| 53 |
-
st.error("PubMed email (PUB_EMAIL) is not configured
|
| 54 |
if not BIOPORTAL_API_KEY:
|
| 55 |
-
st.error("BioPortal API key (BIOPORTAL_API_KEY) is not configured
|
| 56 |
if not OPENFDA_KEY:
|
| 57 |
-
st.error("OpenFDA API key (OPENFDA_KEY) is not configured
|
| 58 |
if not OPENAI_API_KEY:
|
| 59 |
-
st.error("OpenAI API key (OPENAI_API_KEY) is not configured
|
| 60 |
|
| 61 |
-
#
|
| 62 |
-
# Initialize OpenAI Client (
|
| 63 |
-
#
|
| 64 |
from openai import OpenAI
|
| 65 |
-
|
| 66 |
|
| 67 |
def generate_content(prompt: str) -> str:
|
| 68 |
-
"""Generate content using GPT
|
| 69 |
try:
|
| 70 |
-
completion =
|
| 71 |
model="gpt-4",
|
| 72 |
messages=[{"role": "user", "content": prompt}],
|
| 73 |
max_tokens=300
|
|
@@ -82,48 +82,41 @@ def generate_content(prompt: str) -> str:
|
|
| 82 |
# Utility Functions
|
| 83 |
# -----------------------------
|
| 84 |
def _query_api(endpoint: str, params: Optional[Dict] = None, headers: Optional[Dict] = None) -> Optional[Dict]:
|
| 85 |
-
"""Handles API requests with
|
| 86 |
try:
|
| 87 |
response = requests.get(endpoint, params=params, headers=headers, timeout=15)
|
| 88 |
response.raise_for_status()
|
| 89 |
return response.json()
|
| 90 |
-
except requests.exceptions.HTTPError as http_err:
|
| 91 |
-
st.error(f"HTTP error: {http_err} for endpoint {endpoint}.")
|
| 92 |
-
logging.error(f"HTTP error: {http_err} for endpoint {endpoint}.")
|
| 93 |
-
except requests.exceptions.RequestException as req_err:
|
| 94 |
-
st.error(f"Request error: {req_err} for endpoint {endpoint}.")
|
| 95 |
-
logging.error(f"Request error: {req_err} for endpoint {endpoint}.")
|
| 96 |
except Exception as e:
|
| 97 |
-
st.error(f"
|
| 98 |
-
logging.error(f"
|
| 99 |
return None
|
| 100 |
|
| 101 |
def _get_pubchem_smiles(drug_name: str) -> Optional[str]:
|
| 102 |
-
"""
|
| 103 |
url = API_ENDPOINTS["pubchem"].format(drug_name)
|
| 104 |
data = _query_api(url)
|
| 105 |
-
if data and
|
| 106 |
for prop in data["PC_Compounds"][0].get("props", []):
|
| 107 |
if prop.get("name") == "Canonical SMILES":
|
| 108 |
return prop["value"]["sval"]
|
| 109 |
return None
|
| 110 |
|
| 111 |
def _draw_molecule(smiles: str) -> Optional[Any]:
|
| 112 |
-
"""
|
| 113 |
try:
|
| 114 |
mol = Chem.MolFromSmiles(smiles)
|
| 115 |
if mol:
|
| 116 |
return Draw.MolToImage(mol)
|
| 117 |
else:
|
| 118 |
-
st.error("Invalid SMILES
|
| 119 |
-
return None
|
| 120 |
except Exception as e:
|
| 121 |
st.error(f"Error drawing molecule: {e}")
|
| 122 |
-
logging.error(
|
| 123 |
-
|
| 124 |
|
| 125 |
def _get_pubchem_drug_details(drug_name: str) -> Optional[Dict[str, str]]:
|
| 126 |
-
"""
|
| 127 |
url = API_ENDPOINTS["pubchem"].format(drug_name)
|
| 128 |
data = _query_api(url)
|
| 129 |
details = {}
|
|
@@ -141,7 +134,7 @@ def _get_pubchem_drug_details(drug_name: str) -> Optional[Dict[str, str]]:
|
|
| 141 |
return None
|
| 142 |
|
| 143 |
def _get_clinical_trials(query: str) -> Optional[Dict]:
|
| 144 |
-
"""
|
| 145 |
if query.upper().startswith("NCT") and query[3:].isdigit():
|
| 146 |
params = {"id": query, "fmt": "json"}
|
| 147 |
else:
|
|
@@ -149,12 +142,12 @@ def _get_clinical_trials(query: str) -> Optional[Dict]:
|
|
| 149 |
return _query_api(API_ENDPOINTS["clinical_trials"], params)
|
| 150 |
|
| 151 |
def _get_pubmed(query: str) -> Optional[Dict]:
|
| 152 |
-
"""
|
| 153 |
params = {"db": "pubmed", "term": query, "retmax": 10, "retmode": "json", "email": PUB_EMAIL}
|
| 154 |
return _query_api(API_ENDPOINTS["pubmed"], params)
|
| 155 |
|
| 156 |
def _get_fda_approval(drug_name: str) -> Optional[Dict]:
|
| 157 |
-
"""
|
| 158 |
if not OPENFDA_KEY:
|
| 159 |
st.error("OpenFDA API key not configured.")
|
| 160 |
return None
|
|
@@ -166,7 +159,7 @@ def _get_fda_approval(drug_name: str) -> Optional[Dict]:
|
|
| 166 |
return None
|
| 167 |
|
| 168 |
def _analyze_adverse_events(drug_name: str, limit: int = 5) -> Optional[Dict]:
|
| 169 |
-
"""
|
| 170 |
if not OPENFDA_KEY:
|
| 171 |
st.error("OpenFDA API key not configured.")
|
| 172 |
return None
|
|
@@ -175,7 +168,7 @@ def _analyze_adverse_events(drug_name: str, limit: int = 5) -> Optional[Dict]:
|
|
| 175 |
return _query_api(API_ENDPOINTS["faers_adverse_events"], params)
|
| 176 |
|
| 177 |
def _get_pharmgkb_clinical_annotations(variant_id: str) -> Optional[Dict]:
|
| 178 |
-
"""
|
| 179 |
endpoint = API_ENDPOINTS["pharmgkb_variant_clinical_annotations"].format(variant_id)
|
| 180 |
data = _query_api(endpoint)
|
| 181 |
if data and data.get("data"):
|
|
@@ -184,12 +177,9 @@ def _get_pharmgkb_clinical_annotations(variant_id: str) -> Optional[Dict]:
|
|
| 184 |
return None
|
| 185 |
|
| 186 |
def _get_pharmgkb_variants_for_gene(pharmgkb_gene_id: str) -> Optional[List[str]]:
|
| 187 |
-
"""
|
| 188 |
-
Retrieves variant IDs for a gene using its PharmGKB accession.
|
| 189 |
-
If the accession does not start with "PA", warn the user.
|
| 190 |
-
"""
|
| 191 |
if not pharmgkb_gene_id.startswith("PA"):
|
| 192 |
-
st.warning("Please provide a valid PharmGKB accession
|
| 193 |
return None
|
| 194 |
endpoint = API_ENDPOINTS["pharmgkb_gene_variants"].format(pharmgkb_gene_id)
|
| 195 |
data = _query_api(endpoint)
|
|
@@ -199,11 +189,9 @@ def _get_pharmgkb_variants_for_gene(pharmgkb_gene_id: str) -> Optional[List[str]
|
|
| 199 |
return None
|
| 200 |
|
| 201 |
def get_pharmgkb_gene_data(pharmgkb_gene_id: str) -> Optional[Dict]:
|
| 202 |
-
"""
|
| 203 |
-
Retrieves PharmGKB gene data using a PharmGKB accession.
|
| 204 |
-
"""
|
| 205 |
if not pharmgkb_gene_id.startswith("PA"):
|
| 206 |
-
st.warning("Please enter a valid PharmGKB gene accession
|
| 207 |
return None
|
| 208 |
endpoint = API_ENDPOINTS["pharmgkb_gene"].format(pharmgkb_gene_id)
|
| 209 |
data = _query_api(endpoint)
|
|
@@ -213,23 +201,23 @@ def get_pharmgkb_gene_data(pharmgkb_gene_id: str) -> Optional[Dict]:
|
|
| 213 |
return None
|
| 214 |
|
| 215 |
def _get_bioportal_data(ontology: str, term: str) -> Optional[Dict]:
|
| 216 |
-
"""
|
| 217 |
if not BIOPORTAL_API_KEY:
|
| 218 |
st.error("BioPortal API key not configured.")
|
| 219 |
return None
|
| 220 |
if not term:
|
| 221 |
-
st.error("Please provide a
|
| 222 |
return None
|
| 223 |
headers = {"Authorization": f"apikey token={BIOPORTAL_API_KEY}"}
|
| 224 |
params = {"q": term, "ontologies": ontology}
|
| 225 |
data = _query_api(API_ENDPOINTS["bioportal_search"], params, headers)
|
| 226 |
if data and data.get("collection"):
|
| 227 |
return data
|
| 228 |
-
st.warning("No results found
|
| 229 |
return None
|
| 230 |
|
| 231 |
def _save_pdf_report(report_content: str, filename: str):
|
| 232 |
-
"""
|
| 233 |
try:
|
| 234 |
pdf = FPDF()
|
| 235 |
pdf.add_page()
|
|
@@ -238,28 +226,28 @@ def _save_pdf_report(report_content: str, filename: str):
|
|
| 238 |
pdf.output(filename)
|
| 239 |
return filename
|
| 240 |
except Exception as e:
|
| 241 |
-
st.error(f"Error saving PDF
|
| 242 |
-
logging.error(
|
| 243 |
-
|
| 244 |
|
| 245 |
def _display_dataframe(data: List[Dict[str, Any]], columns: List[str]):
|
| 246 |
-
"""
|
| 247 |
if data:
|
| 248 |
df = pd.DataFrame(data, columns=columns)
|
| 249 |
st.dataframe(df)
|
| 250 |
return df
|
| 251 |
-
st.warning("No data available
|
| 252 |
return None
|
| 253 |
|
| 254 |
def _create_variant_network(gene: str, variants: List[str], annotations: Dict) -> go.Figure:
|
| 255 |
-
"""
|
| 256 |
G = nx.Graph()
|
| 257 |
G.add_node(gene, color="lightblue")
|
| 258 |
for variant in variants:
|
| 259 |
G.add_node(variant, color="lightgreen")
|
| 260 |
G.add_edge(gene, variant)
|
| 261 |
for drug in annotations.get(variant, []):
|
| 262 |
-
if drug != "N/A":
|
| 263 |
G.add_node(drug, color="lightcoral")
|
| 264 |
G.add_edge(variant, drug)
|
| 265 |
pos = nx.spring_layout(G)
|
|
@@ -270,11 +258,8 @@ def _create_variant_network(gene: str, variants: List[str], annotations: Dict) -
|
|
| 270 |
edge_x.extend([x0, x1, None])
|
| 271 |
edge_y.extend([y0, y1, None])
|
| 272 |
edge_trace = go.Scatter(
|
| 273 |
-
x=edge_x,
|
| 274 |
-
|
| 275 |
-
line=dict(width=0.5, color="#888"),
|
| 276 |
-
hoverinfo="none",
|
| 277 |
-
mode="lines"
|
| 278 |
)
|
| 279 |
node_x, node_y, node_text, node_color = [], [], [], []
|
| 280 |
for node in G.nodes():
|
|
@@ -284,26 +269,16 @@ def _create_variant_network(gene: str, variants: List[str], annotations: Dict) -
|
|
| 284 |
node_text.append(node)
|
| 285 |
node_color.append(G.nodes[node]["color"])
|
| 286 |
node_trace = go.Scatter(
|
| 287 |
-
x=node_x,
|
| 288 |
-
|
| 289 |
-
|
| 290 |
-
|
| 291 |
-
text=node_text,
|
| 292 |
-
textposition="bottom center",
|
| 293 |
-
marker=dict(
|
| 294 |
-
showscale=False,
|
| 295 |
-
colorscale="YlGnBu",
|
| 296 |
-
color=node_color,
|
| 297 |
-
size=10,
|
| 298 |
-
line_width=2
|
| 299 |
-
)
|
| 300 |
)
|
| 301 |
fig = go.Figure(
|
| 302 |
data=[edge_trace, node_trace],
|
| 303 |
layout=go.Layout(
|
| 304 |
title=dict(text="Gene-Variant-Drug Network", font=dict(size=16)),
|
| 305 |
-
showlegend=False,
|
| 306 |
-
hovermode="closest",
|
| 307 |
margin=dict(b=20, l=5, r=5, t=40),
|
| 308 |
xaxis=dict(showgrid=False, zeroline=False, showticklabels=False),
|
| 309 |
yaxis=dict(showgrid=False, zeroline=False, showticklabels=False)
|
|
@@ -312,10 +287,10 @@ def _create_variant_network(gene: str, variants: List[str], annotations: Dict) -
|
|
| 312 |
return fig
|
| 313 |
|
| 314 |
# -----------------------------
|
| 315 |
-
#
|
| 316 |
# -----------------------------
|
| 317 |
def get_rxnorm_rxcui(drug_name: str) -> Optional[str]:
|
| 318 |
-
"""
|
| 319 |
url = f"{API_ENDPOINTS['rxnorm_rxcui']}?name={drug_name}"
|
| 320 |
data = _query_api(url)
|
| 321 |
if data and "idGroup" in data and data["idGroup"].get("rxnormId"):
|
|
@@ -324,20 +299,23 @@ def get_rxnorm_rxcui(drug_name: str) -> Optional[str]:
|
|
| 324 |
return None
|
| 325 |
|
| 326 |
def get_rxnorm_properties(rxcui: str) -> Optional[Dict]:
|
| 327 |
-
"""
|
| 328 |
url = API_ENDPOINTS["rxnorm_properties"].format(rxcui)
|
| 329 |
return _query_api(url)
|
| 330 |
|
| 331 |
def get_rxclass_by_drug_name(drug_name: str) -> Optional[Dict]:
|
| 332 |
-
"""
|
| 333 |
url = f"{API_ENDPOINTS['rxclass_by_drug']}?drugName={drug_name}"
|
| 334 |
-
|
|
|
|
|
|
|
|
|
|
| 335 |
|
| 336 |
# -----------------------------
|
| 337 |
-
#
|
| 338 |
# -----------------------------
|
| 339 |
def generate_drug_insights(drug_name: str) -> str:
|
| 340 |
-
"""Gathers FDA, PubChem, RxNorm, and RxClass
|
| 341 |
# FDA Data
|
| 342 |
fda_info = _get_fda_approval(drug_name)
|
| 343 |
fda_status = "Not Approved"
|
|
@@ -363,12 +341,9 @@ def generate_drug_insights(drug_name: str) -> str:
|
|
| 363 |
|
| 364 |
# RxClass Data
|
| 365 |
rxclass_data = get_rxclass_by_drug_name(drug_name)
|
| 366 |
-
if rxclass_data
|
| 367 |
-
rxclass_info = f"RxClass: {rxclass_data}"
|
| 368 |
-
else:
|
| 369 |
-
rxclass_info = "No RxClass data available."
|
| 370 |
|
| 371 |
-
# Construct prompt for GPT
|
| 372 |
prompt = (
|
| 373 |
f"Drug Analysis Report for '{drug_name}':\n\n"
|
| 374 |
f"**FDA Approval Status:** {fda_status}\n\n"
|
|
@@ -378,20 +353,16 @@ def generate_drug_insights(drug_name: str) -> str:
|
|
| 378 |
f" - Canonical SMILES: {canonical_smiles}\n\n"
|
| 379 |
f"**RxNorm Data:** {rxnorm_info}\n\n"
|
| 380 |
f"**RxClass Data:** {rxclass_info}\n\n"
|
| 381 |
-
f"As an
|
| 382 |
-
f"
|
| 383 |
-
f"
|
| 384 |
-
f"- Potential repurposing opportunities\n"
|
| 385 |
-
f"- Regulatory insights and challenges\n"
|
| 386 |
-
f"- Suggestions for further research and data integration\n\n"
|
| 387 |
-
f"Present your answer in a clear, bullet-point format and feel free to add any novel ideas."
|
| 388 |
)
|
| 389 |
|
| 390 |
insights = generate_content(prompt)
|
| 391 |
return insights
|
| 392 |
|
| 393 |
# -----------------------------
|
| 394 |
-
#
|
| 395 |
# -----------------------------
|
| 396 |
tabs = st.tabs([
|
| 397 |
"💊 Drug Development",
|
|
@@ -404,68 +375,54 @@ tabs = st.tabs([
|
|
| 404 |
"🤖 AI Insights"
|
| 405 |
])
|
| 406 |
|
| 407 |
-
#
|
| 408 |
-
# Tab 1: Drug Development
|
| 409 |
-
# -----------------------------
|
| 410 |
with tabs[0]:
|
| 411 |
st.header("AI-Driven Drug Development Strategy")
|
| 412 |
target = st.text_input("Target Disease/Pathway:", placeholder="Enter disease mechanism or target")
|
| 413 |
-
target_gene = st.text_input("Target Gene (PharmGKB Accession):", placeholder="
|
| 414 |
strategy = st.selectbox("Development Strategy:", ["First-in-class", "Me-too", "Repurposing", "Biologic"])
|
| 415 |
|
| 416 |
if st.button("Generate Development Plan"):
|
| 417 |
with st.spinner("Generating plan..."):
|
| 418 |
plan_prompt = (
|
| 419 |
-
f"Develop a comprehensive drug development plan for treating {target} "
|
| 420 |
-
|
| 421 |
-
|
| 422 |
-
f"and competitive landscape. Highlight key milestones and challenges."
|
| 423 |
)
|
| 424 |
plan = generate_content(plan_prompt)
|
| 425 |
st.subheader("Comprehensive Development Plan")
|
| 426 |
st.markdown(plan)
|
| 427 |
|
| 428 |
-
|
| 429 |
if target:
|
| 430 |
fda_info = _get_fda_approval(target.split()[0])
|
| 431 |
-
st.subheader("FDA Regulatory Insights")
|
| 432 |
if fda_info:
|
| 433 |
st.json(fda_info)
|
| 434 |
else:
|
| 435 |
st.write("No FDA data found for the given target.")
|
| 436 |
|
| 437 |
-
# Pharmacogenomic Considerations via PharmGKB
|
| 438 |
st.subheader("Pharmacogenomic Considerations")
|
| 439 |
if target_gene:
|
| 440 |
if not target_gene.startswith("PA"):
|
| 441 |
-
st.warning("Please provide a valid PharmGKB accession (e.g., PA1234)
|
| 442 |
else:
|
| 443 |
variant_ids = _get_pharmgkb_variants_for_gene(target_gene)
|
| 444 |
if variant_ids:
|
| 445 |
annotations = {}
|
| 446 |
-
for
|
| 447 |
-
|
| 448 |
-
|
| 449 |
-
|
| 450 |
-
else:
|
| 451 |
-
annotations[variant_id] = []
|
| 452 |
-
st.write(f"### Clinical Annotations for Variant: {variant_id}")
|
| 453 |
-
if pgx_data:
|
| 454 |
-
st.json(pgx_data)
|
| 455 |
-
else:
|
| 456 |
-
st.write(f"No annotations for variant {variant_id}.")
|
| 457 |
else:
|
| 458 |
st.write("No variants found for the specified PharmGKB gene accession.")
|
| 459 |
else:
|
| 460 |
-
st.write("
|
| 461 |
|
| 462 |
-
#
|
| 463 |
-
# Tab 2: Clinical Trial Analytics
|
| 464 |
-
# -----------------------------
|
| 465 |
with tabs[1]:
|
| 466 |
st.header("Clinical Trial Landscape Analytics")
|
| 467 |
trial_query = st.text_input("Search Clinical Trials:", placeholder="Enter condition, intervention, or NCT number")
|
| 468 |
-
|
| 469 |
if st.button("Analyze Trial Landscape"):
|
| 470 |
with st.spinner("Fetching trial data..."):
|
| 471 |
trials = _get_clinical_trials(trial_query)
|
|
@@ -476,15 +433,15 @@ with tabs[1]:
|
|
| 476 |
"Title": study.get("protocolSection", {}).get("identificationModule", {}).get("briefTitle", "N/A"),
|
| 477 |
"Status": study.get("protocolSection", {}).get("statusModule", {}).get("overallStatus", "N/A"),
|
| 478 |
"Phase": study.get("protocolSection", {}).get("designModule", {}).get("phases", ["Not Available"])[0],
|
| 479 |
-
"Enrollment": study.get("protocolSection", {}).get("designModule", {}).get("enrollmentInfo", {}).get("count", "
|
| 480 |
})
|
| 481 |
_display_dataframe(trial_data, list(trial_data[0].keys()))
|
| 482 |
else:
|
| 483 |
-
st.warning("No clinical trials found for the
|
| 484 |
|
| 485 |
ae_data = _analyze_adverse_events(trial_query)
|
| 486 |
if ae_data and ae_data.get("results"):
|
| 487 |
-
st.subheader("Adverse Event Profile (Top 5
|
| 488 |
ae_results = ae_data["results"][:5]
|
| 489 |
ae_df = pd.json_normalize(ae_results)
|
| 490 |
st.dataframe(ae_df)
|
|
@@ -503,17 +460,12 @@ with tabs[1]:
|
|
| 503 |
else:
|
| 504 |
st.write("No adverse event data available.")
|
| 505 |
|
| 506 |
-
#
|
| 507 |
-
# Tab 3: Molecular Profiling
|
| 508 |
-
# -----------------------------
|
| 509 |
with tabs[2]:
|
| 510 |
st.header("Advanced Molecular Profiling")
|
| 511 |
compound_input = st.text_input("Compound Identifier:", placeholder="Enter drug name, SMILES, or INN")
|
| 512 |
-
|
| 513 |
if st.button("Analyze Compound"):
|
| 514 |
with st.spinner("Querying PubChem..."):
|
| 515 |
-
smiles = None
|
| 516 |
-
# If input is a valid SMILES, use it; otherwise attempt to retrieve from PubChem.
|
| 517 |
if Chem.MolFromSmiles(compound_input):
|
| 518 |
smiles = compound_input
|
| 519 |
else:
|
|
@@ -521,10 +473,9 @@ with tabs[2]:
|
|
| 521 |
if smiles:
|
| 522 |
img = _draw_molecule(smiles)
|
| 523 |
if img:
|
| 524 |
-
st.image(img, caption="2D Structure")
|
| 525 |
else:
|
| 526 |
-
st.error("
|
| 527 |
-
|
| 528 |
pubchem_data = _query_api(API_ENDPOINTS["pubchem"].format(compound_input))
|
| 529 |
if pubchem_data and pubchem_data.get("PC_Compounds"):
|
| 530 |
st.subheader("Physicochemical Properties")
|
|
@@ -535,32 +486,24 @@ with tabs[2]:
|
|
| 535 |
else:
|
| 536 |
st.error("Physicochemical properties not available.")
|
| 537 |
|
| 538 |
-
#
|
| 539 |
-
# Tab 4: Regulatory Intelligence
|
| 540 |
-
# -----------------------------
|
| 541 |
with tabs[3]:
|
| 542 |
st.header("Global Regulatory Monitoring")
|
| 543 |
-
st.markdown("**Note:**
|
| 544 |
-
|
| 545 |
-
|
| 546 |
if st.button("Generate Regulatory Report"):
|
| 547 |
with st.spinner("Compiling regulatory data..."):
|
| 548 |
-
|
| 549 |
-
fda_info = _get_fda_approval(drug_name)
|
| 550 |
fda_status = "Not Approved"
|
| 551 |
if fda_info and fda_info.get("openfda", {}).get("brand_name"):
|
| 552 |
fda_status = ", ".join(fda_info["openfda"]["brand_name"])
|
| 553 |
-
|
| 554 |
-
# PubChem Drug Details for Generic/Formula Info
|
| 555 |
-
pubchem_details = _get_pubchem_drug_details(drug_name)
|
| 556 |
if pubchem_details:
|
| 557 |
formula = pubchem_details.get("Molecular Formula", "N/A")
|
| 558 |
iupac = pubchem_details.get("IUPAC Name", "N/A")
|
| 559 |
-
|
| 560 |
else:
|
| 561 |
-
formula = iupac =
|
| 562 |
-
|
| 563 |
-
st.subheader("Regulatory Status & Drug Details")
|
| 564 |
col1, col2 = st.columns(2)
|
| 565 |
with col1:
|
| 566 |
st.markdown("**FDA Status**")
|
|
@@ -569,156 +512,119 @@ with tabs[3]:
|
|
| 569 |
st.markdown("**Drug Details (PubChem)**")
|
| 570 |
st.write(f"**Molecular Formula:** {formula}")
|
| 571 |
st.write(f"**IUPAC Name:** {iupac}")
|
| 572 |
-
st.write(f"**Canonical SMILES:** {
|
| 573 |
-
|
| 574 |
-
|
| 575 |
-
f"### Regulatory Report for {drug_name}\n\n"
|
| 576 |
f"**FDA Status:** {fda_status}\n\n"
|
| 577 |
f"**Molecular Formula:** {formula}\n\n"
|
| 578 |
f"**IUPAC Name:** {iupac}\n\n"
|
| 579 |
-
f"**Canonical SMILES:** {
|
| 580 |
)
|
| 581 |
-
with tempfile.NamedTemporaryFile(delete=False, suffix=".pdf") as
|
| 582 |
-
|
| 583 |
-
if
|
| 584 |
-
with open(
|
| 585 |
-
st.download_button(
|
| 586 |
-
|
| 587 |
-
|
| 588 |
-
|
| 589 |
-
mime="application/pdf"
|
| 590 |
-
)
|
| 591 |
-
os.remove(report_file)
|
| 592 |
-
|
| 593 |
-
# -----------------------------
|
| 594 |
-
# Tab 5: Literature Search
|
| 595 |
-
# -----------------------------
|
| 596 |
with tabs[4]:
|
| 597 |
st.header("Literature Search")
|
| 598 |
-
|
| 599 |
if st.button("Search PubMed"):
|
| 600 |
with st.spinner("Searching PubMed..."):
|
| 601 |
-
|
| 602 |
-
if
|
| 603 |
-
|
| 604 |
-
st.subheader(f"Found {len(
|
| 605 |
-
for
|
| 606 |
-
st.markdown(f"- [PMID: {
|
| 607 |
else:
|
| 608 |
st.write("No PubMed results found.")
|
| 609 |
-
|
| 610 |
st.header("Ontology Search")
|
| 611 |
-
|
| 612 |
-
|
| 613 |
if st.button("Search BioPortal"):
|
| 614 |
with st.spinner("Searching BioPortal..."):
|
| 615 |
-
|
| 616 |
-
if
|
| 617 |
-
st.subheader(f"BioPortal Results for {
|
| 618 |
-
for
|
| 619 |
-
label =
|
| 620 |
-
|
| 621 |
-
st.markdown(f"- **{label}** ({
|
| 622 |
else:
|
| 623 |
st.write("No ontology results found.")
|
| 624 |
|
| 625 |
-
#
|
| 626 |
-
# Tab 6: Dashboard
|
| 627 |
-
# -----------------------------
|
| 628 |
with tabs[5]:
|
| 629 |
st.header("Comprehensive Dashboard")
|
| 630 |
-
|
| 631 |
-
|
| 632 |
-
|
| 633 |
-
|
| 634 |
-
|
| 635 |
-
|
| 636 |
-
|
| 637 |
-
|
| 638 |
-
col1.metric("FDA Approved Drugs", fda)
|
| 639 |
-
col2.metric("Ongoing Trials", trials)
|
| 640 |
-
col3.metric("Publications", pubs)
|
| 641 |
-
|
| 642 |
-
_create_kpi_dashboard(fda_count, trials_count, pub_count)
|
| 643 |
-
|
| 644 |
st.subheader("Trend Analysis")
|
| 645 |
years = list(range(2000, 2026))
|
| 646 |
-
|
| 647 |
-
|
| 648 |
-
sns.lineplot(x=years, y=
|
| 649 |
-
|
| 650 |
-
|
| 651 |
-
|
| 652 |
-
st.pyplot(
|
| 653 |
-
|
| 654 |
st.subheader("Gene-Variant-Drug Network (Sample)")
|
| 655 |
sample_gene = "CYP2C19"
|
| 656 |
sample_variants = ["rs4244285", "rs12248560"]
|
| 657 |
-
|
| 658 |
-
"rs4244285": ["Clopidogrel", "Omeprazole"],
|
| 659 |
-
"rs12248560": ["Sertraline"]
|
| 660 |
-
}
|
| 661 |
try:
|
| 662 |
-
|
| 663 |
-
st.plotly_chart(
|
| 664 |
except Exception as e:
|
| 665 |
-
st.error(f"
|
| 666 |
|
| 667 |
-
#
|
| 668 |
-
# Tab 7: Drug Data Integration
|
| 669 |
-
# -----------------------------
|
| 670 |
with tabs[6]:
|
| 671 |
st.header("🧪 Drug Data Integration")
|
| 672 |
-
|
| 673 |
-
|
| 674 |
if st.button("Retrieve Drug Data"):
|
| 675 |
-
with st.spinner("Fetching drug data
|
| 676 |
-
|
| 677 |
-
rxnorm_id = get_rxnorm_rxcui(drug_query)
|
| 678 |
if rxnorm_id:
|
| 679 |
-
|
| 680 |
else:
|
| 681 |
-
|
| 682 |
-
|
| 683 |
-
# RxClass – Get classes related to the drug
|
| 684 |
-
rxclass_data = get_rxclass_by_drug_name(drug_query)
|
| 685 |
-
|
| 686 |
st.subheader("RxNorm Data")
|
| 687 |
if rxnorm_id:
|
| 688 |
-
st.write(f"RxCUI for {
|
| 689 |
-
if
|
| 690 |
-
st.json(rx_properties)
|
| 691 |
-
else:
|
| 692 |
-
st.write("No RxNorm properties found.")
|
| 693 |
else:
|
| 694 |
-
st.write("No RxCUI found
|
| 695 |
-
|
| 696 |
st.subheader("RxClass Information")
|
| 697 |
-
if
|
| 698 |
-
st.json(
|
| 699 |
else:
|
| 700 |
st.write("No RxClass data found for the given drug.")
|
| 701 |
-
|
| 702 |
-
# PubChem Drug Details for generic information
|
| 703 |
-
pubchem_details = _get_pubchem_drug_details(drug_query)
|
| 704 |
st.subheader("PubChem Drug Details")
|
| 705 |
-
if
|
| 706 |
-
st.write(f"**Molecular Formula:** {
|
| 707 |
-
st.write(f"**IUPAC Name:** {
|
| 708 |
-
st.write(f"**Canonical SMILES:** {
|
| 709 |
else:
|
| 710 |
-
st.write("No PubChem details found
|
| 711 |
|
| 712 |
-
#
|
| 713 |
-
# Tab 8: AI Insights
|
| 714 |
-
# -----------------------------
|
| 715 |
with tabs[7]:
|
| 716 |
st.header("🤖 AI Insights")
|
| 717 |
-
|
| 718 |
if st.button("Generate AI Insights"):
|
| 719 |
-
with st.spinner("Generating
|
| 720 |
-
|
| 721 |
st.subheader("AI-Driven Drug Analysis")
|
| 722 |
-
st.markdown(
|
| 723 |
-
|
| 724 |
|
|
|
|
| 1 |
import streamlit as st
|
| 2 |
import requests
|
| 3 |
+
from rdkit import Chem
|
| 4 |
+
from rdkit.Chem import Draw
|
| 5 |
import pandas as pd
|
| 6 |
import matplotlib.pyplot as plt
|
| 7 |
import seaborn as sns
|
|
|
|
|
|
|
| 8 |
from fpdf import FPDF
|
| 9 |
import tempfile
|
| 10 |
import logging
|
|
|
|
| 11 |
import os
|
| 12 |
import plotly.graph_objects as go
|
| 13 |
import networkx as nx
|
| 14 |
+
from typing import Optional, Dict, List, Any
|
| 15 |
|
| 16 |
+
# -----------------------------
|
| 17 |
+
# SETUP
|
| 18 |
+
# -----------------------------
|
| 19 |
+
# Must be the very first Streamlit command
|
| 20 |
st.set_page_config(page_title="Pharma Research Expert Platform", layout="wide")
|
|
|
|
|
|
|
| 21 |
logging.basicConfig(level=logging.ERROR)
|
| 22 |
|
| 23 |
# -----------------------------
|
| 24 |
+
# API ENDPOINTS
|
| 25 |
# -----------------------------
|
| 26 |
API_ENDPOINTS = {
|
| 27 |
"clinical_trials": "https://clinicaltrials.gov/api/v2/studies", # no email required
|
|
|
|
| 29 |
"pubmed": "https://eutils.ncbi.nlm.nih.gov/entrez/eutils/esearch.fcgi",
|
| 30 |
"fda_drug_approval": "https://api.fda.gov/drug/label.json",
|
| 31 |
"faers_adverse_events": "https://api.fda.gov/drug/event.json",
|
| 32 |
+
# PharmGKB endpoints – expecting a PharmGKB accession (e.g., PA1234)
|
| 33 |
"pharmgkb_variant_clinical_annotations": "https://api.pharmgkb.org/v1/data/variant/{}/clinicalAnnotations",
|
| 34 |
"pharmgkb_gene": "https://api.pharmgkb.org/v1/data/gene/{}",
|
| 35 |
"pharmgkb_gene_variants": "https://api.pharmgkb.org/v1/data/gene/{}/variants",
|
|
|
|
| 37 |
# RxNorm endpoints
|
| 38 |
"rxnorm_rxcui": "https://rxnav.nlm.nih.gov/REST/rxcui.json",
|
| 39 |
"rxnorm_properties": "https://rxnav.nlm.nih.gov/REST/rxcui/{}/properties.json",
|
| 40 |
+
# RxClass endpoint – note: this endpoint sometimes returns 404 if no data are available.
|
| 41 |
"rxclass_by_drug": "https://rxnav.nlm.nih.gov/REST/class/byDrugName.json"
|
| 42 |
}
|
| 43 |
|
| 44 |
+
# -----------------------------
|
| 45 |
+
# Retrieve Secrets
|
| 46 |
+
# -----------------------------
|
| 47 |
OPENAI_API_KEY = st.secrets.get("OPENAI_API_KEY")
|
| 48 |
BIOPORTAL_API_KEY = st.secrets.get("BIOPORTAL_API_KEY")
|
| 49 |
PUB_EMAIL = st.secrets.get("PUB_EMAIL")
|
| 50 |
OPENFDA_KEY = st.secrets.get("OPENFDA_KEY")
|
| 51 |
|
| 52 |
if not PUB_EMAIL:
|
| 53 |
+
st.error("PubMed email (PUB_EMAIL) is not configured.")
|
| 54 |
if not BIOPORTAL_API_KEY:
|
| 55 |
+
st.error("BioPortal API key (BIOPORTAL_API_KEY) is not configured.")
|
| 56 |
if not OPENFDA_KEY:
|
| 57 |
+
st.error("OpenFDA API key (OPENFDA_KEY) is not configured.")
|
| 58 |
if not OPENAI_API_KEY:
|
| 59 |
+
st.error("OpenAI API key (OPENAI_API_KEY) is not configured.")
|
| 60 |
|
| 61 |
+
# -----------------------------
|
| 62 |
+
# Initialize OpenAI Client (GPT-4)
|
| 63 |
+
# -----------------------------
|
| 64 |
from openai import OpenAI
|
| 65 |
+
openai_client = OpenAI(api_key=OPENAI_API_KEY)
|
| 66 |
|
| 67 |
def generate_content(prompt: str) -> str:
|
| 68 |
+
"""Generate content using GPT-4 via the OpenAI API."""
|
| 69 |
try:
|
| 70 |
+
completion = openai_client.chat.completions.create(
|
| 71 |
model="gpt-4",
|
| 72 |
messages=[{"role": "user", "content": prompt}],
|
| 73 |
max_tokens=300
|
|
|
|
| 82 |
# Utility Functions
|
| 83 |
# -----------------------------
|
| 84 |
def _query_api(endpoint: str, params: Optional[Dict] = None, headers: Optional[Dict] = None) -> Optional[Dict]:
|
| 85 |
+
"""Handles API requests with error handling."""
|
| 86 |
try:
|
| 87 |
response = requests.get(endpoint, params=params, headers=headers, timeout=15)
|
| 88 |
response.raise_for_status()
|
| 89 |
return response.json()
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 90 |
except Exception as e:
|
| 91 |
+
st.error(f"API error for {endpoint}: {e}")
|
| 92 |
+
logging.error(f"Error for {endpoint}: {e}")
|
| 93 |
return None
|
| 94 |
|
| 95 |
def _get_pubchem_smiles(drug_name: str) -> Optional[str]:
|
| 96 |
+
"""Retrieve canonical SMILES string from PubChem."""
|
| 97 |
url = API_ENDPOINTS["pubchem"].format(drug_name)
|
| 98 |
data = _query_api(url)
|
| 99 |
+
if data and data.get("PC_Compounds"):
|
| 100 |
for prop in data["PC_Compounds"][0].get("props", []):
|
| 101 |
if prop.get("name") == "Canonical SMILES":
|
| 102 |
return prop["value"]["sval"]
|
| 103 |
return None
|
| 104 |
|
| 105 |
def _draw_molecule(smiles: str) -> Optional[Any]:
|
| 106 |
+
"""Draw a molecule image using RDKit."""
|
| 107 |
try:
|
| 108 |
mol = Chem.MolFromSmiles(smiles)
|
| 109 |
if mol:
|
| 110 |
return Draw.MolToImage(mol)
|
| 111 |
else:
|
| 112 |
+
st.error("Invalid SMILES provided.")
|
|
|
|
| 113 |
except Exception as e:
|
| 114 |
st.error(f"Error drawing molecule: {e}")
|
| 115 |
+
logging.error(e)
|
| 116 |
+
return None
|
| 117 |
|
| 118 |
def _get_pubchem_drug_details(drug_name: str) -> Optional[Dict[str, str]]:
|
| 119 |
+
"""Retrieve drug details (molecular formula, IUPAC name, canonical SMILES) from PubChem."""
|
| 120 |
url = API_ENDPOINTS["pubchem"].format(drug_name)
|
| 121 |
data = _query_api(url)
|
| 122 |
details = {}
|
|
|
|
| 134 |
return None
|
| 135 |
|
| 136 |
def _get_clinical_trials(query: str) -> Optional[Dict]:
|
| 137 |
+
"""Query ClinicalTrials.gov (no email parameter needed)."""
|
| 138 |
if query.upper().startswith("NCT") and query[3:].isdigit():
|
| 139 |
params = {"id": query, "fmt": "json"}
|
| 140 |
else:
|
|
|
|
| 142 |
return _query_api(API_ENDPOINTS["clinical_trials"], params)
|
| 143 |
|
| 144 |
def _get_pubmed(query: str) -> Optional[Dict]:
|
| 145 |
+
"""Query PubMed using E-utilities."""
|
| 146 |
params = {"db": "pubmed", "term": query, "retmax": 10, "retmode": "json", "email": PUB_EMAIL}
|
| 147 |
return _query_api(API_ENDPOINTS["pubmed"], params)
|
| 148 |
|
| 149 |
def _get_fda_approval(drug_name: str) -> Optional[Dict]:
|
| 150 |
+
"""Retrieve FDA drug approval info using openFDA."""
|
| 151 |
if not OPENFDA_KEY:
|
| 152 |
st.error("OpenFDA API key not configured.")
|
| 153 |
return None
|
|
|
|
| 159 |
return None
|
| 160 |
|
| 161 |
def _analyze_adverse_events(drug_name: str, limit: int = 5) -> Optional[Dict]:
|
| 162 |
+
"""Fetch adverse events from FAERS."""
|
| 163 |
if not OPENFDA_KEY:
|
| 164 |
st.error("OpenFDA API key not configured.")
|
| 165 |
return None
|
|
|
|
| 168 |
return _query_api(API_ENDPOINTS["faers_adverse_events"], params)
|
| 169 |
|
| 170 |
def _get_pharmgkb_clinical_annotations(variant_id: str) -> Optional[Dict]:
|
| 171 |
+
"""Get clinical annotations for a PharmGKB variant."""
|
| 172 |
endpoint = API_ENDPOINTS["pharmgkb_variant_clinical_annotations"].format(variant_id)
|
| 173 |
data = _query_api(endpoint)
|
| 174 |
if data and data.get("data"):
|
|
|
|
| 177 |
return None
|
| 178 |
|
| 179 |
def _get_pharmgkb_variants_for_gene(pharmgkb_gene_id: str) -> Optional[List[str]]:
|
| 180 |
+
"""Retrieve variant IDs for a PharmGKB gene accession (e.g., PA1234)."""
|
|
|
|
|
|
|
|
|
|
| 181 |
if not pharmgkb_gene_id.startswith("PA"):
|
| 182 |
+
st.warning("Please provide a valid PharmGKB accession (e.g., PA1234).")
|
| 183 |
return None
|
| 184 |
endpoint = API_ENDPOINTS["pharmgkb_gene_variants"].format(pharmgkb_gene_id)
|
| 185 |
data = _query_api(endpoint)
|
|
|
|
| 189 |
return None
|
| 190 |
|
| 191 |
def get_pharmgkb_gene_data(pharmgkb_gene_id: str) -> Optional[Dict]:
|
| 192 |
+
"""Retrieve PharmGKB gene data."""
|
|
|
|
|
|
|
| 193 |
if not pharmgkb_gene_id.startswith("PA"):
|
| 194 |
+
st.warning("Please enter a valid PharmGKB gene accession (e.g., PA1234).")
|
| 195 |
return None
|
| 196 |
endpoint = API_ENDPOINTS["pharmgkb_gene"].format(pharmgkb_gene_id)
|
| 197 |
data = _query_api(endpoint)
|
|
|
|
| 201 |
return None
|
| 202 |
|
| 203 |
def _get_bioportal_data(ontology: str, term: str) -> Optional[Dict]:
    """Search a BioPortal ontology for a term.

    Args:
        ontology: Ontology acronym sent as the ``ontologies`` parameter.
        term: Search text sent as the ``q`` parameter.

    Returns:
        The response dict when it has a non-empty ``collection``; otherwise
        ``None``. A Streamlit error is shown when the API key or the term is
        missing, and a warning when the search yields nothing.
    """
    # Preconditions are checked in the same order as before: key, then term.
    precondition_error = None
    if not BIOPORTAL_API_KEY:
        precondition_error = "BioPortal API key not configured."
    elif not term:
        precondition_error = "Please provide a term for ontology search."
    if precondition_error is not None:
        st.error(precondition_error)
        return None

    response = _query_api(
        API_ENDPOINTS["bioportal_search"],
        {"q": term, "ontologies": ontology},
        {"Authorization": f"apikey token={BIOPORTAL_API_KEY}"},
    )
    if not (response and response.get("collection")):
        st.warning("No BioPortal results found.")
        return None
    return response
|
| 218 |
|
| 219 |
def _save_pdf_report(report_content: str, filename: str):
|
| 220 |
+
"""Save report content as a PDF."""
|
| 221 |
try:
|
| 222 |
pdf = FPDF()
|
| 223 |
pdf.add_page()
|
|
|
|
| 226 |
pdf.output(filename)
|
| 227 |
return filename
|
| 228 |
except Exception as e:
|
| 229 |
+
st.error(f"Error saving PDF: {e}")
|
| 230 |
+
logging.error(e)
|
| 231 |
+
return None
|
| 232 |
|
| 233 |
def _display_dataframe(data: List[Dict[str, Any]], columns: List[str]):
    """Render a list of records as a Streamlit table.

    Args:
        data: Row records to show.
        columns: Column names passed to the ``DataFrame`` constructor.

    Returns:
        The ``DataFrame`` that was displayed, or ``None`` (after showing a
        warning) when ``data`` is empty or falsy.
    """
    if not data:
        st.warning("No data available.")
        return None

    frame = pd.DataFrame(data, columns=columns)
    st.dataframe(frame)
    return frame
|
| 241 |
|
| 242 |
def _create_variant_network(gene: str, variants: List[str], annotations: Dict) -> go.Figure:
|
| 243 |
+
"""Create a network graph (gene-variant-drug)."""
|
| 244 |
G = nx.Graph()
|
| 245 |
G.add_node(gene, color="lightblue")
|
| 246 |
for variant in variants:
|
| 247 |
G.add_node(variant, color="lightgreen")
|
| 248 |
G.add_edge(gene, variant)
|
| 249 |
for drug in annotations.get(variant, []):
|
| 250 |
+
if drug and drug != "N/A":
|
| 251 |
G.add_node(drug, color="lightcoral")
|
| 252 |
G.add_edge(variant, drug)
|
| 253 |
pos = nx.spring_layout(G)
|
|
|
|
| 258 |
edge_x.extend([x0, x1, None])
|
| 259 |
edge_y.extend([y0, y1, None])
|
| 260 |
edge_trace = go.Scatter(
|
| 261 |
+
x=edge_x, y=edge_y, line=dict(width=0.5, color="#888"),
|
| 262 |
+
hoverinfo="none", mode="lines"
|
|
|
|
|
|
|
|
|
|
| 263 |
)
|
| 264 |
node_x, node_y, node_text, node_color = [], [], [], []
|
| 265 |
for node in G.nodes():
|
|
|
|
| 269 |
node_text.append(node)
|
| 270 |
node_color.append(G.nodes[node]["color"])
|
| 271 |
node_trace = go.Scatter(
|
| 272 |
+
x=node_x, y=node_y, mode="markers+text", hoverinfo="text",
|
| 273 |
+
text=node_text, textposition="bottom center",
|
| 274 |
+
marker=dict(showscale=False, colorscale="YlGnBu",
|
| 275 |
+
color=node_color, size=10, line_width=2)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 276 |
)
|
| 277 |
fig = go.Figure(
|
| 278 |
data=[edge_trace, node_trace],
|
| 279 |
layout=go.Layout(
|
| 280 |
title=dict(text="Gene-Variant-Drug Network", font=dict(size=16)),
|
| 281 |
+
showlegend=False, hovermode="closest",
|
|
|
|
| 282 |
margin=dict(b=20, l=5, r=5, t=40),
|
| 283 |
xaxis=dict(showgrid=False, zeroline=False, showticklabels=False),
|
| 284 |
yaxis=dict(showgrid=False, zeroline=False, showticklabels=False)
|
|
|
|
| 287 |
return fig
|
| 288 |
|
| 289 |
# -----------------------------
|
| 290 |
+
# RxNorm & RxClass Functions
|
| 291 |
# -----------------------------
|
| 292 |
def get_rxnorm_rxcui(drug_name: str) -> Optional[str]:
|
| 293 |
+
"""Retrieve the RxCUI for a drug name."""
|
| 294 |
url = f"{API_ENDPOINTS['rxnorm_rxcui']}?name={drug_name}"
|
| 295 |
data = _query_api(url)
|
| 296 |
if data and "idGroup" in data and data["idGroup"].get("rxnormId"):
|
|
|
|
| 299 |
return None
|
| 300 |
|
| 301 |
def get_rxnorm_properties(rxcui: str) -> Optional[Dict]:
    """Look up the RxNorm properties record for an RxCUI.

    Args:
        rxcui: RxNorm concept identifier substituted into the endpoint path.

    Returns:
        Whatever ``_query_api`` returns for the properties endpoint.
    """
    return _query_api(API_ENDPOINTS["rxnorm_properties"].format(rxcui))
|
| 305 |
|
| 306 |
def get_rxclass_by_drug_name(drug_name: str) -> Optional[Dict]:
    """Retrieve RxClass info for a drug by name; gracefully handle if not found.

    Args:
        drug_name: Drug name to look up. Leading/trailing whitespace is ignored.

    Returns:
        The response dict when it contains a ``classMember`` key, otherwise
        ``None``. Blank or missing names return ``None`` without a request.
    """
    cleaned_name = drug_name.strip() if drug_name else ""
    if not cleaned_name:
        # Nothing to search for; avoid a pointless network round trip.
        return None

    # Pass the name as a query parameter (as the PubMed/BioPortal helpers do)
    # instead of pasting it into the URL, so spaces, '&', '+', etc. are
    # percent-encoded rather than corrupting the query string.
    data = _query_api(API_ENDPOINTS["rxclass_by_drug"], {"drugName": cleaned_name})

    # NOTE(review): the success check relies on a top-level "classMember" key;
    # confirm this matches the actual RxClass response schema.
    if data and "classMember" in data:
        return data
    return None
|
| 313 |
|
| 314 |
# -----------------------------
|
| 315 |
+
# AI-Driven Drug Insights
|
| 316 |
# -----------------------------
|
| 317 |
def generate_drug_insights(drug_name: str) -> str:
|
| 318 |
+
"""Gathers data from FDA, PubChem, RxNorm, and RxClass then uses GPT-4 to generate an innovative analysis."""
|
| 319 |
# FDA Data
|
| 320 |
fda_info = _get_fda_approval(drug_name)
|
| 321 |
fda_status = "Not Approved"
|
|
|
|
| 341 |
|
| 342 |
# RxClass Data
|
| 343 |
rxclass_data = get_rxclass_by_drug_name(drug_name)
|
| 344 |
+
rxclass_info = rxclass_data if rxclass_data else "No RxClass data available."
|
|
|
|
|
|
|
|
|
|
| 345 |
|
| 346 |
+
# Construct a prompt for GPT-4
|
| 347 |
prompt = (
|
| 348 |
f"Drug Analysis Report for '{drug_name}':\n\n"
|
| 349 |
f"**FDA Approval Status:** {fda_status}\n\n"
|
|
|
|
| 353 |
f" - Canonical SMILES: {canonical_smiles}\n\n"
|
| 354 |
f"**RxNorm Data:** {rxnorm_info}\n\n"
|
| 355 |
f"**RxClass Data:** {rxclass_info}\n\n"
|
| 356 |
+
f"As an innovative pharmacogenomics researcher and AI expert, please provide a comprehensive analysis of '{drug_name}', "
|
| 357 |
+
f"including pharmacogenomic considerations, potential repurposing opportunities, regulatory insights, and suggestions for further research. "
|
| 358 |
+
f"Present your answer in bullet points."
|
|
|
|
|
|
|
|
|
|
|
|
|
| 359 |
)
|
| 360 |
|
| 361 |
insights = generate_content(prompt)
|
| 362 |
return insights
|
| 363 |
|
| 364 |
# -----------------------------
|
| 365 |
+
# STREAMLIT APP TABS
|
| 366 |
# -----------------------------
|
| 367 |
tabs = st.tabs([
|
| 368 |
"💊 Drug Development",
|
|
|
|
| 375 |
"🤖 AI Insights"
|
| 376 |
])
|
| 377 |
|
| 378 |
+
# ----- Tab 1: Drug Development -----
|
|
|
|
|
|
|
| 379 |
with tabs[0]:
    # Tab 1: collect a target, an optional PharmGKB gene accession and a
    # strategy, then generate an AI development plan plus supporting
    # FDA and pharmacogenomic lookups.
    st.header("AI-Driven Drug Development Strategy")
    target = st.text_input("Target Disease/Pathway:", placeholder="Enter disease mechanism or target")
    target_gene = st.text_input("Target Gene (PharmGKB Accession):", placeholder="e.g., PA1234")
    strategy = st.selectbox("Development Strategy:", ["First-in-class", "Me-too", "Repurposing", "Biologic"])

    if st.button("Generate Development Plan"):
        with st.spinner("Generating plan..."):
            plan_prompt = (
                f"Develop a comprehensive drug development plan for treating {target} using a {strategy} strategy. "
                "Include sections on target validation, lead optimization, preclinical testing, clinical trial design, "
                "regulatory submission strategy, market analysis, and competitive landscape. Highlight key milestones and challenges."
            )
            plan = generate_content(plan_prompt)
            st.subheader("Comprehensive Development Plan")
            st.markdown(plan)

            st.subheader("FDA Regulatory Insights")
            # Split once and test the resulting list: a whitespace-only target
            # is a truthy string but splits to [], so indexing [0] on it
            # directly would raise IndexError.
            target_terms = target.split() if target else []
            if target_terms:
                # Only the first word of the target is used for the FDA lookup.
                fda_info = _get_fda_approval(target_terms[0])
                if fda_info:
                    st.json(fda_info)
                else:
                    st.write("No FDA data found for the given target.")

            st.subheader("Pharmacogenomic Considerations")
            if target_gene:
                if not target_gene.startswith("PA"):
                    st.warning("Please provide a valid PharmGKB accession (e.g., PA1234).")
                else:
                    variant_ids = _get_pharmgkb_variants_for_gene(target_gene)
                    if variant_ids:
                        annotations = {}
                        # Cap at five variants to limit the number of API calls.
                        for vid in variant_ids[:5]:
                            pgx = _get_pharmgkb_clinical_annotations(vid)
                            annotations[vid] = [anno.get("obj2Name", "N/A") for anno in pgx.get("data", [])] if pgx else []
                            st.write(f"### Annotations for Variant: {vid}")
                            st.json(pgx if pgx else {"message": "No annotations found."})
                    else:
                        st.write("No variants found for the specified PharmGKB gene accession.")
            else:
                st.write("Enter a PharmGKB gene accession to retrieve pharmacogenomic data.")
|
| 421 |
|
| 422 |
+
# ----- Tab 2: Clinical Trial Analytics -----
|
|
|
|
|
|
|
| 423 |
with tabs[1]:
|
| 424 |
st.header("Clinical Trial Landscape Analytics")
|
| 425 |
trial_query = st.text_input("Search Clinical Trials:", placeholder="Enter condition, intervention, or NCT number")
|
|
|
|
| 426 |
if st.button("Analyze Trial Landscape"):
|
| 427 |
with st.spinner("Fetching trial data..."):
|
| 428 |
trials = _get_clinical_trials(trial_query)
|
|
|
|
| 433 |
"Title": study.get("protocolSection", {}).get("identificationModule", {}).get("briefTitle", "N/A"),
|
| 434 |
"Status": study.get("protocolSection", {}).get("statusModule", {}).get("overallStatus", "N/A"),
|
| 435 |
"Phase": study.get("protocolSection", {}).get("designModule", {}).get("phases", ["Not Available"])[0],
|
| 436 |
+
"Enrollment": study.get("protocolSection", {}).get("designModule", {}).get("enrollmentInfo", {}).get("count", "N/A")
|
| 437 |
})
|
| 438 |
_display_dataframe(trial_data, list(trial_data[0].keys()))
|
| 439 |
else:
|
| 440 |
+
st.warning("No clinical trials found for the query.")
|
| 441 |
|
| 442 |
ae_data = _analyze_adverse_events(trial_query)
|
| 443 |
if ae_data and ae_data.get("results"):
|
| 444 |
+
st.subheader("Adverse Event Profile (Top 5)")
|
| 445 |
ae_results = ae_data["results"][:5]
|
| 446 |
ae_df = pd.json_normalize(ae_results)
|
| 447 |
st.dataframe(ae_df)
|
|
|
|
| 460 |
else:
|
| 461 |
st.write("No adverse event data available.")
|
| 462 |
|
| 463 |
+
# ----- Tab 3: Molecular Profiling -----
|
|
|
|
|
|
|
| 464 |
with tabs[2]:
|
| 465 |
st.header("Advanced Molecular Profiling")
|
| 466 |
compound_input = st.text_input("Compound Identifier:", placeholder="Enter drug name, SMILES, or INN")
|
|
|
|
| 467 |
if st.button("Analyze Compound"):
|
| 468 |
with st.spinner("Querying PubChem..."):
|
|
|
|
|
|
|
| 469 |
if Chem.MolFromSmiles(compound_input):
|
| 470 |
smiles = compound_input
|
| 471 |
else:
|
|
|
|
| 473 |
if smiles:
|
| 474 |
img = _draw_molecule(smiles)
|
| 475 |
if img:
|
| 476 |
+
st.image(img, caption="2D Molecular Structure")
|
| 477 |
else:
|
| 478 |
+
st.error("Structure not found. Please try a more specific compound name.")
|
|
|
|
| 479 |
pubchem_data = _query_api(API_ENDPOINTS["pubchem"].format(compound_input))
|
| 480 |
if pubchem_data and pubchem_data.get("PC_Compounds"):
|
| 481 |
st.subheader("Physicochemical Properties")
|
|
|
|
| 486 |
else:
|
| 487 |
st.error("Physicochemical properties not available.")
|
| 488 |
|
| 489 |
+
# ----- Tab 4: Regulatory Intelligence -----
|
|
|
|
|
|
|
| 490 |
with tabs[3]:
|
| 491 |
st.header("Global Regulatory Monitoring")
|
| 492 |
+
st.markdown("**Note:** This section now focuses on FDA data and generic drug details from PubChem.")
|
| 493 |
+
drug_prod = st.text_input("Drug Product:", placeholder="Enter generic or brand name")
|
|
|
|
| 494 |
if st.button("Generate Regulatory Report"):
|
| 495 |
with st.spinner("Compiling regulatory data..."):
|
| 496 |
+
fda_info = _get_fda_approval(drug_prod)
|
|
|
|
| 497 |
fda_status = "Not Approved"
|
| 498 |
if fda_info and fda_info.get("openfda", {}).get("brand_name"):
|
| 499 |
fda_status = ", ".join(fda_info["openfda"]["brand_name"])
|
| 500 |
+
pubchem_details = _get_pubchem_drug_details(drug_prod)
|
|
|
|
|
|
|
| 501 |
if pubchem_details:
|
| 502 |
formula = pubchem_details.get("Molecular Formula", "N/A")
|
| 503 |
iupac = pubchem_details.get("IUPAC Name", "N/A")
|
| 504 |
+
canon_smiles = pubchem_details.get("Canonical SMILES", "N/A")
|
| 505 |
else:
|
| 506 |
+
formula = iupac = canon_smiles = "Not Available"
|
|
|
|
|
|
|
| 507 |
col1, col2 = st.columns(2)
|
| 508 |
with col1:
|
| 509 |
st.markdown("**FDA Status**")
|
|
|
|
| 512 |
st.markdown("**Drug Details (PubChem)**")
|
| 513 |
st.write(f"**Molecular Formula:** {formula}")
|
| 514 |
st.write(f"**IUPAC Name:** {iupac}")
|
| 515 |
+
st.write(f"**Canonical SMILES:** {canon_smiles}")
|
| 516 |
+
report_text = (
|
| 517 |
+
f"### Regulatory Report for {drug_prod}\n\n"
|
|
|
|
| 518 |
f"**FDA Status:** {fda_status}\n\n"
|
| 519 |
f"**Molecular Formula:** {formula}\n\n"
|
| 520 |
f"**IUPAC Name:** {iupac}\n\n"
|
| 521 |
+
f"**Canonical SMILES:** {canon_smiles}\n"
|
| 522 |
)
|
| 523 |
+
with tempfile.NamedTemporaryFile(delete=False, suffix=".pdf") as tmp:
|
| 524 |
+
pdf_file = _save_pdf_report(report_text, tmp.name)
|
| 525 |
+
if pdf_file:
|
| 526 |
+
with open(pdf_file, "rb") as f:
|
| 527 |
+
st.download_button("Download Regulatory Report (PDF)", data=f, file_name=f"{drug_prod}_report.pdf", mime="application/pdf")
|
| 528 |
+
os.remove(pdf_file)
|
| 529 |
+
|
| 530 |
+
# ----- Tab 5: Literature Search -----
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 531 |
with tabs[4]:
    # Tab 5, part 1: PubMed search. Lists each matching PMID as a link.
    st.header("Literature Search")
    lit_query = st.text_input(
        "Enter search query for PubMed:",
        placeholder="e.g., Alzheimer's disease genetics",
    )
    if st.button("Search PubMed"):
        with st.spinner("Searching PubMed..."):
            pubmed_results = _get_pubmed(lit_query)
            if not (pubmed_results and pubmed_results.get("esearchresult", {}).get("idlist")):
                st.write("No PubMed results found.")
            else:
                id_list = pubmed_results["esearchresult"]["idlist"]
                st.subheader(f"Found {len(id_list)} PubMed Results")
                for pmid in id_list:
                    st.markdown(f"- [PMID: {pmid}](https://pubmed.ncbi.nlm.nih.gov/{pmid}/)")

    # Tab 5, part 2: BioPortal ontology search within the chosen ontology.
    st.header("Ontology Search")
    ont_query = st.text_input(
        "Enter search query for Ontology:",
        placeholder="e.g., Alzheimer's disease",
    )
    ont_select = st.selectbox("Select Ontology", ["MESH", "NCIT", "GO", "SNOMEDCT"])
    if st.button("Search BioPortal"):
        with st.spinner("Searching BioPortal..."):
            bioportal_results = _get_bioportal_data(ont_select, ont_query)
            if not (bioportal_results and bioportal_results.get("collection")):
                st.write("No ontology results found.")
            else:
                st.subheader(f"BioPortal Results for {ont_select}")
                for item in bioportal_results["collection"]:
                    label = item.get("prefLabel", "N/A")
                    ont_id = item.get("@id", "N/A")
                    st.markdown(f"- **{label}** ({ont_id})")
|
| 558 |
|
| 559 |
+
# ----- Tab 6: Dashboard -----
|
|
|
|
|
|
|
| 560 |
with tabs[5]:
    # Tab 6: static KPI tiles, a sample approvals trend line and a sample
    # gene-variant-drug network.
    st.header("Comprehensive Dashboard")
    # Placeholder KPIs – replace with real-time aggregated data if available
    kpi_fda = 5000
    kpi_trials = 12000
    kpi_pubs = 250000
    col1, col2, col3 = st.columns(3)
    col1.metric("FDA Approved Drugs", kpi_fda)
    col2.metric("Ongoing Trials", kpi_trials)
    col3.metric("Publications", kpi_pubs)

    st.subheader("Trend Analysis")
    years = list(range(2000, 2026))
    approvals = [kpi_fda // len(years)] * len(years)  # Sample static data
    fig_trend, ax_trend = plt.subplots(figsize=(10, 6))
    sns.lineplot(x=years, y=approvals, marker="o", ax=ax_trend)
    ax_trend.set_title("FDA Approvals Over Time")
    ax_trend.set_xlabel("Year")
    ax_trend.set_ylabel("Number of Approvals")
    st.pyplot(fig_trend)
    # Streamlit re-executes this script on every interaction; without an
    # explicit close, pyplot keeps each figure alive and memory grows.
    plt.close(fig_trend)

    st.subheader("Gene-Variant-Drug Network (Sample)")
    sample_gene = "CYP2C19"
    sample_variants = ["rs4244285", "rs12248560"]
    sample_annots = {"rs4244285": ["Clopidogrel", "Omeprazole"], "rs12248560": ["Sertraline"]}
    try:
        net_fig = _create_variant_network(sample_gene, sample_variants, sample_annots)
        st.plotly_chart(net_fig, use_container_width=True)
    except Exception as e:
        # UI boundary: surface the failure in the page instead of crashing it.
        st.error(f"Network graph error: {e}")
|
| 588 |
|
| 589 |
+
# ----- Tab 7: Drug Data Integration -----
|
|
|
|
|
|
|
| 590 |
with tabs[6]:
    # Tab 7: look up one drug name in RxNorm, RxClass and PubChem and show
    # each result under its own subheader.
    st.header("🧪 Drug Data Integration")
    drug_integration = st.text_input(
        "Enter Drug Name for API Integration:",
        placeholder="e.g., aspirin",
    )
    if st.button("Retrieve Drug Data"):
        with st.spinner("Fetching drug data..."):
            # RxNorm properties can only be fetched once an RxCUI is known.
            rxnorm_id = get_rxnorm_rxcui(drug_integration)
            rx_props = get_rxnorm_properties(rxnorm_id) if rxnorm_id else None
            rxclass_info = get_rxclass_by_drug_name(drug_integration)

            st.subheader("RxNorm Data")
            if not rxnorm_id:
                st.write("No RxCUI found.")
            else:
                st.write(f"RxCUI for {drug_integration}: {rxnorm_id}")
                st.json(rx_props or {"message": "No RxNorm properties found."})

            st.subheader("RxClass Information")
            if not rxclass_info:
                st.write("No RxClass data found for the given drug.")
            else:
                st.json(rxclass_info)

            pubchem_info = _get_pubchem_drug_details(drug_integration)
            st.subheader("PubChem Drug Details")
            if not pubchem_info:
                st.write("No PubChem details found.")
            else:
                st.write(f"**Molecular Formula:** {pubchem_info.get('Molecular Formula', 'N/A')}")
                st.write(f"**IUPAC Name:** {pubchem_info.get('IUPAC Name', 'N/A')}")
                st.write(f"**Canonical SMILES:** {pubchem_info.get('Canonical SMILES', 'N/A')}")
|
| 620 |
|
| 621 |
+
# ----- Tab 8: AI Insights -----
|
|
|
|
|
|
|
| 622 |
with tabs[7]:
    # Tab 8: generate an AI-written analysis for a single drug name.
    st.header("🤖 AI Insights")
    ai_drug = st.text_input("Enter Drug Name for AI-Driven Analysis:", placeholder="e.g., tylenol")
    if st.button("Generate AI Insights"):
        if not ai_drug or not ai_drug.strip():
            # A blank name would still fire the whole lookup and LLM pipeline
            # and produce a meaningless report, so stop early.
            st.warning("Please enter a drug name.")
        else:
            with st.spinner("Generating insights..."):
                insights_text = generate_drug_insights(ai_drug)
                st.subheader("AI-Driven Drug Analysis")
                st.markdown(insights_text)
|
|
|
|
| 630 |
|