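# Pharma Research Intelligence Suite (PRIS)
#
# Streamlit application that combines clinical-trial, FDA drug-label, and
# PubChem compound lookups with GPT-4 generated drug development strategies.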
import logging
import os
import tempfile
from typing import Optional, Dict, List, Any, Tuple
from urllib.parse import quote

import matplotlib.pyplot as plt
import networkx as nx
import pandas as pd
import plotly.graph_objects as go
import requests
import seaborn as sns
import streamlit as st
from fpdf import FPDF
from openai import OpenAI
from rdkit import Chem
from rdkit.Chem import Draw
|
|
|
|
|
# Configure root logging only once per process. Streamlit re-executes this
# script on every user interaction; ``logging.basicConfig`` ignores repeat
# calls once the root logger has handlers, but constructing a fresh
# ``FileHandler(..., mode='w')`` as an argument would still reopen -- and
# truncate -- the log file (and leak a file handle) on each rerun.
if not logging.getLogger().handlers:
    logging.basicConfig(
        level=logging.INFO,
        format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
        handlers=[
            # mode='w' starts a fresh log file each time the process starts.
            logging.FileHandler("pris_debug.log", mode='w'),
            logging.StreamHandler(),
        ],
    )

# Application-wide logger shared by every module-level class in this file.
logger = logging.getLogger("PRIS")
|
|
|
|
|
|
|
|
|
# Registry of the external REST endpoints used by the application, keyed by a
# short service name. Entries containing ``{}`` are templates that must be
# filled with ``str.format`` before use (e.g. the PubChem compound name).
API_ENDPOINTS: Dict[str, str] = {
    # Clinical trials and FDA regulatory / safety data
    "clinical_trials": "https://clinicaltrials.gov/api/v2/studies",
    "fda_drug_approval": "https://api.fda.gov/drug/label.json",
    "faers_adverse_events": "https://api.fda.gov/drug/event.json",

    # Chemistry and literature (NCBI)
    "pubchem": "https://pubchem.ncbi.nlm.nih.gov/rest/pug/compound/name/{}/JSON",
    "pubmed": "https://eutils.ncbi.nlm.nih.gov/entrez/eutils/esearch.fcgi",

    # Pharmacogenomics (PharmGKB)
    "pharmgkb_variant_clinical_annotations": "https://api.pharmgkb.org/v1/data/variant/{}/clinicalAnnotations",
    "pharmgkb_gene": "https://api.pharmgkb.org/v1/data/gene/{}",
    "pharmgkb_gene_variants": "https://api.pharmgkb.org/v1/data/gene/{}/variants",

    # Ontology search (BioPortal)
    "bioportal_search": "https://data.bioontology.org/search",

    # Drug nomenclature and classification (RxNorm / RxClass)
    "rxnorm_rxcui": "https://rxnav.nlm.nih.gov/REST/rxcui.json",
    "rxnorm_properties": "https://rxnav.nlm.nih.gov/REST/rxcui/{}/properties.json",
    "rxclass_by_drug": "https://rxnav.nlm.nih.gov/REST/class/byDrugName.json",
}
|
|
|
# Headers applied to every outgoing API request; per-call headers are merged
# on top of these, so callers may override individual entries.
DEFAULT_HEADERS: Dict[str, str] = {
    "User-Agent": "PharmaResearchIntelligenceSuite/1.0 (Professional Use)",
    "Accept": "application/json",
}
|
|
|
|
|
|
|
|
|
class APIConfigurationError(Exception):
    """Signal that a required API credential is absent or misconfigured."""
|
|
|
# Load the API credentials the application cannot run without. Any problem is
# reported in the UI and logged, and script execution is halted via ``st.stop``.
try:
    # A secret that is not defined raises KeyError on lookup.
    OPENAI_API_KEY: str = st.secrets["OPENAI_API_KEY"]
    BIOPORTAL_API_KEY: str = st.secrets["BIOPORTAL_API_KEY"]
    PUB_EMAIL: str = st.secrets["PUB_EMAIL"]
    OPENFDA_KEY: str = st.secrets["OPENFDA_KEY"]

    # A present-but-empty value is as unusable as an absent one; name the
    # offending entries so the operator knows exactly what to fix.
    _empty_secrets = [
        _name
        for _name, _value in (
            ("OPENAI_API_KEY", OPENAI_API_KEY),
            ("BIOPORTAL_API_KEY", BIOPORTAL_API_KEY),
            ("PUB_EMAIL", PUB_EMAIL),
            ("OPENFDA_KEY", OPENFDA_KEY),
        )
        if not _value
    ]
    if _empty_secrets:
        raise APIConfigurationError(
            "One or more required API credentials are missing: "
            + ", ".join(_empty_secrets)
        )

# NOTE(review): FileNotFoundError is caught on the assumption that Streamlit
# reports a wholly missing secrets file with a FileNotFoundError-derived
# error rather than KeyError -- confirm against the deployed Streamlit version.
except (KeyError, FileNotFoundError, APIConfigurationError) as e:
    # str(KeyError) wraps the message in repr quotes; use the raw argument.
    _config_problem = str(e.args[0]) if isinstance(e, KeyError) and e.args else str(e)
    st.error(f"Critical configuration error: {_config_problem}")
    logger.critical("Configuration error: %s", _config_problem)
    st.stop()
|
|
|
|
|
|
|
|
|
class PharmaResearchEngine:
    """
    Core engine for integrating and analyzing pharmaceutical data.

    Provides a shared HTTP GET helper with uniform error reporting, PubChem
    compound-profile extraction, and the OpenAI client used by the AI module.
    """

    # Substrings that mark a query-parameter name as carrying a credential.
    _SENSITIVE_MARKERS = ("key", "token", "secret")

    # Keys under which a PubChem property value may be stored, in lookup
    # order: "sval" holds text, "fval"/"ival" hold numeric values.
    _VALUE_KEYS = ("sval", "fval", "ival")

    def __init__(self) -> None:
        """Create the OpenAI client from the module-level ``OPENAI_API_KEY``."""
        self.openai_client = OpenAI(api_key=OPENAI_API_KEY)
        logger.info("PharmaResearchEngine initialized with OpenAI client.")

    @staticmethod
    def _is_sensitive(name: Any) -> bool:
        """Return True when a parameter name looks like it carries a credential."""
        lowered = str(name).lower()
        return any(marker in lowered for marker in PharmaResearchEngine._SENSITIVE_MARKERS)

    @staticmethod
    def _redact_params(params: Optional[Dict[str, Any]]) -> Optional[Dict[str, Any]]:
        """Return a copy of ``params`` that is safe to log (credentials masked)."""
        if params is None:
            return None
        return {
            name: "***" if PharmaResearchEngine._is_sensitive(name) else value
            for name, value in params.items()
        }

    @staticmethod
    def _scrub_message(message: str, params: Optional[Dict[str, Any]]) -> str:
        """
        Mask credential values from ``params`` wherever they occur in ``message``.

        Exception text produced by ``requests`` embeds the full request URL,
        query string included, so it must be scrubbed before being logged or
        shown to the user.
        """
        for name, value in (params or {}).items():
            if not value or not PharmaResearchEngine._is_sensitive(name):
                continue
            raw = str(value)
            # Mask the URL-encoded spelling first, then the raw one.
            for spelling in (quote(raw, safe=""), raw):
                message = message.replace(spelling, "***")
        return message

    @staticmethod
    def api_request(endpoint: str,
                    params: Optional[Dict[str, Any]] = None,
                    headers: Optional[Dict[str, str]] = None) -> Optional[Dict[str, Any]]:
        """
        Perform a resilient API GET request.

        Failures are logged and surfaced through ``st.error``; they never
        propagate to the caller.

        Args:
            endpoint (str): The URL endpoint for the API.
            params (Optional[Dict[str, Any]]): Query parameters to be included in the request.
            headers (Optional[Dict[str, str]]): Additional headers; merged over ``DEFAULT_HEADERS``.

        Returns:
            Optional[Dict[str, Any]]: JSON response from the API, or None if an error occurs.
        """
        scrub = PharmaResearchEngine._scrub_message
        try:
            logger.debug(
                "Requesting data from %s with params: %s",
                endpoint,
                PharmaResearchEngine._redact_params(params),
            )
            response = requests.get(
                endpoint,
                params=params,
                headers={**DEFAULT_HEADERS, **(headers or {})},
                timeout=(3.05, 15)  # (connect, read) timeouts in seconds
            )
            response.raise_for_status()
        except requests.exceptions.HTTPError as http_err:
            logger.error(
                f"HTTP Error {http_err.response.status_code} for {endpoint}: "
                f"{scrub(str(http_err), params)}"
            )
            st.error(f"API HTTP Error: {http_err.response.status_code} - {http_err.response.reason}")
            return None
        except Exception as e:
            # Deliberate catch-all: this helper is best-effort and must not
            # crash the page on connection, timeout, or URL problems.
            detail = scrub(str(e), params)
            logger.error(f"Network error during API request to {endpoint}: {detail}")
            st.error(f"Network error: {detail}")
            return None

        # Decode separately so a malformed body is not reported as a network fault.
        try:
            payload = response.json()
        except ValueError as json_err:
            logger.error("Invalid JSON in response from %s: %s", endpoint, json_err)
            st.error("API error: the service returned a response that is not valid JSON.")
            return None

        logger.info(f"Successful API request to {endpoint}")
        return payload

    def get_compound_profile(self, identifier: str) -> Optional[Dict[str, str]]:
        """
        Retrieve a chemical profile for a given compound from PubChem.

        The identifier is sent to PubChem's compound-by-name endpoint.
        NOTE(review): a SMILES string is only resolved if PubChem accepts it
        through that name lookup -- confirm whether a dedicated SMILES
        endpoint is wanted.

        Args:
            identifier (str): The compound name or SMILES string.

        Returns:
            Optional[Dict[str, str]]: A dictionary with keys ``molecular_formula``,
            ``iupac_name``, ``canonical_smiles``, ``molecular_weight`` and ``logp``
            (each "N/A" when absent). Returns None if no compound data is available.
        """
        cleaned = (identifier or "").strip()
        if not cleaned:
            logger.warning("Empty compound identifier supplied; skipping PubChem lookup.")
            return None

        # Percent-encode the identifier: characters such as '/', '#' and '?'
        # (all legal in SMILES) would otherwise alter the URL's structure.
        formatted_endpoint = API_ENDPOINTS["pubchem"].format(quote(cleaned, safe=""))
        logger.info(f"Fetching compound profile from PubChem for identifier: {identifier}")
        pubchem_data = self.api_request(formatted_endpoint)

        if not pubchem_data or not pubchem_data.get("PC_Compounds"):
            logger.warning("No compound data found in PubChem response.")
            return None

        compound = pubchem_data["PC_Compounds"][0]

        smiles = self._extract_property(compound, 'Canonical SMILES')
        if smiles == "N/A":
            # Fall back to whichever SMILES flavor the record provides.
            smiles = self._extract_property(compound, 'SMILES')

        profile = {
            'molecular_formula': self._extract_property(compound, 'Molecular Formula'),
            'iupac_name': self._extract_property(compound, 'IUPAC Name'),
            'canonical_smiles': smiles,
            'molecular_weight': self._extract_property(compound, 'Molecular Weight'),
            'logp': self._extract_property(compound, 'LogP')
        }
        logger.debug(f"Extracted compound profile: {profile}")
        return profile

    @staticmethod
    def _normalize_label(text: Any) -> str:
        """Collapse a label to a case- and whitespace-insensitive comparison key."""
        return "".join(str(text).split()).casefold()

    def _extract_property(self, compound: Dict[str, Any], prop_name: str) -> str:
        """
        Helper function to extract a specific property from PubChem compound data.

        A property matches when ``prop_name`` equals its URN ``label`` or its
        ``name`` followed by its ``label``, ignoring case and whitespace. This
        lets "LogP" match a label of "Log P", and "Canonical SMILES" match a
        property labelled "SMILES" whose name is "Canonical".

        Args:
            compound (Dict[str, Any]): The compound data dictionary from PubChem.
            prop_name (str): The name of the property to extract.

        Returns:
            str: The first matching value (text or numeric) as a string, or
            "N/A" if not found.
        """
        wanted = self._normalize_label(prop_name)
        for prop in compound.get("props", []):
            urn = prop.get("urn", {})
            label = urn.get("label", "")
            qualified = f"{urn.get('name', '')} {label}"
            if wanted not in (self._normalize_label(label), self._normalize_label(qualified)):
                continue
            value = prop.get("value", {})
            for key in self._VALUE_KEYS:
                if key in value:
                    return str(value[key])
            # Matched label without a usable value: keep scanning later entries.
        logger.debug(f"Property '{prop_name}' not found for compound.")
        return "N/A"
|
|
|
|
|
|
|
|
|
class ClinicalIntelligence:
    """
    Module for analyzing clinical trial landscapes and regulatory data.

    Wraps ClinicalTrials.gov study searches and openFDA drug-label lookups,
    delegating the HTTP work to a ``PharmaResearchEngine``.
    """

    # Number of studies requested from the API and returned to the caller.
    _MAX_TRIALS = 5

    def __init__(self) -> None:
        """Create the engine used for all outgoing API requests."""
        self.engine = PharmaResearchEngine()
        logger.info("ClinicalIntelligence module initialized.")

    @staticmethod
    def _build_trial_params(query: str) -> Dict[str, Any]:
        """
        Translate a user query into ClinicalTrials.gov v2 ``/studies`` parameters.

        A query beginning with "NCT" (any letter case) is treated as a study
        identifier and sent as ``query.id``; anything else is a free-text
        ``query.term`` search limited with ``pageSize``.
        """
        term = (query or "").strip()
        if term.upper().startswith("NCT"):
            return {"query.id": term.upper()}
        return {"query.term": term, "pageSize": ClinicalIntelligence._MAX_TRIALS}

    @staticmethod
    def _clean_drug_name(drug_name: str) -> str:
        """Strip whitespace and double quotes, which would break the quoted search phrase."""
        return (drug_name or "").replace('"', "").strip()

    def get_trial_landscape(self, query: str) -> List[Dict[str, Any]]:
        """
        Analyze the clinical trial landscape for a specified query.

        Args:
            query (str): A search term (condition, intervention, or NCT number) for clinical trials.

        Returns:
            List[Dict[str, Any]]: Up to five study records as returned by the API;
            an empty list when the query is blank or the request fails.
        """
        if not (query or "").strip():
            logger.warning("Empty clinical trial query; no request sent.")
            return []

        params = self._build_trial_params(query)
        logger.info(f"Fetching clinical trials with query: {query}")
        trials = self.engine.api_request(API_ENDPOINTS["clinical_trials"], params=params)

        # ``api_request`` returns None on failure; treat that as "no studies".
        trial_list = trials.get("studies", [])[:self._MAX_TRIALS] if trials else []
        logger.debug(f"Retrieved {len(trial_list)} clinical trials for query '{query}'")
        return trial_list

    def get_fda_approval(self, drug_name: str) -> Optional[Dict[str, Any]]:
        """
        Retrieve FDA drug-label information for a specified drug.

        The lookup matches on the ``openfda.brand_name`` field only.

        Args:
            drug_name (str): The name of the drug to query.

        Returns:
            Optional[Dict[str, Any]]: The first matching label record, or None if
            the name is blank, the key is missing, or nothing is found.
        """
        if not OPENFDA_KEY:
            st.error("OpenFDA API key not configured.")
            logger.error("Missing OpenFDA API key.")
            return None

        cleaned_name = self._clean_drug_name(drug_name)
        if not cleaned_name:
            logger.warning("Empty drug name; no FDA request sent.")
            return None

        params: Dict[str, Any] = {
            "api_key": OPENFDA_KEY,
            "search": f'openfda.brand_name:"{cleaned_name}"',
            "limit": 1
        }
        logger.info(f"Fetching FDA approval data for drug: {drug_name}")
        data = self.engine.api_request(API_ENDPOINTS["fda_drug_approval"], params=params)

        if data and data.get("results"):
            logger.debug(f"FDA approval data retrieved for drug: {drug_name}")
            return data["results"][0]
        logger.warning(f"No FDA approval data found for drug: {drug_name}")
        return None
|
|
|
class AIDrugInnovator:
    """
    AI-Driven Drug Development Strategist powered by GPT-4.

    Builds a structured prompt for a target and development paradigm and asks
    the OpenAI chat-completions API for a Markdown strategy document.
    """

    # Returned whenever the model call fails or yields no text.
    _FAILURE_MESSAGE = "Strategy generation failed. Please check API configuration and try again."

    def __init__(self) -> None:
        """Create the engine that owns the OpenAI client."""
        self.engine = PharmaResearchEngine()
        logger.info("AIDrugInnovator module initialized with GPT-4 integration.")

    @staticmethod
    def _build_prompt(target: str, strategy: str) -> str:
        """Compose the chat prompt describing the requested strategy sections."""
        return (
            f"As Chief Scientific Officer at a leading pharmaceutical company, "
            f"develop a {strategy} development strategy for the target: {target}.\n\n"
            "Include the following sections:\n"
            "- **Target Validation Approach:** Describe methods to confirm the target's role in the disease.\n"
            "- **Lead Optimization Tactics:** Outline strategies for refining lead compounds.\n"
            "- **Clinical Trial Design:** Propose innovative trial designs and endpoints.\n"
            "- **Regulatory Pathway Analysis:** Evaluate the regulatory strategy and compliance roadmap.\n"
            "- **Commercial Potential Assessment:** Analyze market opportunity and competitive landscape.\n\n"
            "Please format your response in Markdown with clear, well-defined sections."
        )

    def generate_strategy(self, target: str, strategy: str) -> str:
        """
        Generate an AI-driven development strategy.

        Args:
            target (str): The target disease, pathway, or biological entity.
            strategy (str): The desired development paradigm (e.g., "First-in-class").

        Returns:
            str: A strategic blueprint in Markdown, a prompt to supply a target
            when ``target`` is blank, or a failure notice if the model call
            errors or returns no text. Never raises.
        """
        target = (target or "").strip()
        if not target:
            # Avoid spending an API call on a prompt with no subject.
            logger.warning("Strategy generation requested without a target.")
            return "Please provide a target before generating a strategy."

        prompt: str = self._build_prompt(target, strategy)

        logger.info(f"Generating AI strategy for target: {target} using paradigm: {strategy}")
        try:
            response = self.engine.openai_client.chat.completions.create(
                model="gpt-4",
                messages=[{"role": "user", "content": prompt}],
                temperature=0.7,
                max_tokens=1500
            )
            generated_strategy = response.choices[0].message.content
        except Exception as e:
            # UI boundary: keep the page alive, but record the full traceback.
            logger.exception("Error during AI strategy generation: %s", e)
            return self._FAILURE_MESSAGE

        if not generated_strategy:
            # The message content can be empty/None; honor the ``-> str`` contract.
            logger.error("AI strategy generation returned no content.")
            return self._FAILURE_MESSAGE

        logger.debug("AI strategy generation successful.")
        return generated_strategy
|
|
|
|
|
|
|
|
|
class PharmaResearchInterface:
    """
    User Interface for the Pharma Research Intelligence Suite.

    Configures the Streamlit page and renders five tabs: drug innovation,
    clinical trial analytics, compound profiling, regulatory lookup, and the
    AI strategist.
    """

    def __init__(self) -> None:
        """Create the backing modules and apply the page configuration."""
        self.clinical_intel = ClinicalIntelligence()
        self.ai_innovator = AIDrugInnovator()
        self._configure_page()
        logger.info("PharmaResearchInterface initialized and page configured.")

    def _configure_page(self) -> None:
        """
        Configure the Streamlit page settings and apply custom CSS styles.
        """
        st.set_page_config(
            page_title="PRIS - Pharma Research Intelligence Suite",
            layout="wide",
            initial_sidebar_state="expanded"
        )
        # Static, developer-authored CSS: raw HTML is acceptable here.
        st.markdown("""
        <style>
            .main {background-color: #f9f9f9; padding: 20px;}
            .stAlert {padding: 20px; border: 1px solid #e0e0e0; border-radius: 5px; background-color: #fff;}
            .reportview-container .markdown-text-container {font-family: 'Arial', sans-serif; line-height: 1.6;}
        </style>
        """, unsafe_allow_html=True)
        logger.info("Streamlit page configuration completed.")

    def render(self) -> None:
        """
        Render the complete Streamlit user interface with multiple functional tabs.
        """
        st.title("Pharma Research Intelligence Suite")
        self._render_navigation()
        logger.info("User interface rendered successfully.")

    def _render_navigation(self) -> None:
        """
        Create a tab-based navigation layout and render each module in its tab.
        """
        tabs = st.tabs([
            "🚀 Drug Innovation",
            "📈 Trial Analytics",
            "🧪 Compound Profiler",
            "📜 Regulatory Hub",
            "🤖 AI Strategist"
        ])

        with tabs[0]:
            self._drug_innovation()
        with tabs[1]:
            self._trial_analytics()
        with tabs[2]:
            self._compound_profiler()
        with tabs[3]:
            self._regulatory_hub()
        with tabs[4]:
            self._ai_strategist()

    def _drug_innovation(self) -> None:
        """
        Render the drug innovation module that generates AI-powered development strategies.

        Inputs live in the narrow left column; the generated blueprint is
        rendered in the wide right column.
        """
        st.header("AI-Powered Drug Innovation Engine")
        col1, col2 = st.columns([1, 3])

        with col1:
            target = st.text_input("Target Pathobiology:", placeholder="e.g., EGFR mutant NSCLC")
            strategy = st.selectbox("Development Paradigm:",
                                    ["First-in-class", "Fast-follower", "Biologic", "ADC", "Gene Therapy"])
            requested = st.button("Generate Development Blueprint")

        if not requested:
            return
        if not (target or "").strip():
            st.warning("Please enter a target before generating a blueprint.")
            return

        with col2:
            with st.spinner("Formulating strategic plan..."):
                blueprint = self.ai_innovator.generate_strategy(target, strategy)
            # Model output echoes user-supplied text, so it is rendered as
            # plain Markdown; raw HTML is deliberately not enabled.
            st.markdown(blueprint)
        logger.info("Drug innovation strategy generated and displayed.")

    @staticmethod
    def _summarize_trial(study: Dict[str, Any]) -> Dict[str, Any]:
        """
        Flatten one ClinicalTrials.gov study record into a table row.

        Missing sections fall back to "N/A". All listed phases are joined with
        "/" so a combined-phase study is not reported as its first phase only,
        and an empty phase list no longer raises IndexError.
        """
        protocol = study.get("protocolSection", {})
        design = protocol.get("designModule", {})
        phases = design.get("phases") or []
        return {
            "Title": protocol.get("identificationModule", {}).get("briefTitle", "N/A"),
            "Status": protocol.get("statusModule", {}).get("overallStatus", "N/A"),
            "Phase": "/".join(phases) if phases else "N/A",
            "Enrollment": design.get("enrollmentInfo", {}).get("count", "N/A"),
        }

    def _trial_analytics(self) -> None:
        """
        Render the clinical trial analytics module to explore current trial landscapes.
        """
        st.header("Clinical Trial Landscape Analysis")
        trial_query = st.text_input("Search Clinical Trials:", placeholder="Enter condition, intervention, or NCT number")

        if not st.button("Analyze Trial Landscape"):
            return
        if not (trial_query or "").strip():
            st.warning("Please enter a search term before analyzing trials.")
            return

        with st.spinner("Fetching trial data..."):
            trials = self.clinical_intel.get_trial_landscape(trial_query)

        if not trials:
            st.warning("No clinical trials found for the query.")
            logger.warning("No clinical trial data returned from API.")
            return

        st.subheader("Top 5 Clinical Trials")
        df = pd.DataFrame([self._summarize_trial(study) for study in trials])
        st.dataframe(df)

        st.subheader("Trial Phase Distribution")
        phase_counts = df["Phase"].value_counts()
        fig, ax = plt.subplots()
        try:
            sns.barplot(x=phase_counts.index, y=phase_counts.values, ax=ax)
            ax.set_xlabel("Trial Phase")
            ax.set_ylabel("Number of Trials")
            st.pyplot(fig)
        finally:
            # pyplot keeps every figure alive until closed; without this each
            # rerun of the script would accumulate another figure in memory.
            plt.close(fig)
        logger.info("Clinical trial analytics displayed successfully.")

    def _compound_profiler(self) -> None:
        """
        Render the compound profiler module: 2D structure plus physicochemical properties.
        """
        st.header("Multi-Omics Compound Profiler")
        compound = st.text_input("Analyze Compound:", placeholder="Enter drug name or SMILES")

        if not compound:
            return

        with st.spinner("Decoding molecular profile..."):
            # Reuse the existing engine instead of constructing a new one
            # (and a new OpenAI client) on every script rerun.
            profile = self.clinical_intel.engine.get_compound_profile(compound)

        if not profile:
            st.warning("No compound data available. Please verify the input.")
            logger.warning("Compound profiler did not return any data.")
            return

        col1, col2 = st.columns(2)
        with col1:
            st.subheader("Structural Insights")
            smiles = profile['canonical_smiles']
            # "N/A" is the profile's missing-value marker, not a SMILES string.
            mol = Chem.MolFromSmiles(smiles) if smiles and smiles != "N/A" else None
            if mol:
                img = Draw.MolToImage(mol, size=(400, 300))
                st.image(img, caption="2D Molecular Structure")
            else:
                st.warning("Unable to render molecular structure from SMILES.")
                logger.warning("RDKit failed to create molecule from SMILES.")

        with col2:
            st.subheader("Physicochemical Profile")
            st.metric("Molecular Weight", profile['molecular_weight'])
            st.metric("LogP", profile['logp'])
            st.metric("IUPAC Name", profile['iupac_name'])
            st.code(f"SMILES: {profile['canonical_smiles']}")
        logger.info("Compound profile details rendered.")

    def _regulatory_hub(self) -> None:
        """
        Render the regulatory intelligence hub module for accessing FDA drug-label data.
        """
        st.header("Regulatory Intelligence Hub")
        st.write("Access detailed insights into FDA approvals and regulatory pathways.")
        drug_name = st.text_input("Enter Drug Name for Regulatory Analysis:", placeholder="e.g., aspirin")

        if not st.button("Fetch Regulatory Data"):
            return
        if not (drug_name or "").strip():
            st.warning("Please enter a drug name before fetching regulatory data.")
            return

        with st.spinner("Retrieving regulatory information..."):
            fda_data = self.clinical_intel.get_fda_approval(drug_name)

        if fda_data:
            st.subheader("FDA Approval Details")
            st.json(fda_data)
            logger.info("FDA regulatory data displayed.")
        else:
            st.warning("No FDA data found for the specified drug.")
            logger.warning("FDA regulatory data retrieval returned no results.")

    def _ai_strategist(self) -> None:
        """
        Render the AI strategist module, which always requests a "First-in-class" strategy.
        """
        st.header("AI Drug Development Strategist")
        st.write("Utilize GPT-4 to craft cutting-edge drug development strategies.")
        target = st.text_input("Enter Target Disease or Pathway:", placeholder="e.g., KRAS G12C mutation")

        if not st.button("Generate AI Strategy"):
            return
        if not (target or "").strip():
            st.warning("Please enter a target disease or pathway before generating a strategy.")
            return

        with st.spinner("Generating AI-driven strategy..."):
            strategy = self.ai_innovator.generate_strategy(target, "First-in-class")
        # Rendered without raw-HTML support: the text comes from the model and
        # incorporates user input.
        st.markdown(strategy)
        logger.info("AI-driven strategy generated and displayed.")
|
|
|
|
|
|
|
|
|
# Script entry point: build the interface and render it.
if __name__ == "__main__":
    try:
        interface = PharmaResearchInterface()
        interface.render()
        logger.info("PRIS application launched successfully.")
    except Exception as e:
        # Top-level boundary: show a short message in the UI, but keep the
        # full traceback in the log so the failure can be diagnosed.
        logger.critical("Unexpected error during application launch: %s", e, exc_info=True)
        st.error(f"Application failed to start due to an unexpected error: {str(e)}")
|
|