mgbam committed on
Commit
62a6d51
·
verified ·
1 Parent(s): 44a12f0

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +511 -316
app.py CHANGED
@@ -1,356 +1,551 @@
1
-
2
-
3
- # -----------------------------
4
- # IMPORTS & CONFIGURATION
5
- # -----------------------------
6
  import streamlit as st
7
  import requests
8
- from rdkit import Chem
9
- from rdkit.Chem import Draw
10
  import pandas as pd
11
  import matplotlib.pyplot as plt
12
  import seaborn as sns
 
 
13
  from fpdf import FPDF
14
  import tempfile
15
  import logging
16
  import os
17
  import plotly.graph_objects as go
18
  import networkx as nx
19
- from typing import Optional, Dict, List, Any, Tuple
20
- from openai import OpenAI
21
 
22
- # Configure professional logging
23
- logging.basicConfig(
24
- level=logging.INFO,
25
- format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
26
- handlers=[logging.FileHandler("pris_debug.log")]
27
- )
28
- logger = logging.getLogger("PRIS")
29
-
30
- # -----------------------------
31
- # GLOBAL CONSTANTS
32
- # -----------------------------
33
  API_ENDPOINTS = {
34
- # Clinical Data Services
35
  "clinical_trials": "https://clinicaltrials.gov/api/v2/studies",
36
- "fda_drug_approval": "https://api.fda.gov/drug/label.json",
37
- "faers_adverse_events": "https://api.fda.gov/drug/event.json",
38
-
39
- # Chemical & Biological Data
40
  "pubchem": "https://pubchem.ncbi.nlm.nih.gov/rest/pug/compound/name/{}/JSON",
41
  "pubmed": "https://eutils.ncbi.nlm.nih.gov/entrez/eutils/esearch.fcgi",
42
-
43
- # Pharmacogenomics Resources
44
- "pharmgkb_variant_clinical_annotations": "https://api.pharmgkb.org/v1/data/variant/{}/clinicalAnnotations",
45
- "pharmgkb_gene": "https://api.pharmgkb.org/v1/data/gene/{}",
46
  "pharmgkb_gene_variants": "https://api.pharmgkb.org/v1/data/gene/{}/variants",
47
-
48
- # Semantic Medical Resources
49
  "bioportal_search": "https://data.bioontology.org/search",
50
-
51
- # Drug Classification Systems
52
  "rxnorm_rxcui": "https://rxnav.nlm.nih.gov/REST/rxcui.json",
53
  "rxnorm_properties": "https://rxnav.nlm.nih.gov/REST/rxcui/{}/properties.json",
54
  "rxclass_by_drug": "https://rxnav.nlm.nih.gov/REST/class/byDrugName.json"
55
  }
56
 
57
- DEFAULT_HEADERS = {
58
- "User-Agent": "PharmaResearchIntelligenceSuite/1.0 (Professional Use)",
59
- "Accept": "application/json"
 
 
 
 
60
  }
61
 
62
- # -----------------------------
63
- # SECRETS MANAGEMENT
64
- # -----------------------------
65
- class APIConfigurationError(Exception):
66
- """Custom exception for missing API configurations"""
67
- pass
68
-
69
- try:
70
- OPENAI_API_KEY = st.secrets["OPENAI_API_KEY"]
71
- BIOPORTAL_API_KEY = st.secrets["BIOPORTAL_API_KEY"]
72
- PUB_EMAIL = st.secrets["PUB_EMAIL"]
73
- OPENFDA_KEY = st.secrets["OPENFDA_KEY"]
74
-
75
- # Validate essential configurations
76
- if not all([OPENAI_API_KEY, BIOPORTAL_API_KEY, PUB_EMAIL, OPENFDA_KEY]):
77
- raise APIConfigurationError("Missing one or more required API credentials")
78
-
79
- except (KeyError, APIConfigurationError) as e:
80
- st.error(f"Critical configuration error: {str(e)}")
81
- st.stop()
82
-
83
- # -----------------------------
84
- # CORE INFRASTRUCTURE
85
- # -----------------------------
86
- class PharmaResearchEngine:
87
- """Core engine for pharmaceutical data integration and analysis"""
88
-
89
- def __init__(self):
90
- self.openai_client = OpenAI(api_key=OPENAI_API_KEY)
91
-
92
- @staticmethod
93
- def api_request(endpoint: str,
94
- params: Optional[Dict] = None,
95
- headers: Optional[Dict] = None) -> Optional[Dict]:
96
- """Enterprise-grade API request handler with advanced resilience"""
97
- try:
98
- response = requests.get(
99
- endpoint,
100
- params=params,
101
- headers={**DEFAULT_HEADERS, **(headers or {})},
102
- timeout=(3.05, 15)
103
- )
104
- response.raise_for_status()
105
- return response.json()
106
- except requests.exceptions.HTTPError as e:
107
- logger.error(f"HTTP Error {e.response.status_code} for {endpoint}")
108
- st.error(f"API Error: {e.response.status_code} - {e.response.reason}")
109
- except Exception as e:
110
- logger.error(f"Network error for {endpoint}: {str(e)}")
111
- st.error(f"Network error: {str(e)}")
112
- return None
113
 
114
- def get_compound_profile(self, identifier: str) -> Optional[Dict]:
115
- """Retrieve comprehensive chemical profile"""
116
- pubchem_data = self.api_request(
117
- API_ENDPOINTS["pubchem"].format(identifier)
 
 
 
 
 
 
 
 
 
118
  )
119
-
120
- if not pubchem_data or not pubchem_data.get("PC_Compounds"):
121
- return None
122
-
123
- compound = pubchem_data["PC_Compounds"][0]
124
- return {
125
- 'molecular_formula': self._extract_property(compound, 'Molecular Formula'),
126
- 'iupac_name': self._extract_property(compound, 'IUPAC Name'),
127
- 'canonical_smiles': self._extract_property(compound, 'Canonical SMILES'),
128
- 'molecular_weight': self._extract_property(compound, 'Molecular Weight'),
129
- 'logp': self._extract_property(compound, 'LogP')
130
- }
131
-
132
- def _extract_property(self, compound: Dict, prop_name: str) -> str:
133
- """Helper for property extraction from PubChem data"""
134
- for prop in compound.get("props", []):
135
- if prop.get("urn", {}).get("label") == prop_name:
 
 
 
 
 
 
 
 
 
 
 
 
136
  return prop["value"]["sval"]
137
- return "N/A"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
138
 
139
- # -----------------------------
140
- # INTELLIGENCE MODULES
141
- # -----------------------------
142
- class ClinicalIntelligence:
143
- """Handles clinical trial and regulatory data analysis"""
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
144
 
145
- def __init__(self):
146
- self.engine = PharmaResearchEngine()
 
 
 
147
 
148
- def get_trial_landscape(self, query: str) -> List[Dict]:
149
- """Analyze clinical trial landscape for given query"""
150
- params = {"query.term": query, "retmax": 10} if not query.startswith("NCT") else {"id": query}
151
- trials = self.engine.api_request(API_ENDPOINTS["clinical_trials"], params=params)
152
- return trials.get("studies", [])[:5]
153
-
154
- def get_fda_approval(self, drug_name: str) -> Optional[Dict]:
155
- """Retrieve FDA approval information for a drug"""
156
- if not OPENFDA_KEY:
157
- st.error("OpenFDA API key not configured.")
158
- return None
159
-
160
- params = {
161
- "api_key": OPENFDA_KEY,
162
- "search": f'openfda.brand_name:"{drug_name}"',
163
- "limit": 1
164
- }
165
-
166
- data = self.engine.api_request(
167
- API_ENDPOINTS["fda_drug_approval"],
168
- params=params
169
- )
170
-
171
- if data and data.get("results"):
172
- return data["results"][0]
173
- return None
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
174
 
175
- class AIDrugInnovator:
176
- """GPT-4 powered drug development strategist"""
 
 
 
 
177
 
178
- def __init__(self):
179
- self.engine = PharmaResearchEngine()
180
-
181
- def generate_strategy(self, target: str, strategy: str) -> str:
182
- """Generate AI-driven development strategy"""
183
- prompt = f"""As Chief Scientific Officer of a top pharmaceutical company, develop a {strategy} strategy for {target}.
184
- Include:
185
- - Target validation approach
186
- - Lead optimization tactics
187
- - Clinical trial design
188
- - Regulatory pathway analysis
189
- - Commercial potential assessment
190
- Format in Markdown with clear sections."""
191
-
192
- try:
193
- response = self.engine.openai_client.chat.completions.create(
194
- model="gpt-4",
195
- messages=[{"role": "user", "content": prompt}],
196
- temperature=0.7,
197
- max_tokens=1500
198
  )
199
- return response.choices[0].message.content
200
- except Exception as e:
201
- logger.error(f"AI Strategy Error: {str(e)}")
202
- return "Strategy generation failed. Please check API configuration."
203
-
204
- # -----------------------------
205
- # STREAMLIT INTERFACE
206
- # -----------------------------
207
- class PharmaResearchInterface:
208
- """Modern UI for pharmaceutical research platform"""
209
-
210
- def __init__(self):
211
- self.clinical_intel = ClinicalIntelligence()
212
- self.ai_innovator = AIDrugInnovator()
213
- self._configure_page()
214
-
215
- def _configure_page(self):
216
- """Setup Streamlit page configuration"""
217
- st.set_page_config(
218
- page_title="PRIS - Pharma Research Intelligence Suite",
219
- layout="wide",
220
- initial_sidebar_state="expanded"
221
- )
222
- st.markdown("""
223
- <style>
224
- .main {background-color: #f9f9f9;}
225
- .stAlert {padding: 20px;}
226
- .reportview-container .markdown-text-container {font-family: 'Arial'}
227
- </style>
228
- """, unsafe_allow_html=True)
229
-
230
- def render(self):
231
- """Main application interface"""
232
- st.title("Pharma Research Intelligence Suite")
233
- self._render_navigation()
234
-
235
- def _render_navigation(self):
236
- """Dynamic tab-based navigation system"""
237
- tabs = st.tabs([
238
- "🚀 Drug Innovation",
239
- "📈 Trial Analytics",
240
- "🧪 Compound Profiler",
241
- "📜 Regulatory Hub",
242
- "🤖 AI Strategist"
243
- ])
244
 
245
- with tabs[0]: self._drug_innovation()
246
- with tabs[1]: self._trial_analytics()
247
- with tabs[2]: self._compound_profiler()
248
- with tabs[3]: self._regulatory_hub()
249
- with tabs[4]: self._ai_strategist()
250
-
251
- def _drug_innovation(self):
252
- """Drug development strategy interface"""
253
- st.header("AI-Powered Drug Innovation Engine")
254
- col1, col2 = st.columns([1, 3])
255
 
256
- with col1:
257
- target = st.text_input("Target Pathobiology:", placeholder="e.g., EGFR mutant NSCLC")
258
- strategy = st.selectbox("Development Paradigm:",
259
- ["First-in-class", "Fast-follower", "Biologic", "ADC", "Gene Therapy"])
260
- if st.button("Generate Development Blueprint"):
261
- with st.spinner("Formulating strategic plan..."):
262
- blueprint = self.ai_innovator.generate_strategy(target, strategy)
263
- st.markdown(blueprint, unsafe_allow_html=True)
264
-
265
- def _trial_analytics(self):
266
- """Clinical trial analytics interface"""
267
- st.header("Clinical Trial Landscape Analysis")
268
- trial_query = st.text_input("Search Clinical Trials:", placeholder="Enter condition, intervention, or NCT number")
269
-
270
- if st.button("Analyze Trial Landscape"):
271
- with st.spinner("Fetching trial data..."):
272
- trials = self.clinical_intel.get_trial_landscape(trial_query)
273
-
274
- if trials:
275
- st.subheader("Top 5 Clinical Trials")
276
- trial_data = []
277
- for study in trials:
278
- trial_data.append({
279
- "Title": study.get("protocolSection", {}).get("identificationModule", {}).get("briefTitle", "N/A"),
280
- "Status": study.get("protocolSection", {}).get("statusModule", {}).get("overallStatus", "N/A"),
281
- "Phase": study.get("protocolSection", {}).get("designModule", {}).get("phases", ["N/A"])[0],
282
- "Enrollment": study.get("protocolSection", {}).get("designModule", {}).get("enrollmentInfo", {}).get("count", "N/A")
283
- })
284
-
285
- # Display as a DataFrame
286
- df = pd.DataFrame(trial_data)
287
- st.dataframe(df)
288
-
289
- # Visualization
290
- st.subheader("Trial Phase Distribution")
291
- phase_counts = df["Phase"].value_counts()
292
- fig, ax = plt.subplots()
293
- sns.barplot(x=phase_counts.index, y=phase_counts.values, ax=ax)
294
- ax.set_xlabel("Trial Phase")
295
- ax.set_ylabel("Number of Trials")
296
- st.pyplot(fig)
297
  else:
298
- st.warning("No clinical trials found for the query.")
 
 
299
 
300
- def _compound_profiler(self):
301
- """Advanced chemical analysis interface"""
302
- st.header("Multi-Omics Compound Profiler")
303
- compound = st.text_input("Analyze Compound:", placeholder="Enter drug name or SMILES")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
304
 
305
- if compound:
306
- with st.spinner("Decoding molecular profile..."):
307
- profile = PharmaResearchEngine().get_compound_profile(compound)
308
-
309
- if profile:
310
- col1, col2 = st.columns(2)
311
- with col1:
312
- st.subheader("Structural Insights")
313
- mol = Chem.MolFromSmiles(profile['canonical_smiles'])
314
- if mol:
315
- img = Draw.MolToImage(mol, size=(400, 300))
316
- st.image(img, caption="2D Molecular Structure")
317
-
318
- with col2:
319
- st.subheader("Physicochemical Profile")
320
- st.metric("Molecular Weight", profile['molecular_weight'])
321
- st.metric("LogP", profile['logp'])
322
- st.metric("IUPAC Name", profile['iupac_name'])
323
- st.code(f"SMILES: {profile['canonical_smiles']}")
324
-
325
- def _regulatory_hub(self):
326
- """Regulatory intelligence interface"""
327
- st.header("Regulatory Intelligence Hub")
328
- st.write("This section provides insights into FDA approvals and regulatory pathways.")
329
- drug_name = st.text_input("Enter Drug Name for Regulatory Analysis:", placeholder="e.g., aspirin")
330
-
331
- if st.button("Fetch Regulatory Data"):
332
- with st.spinner("Retrieving regulatory information..."):
333
- fda_data = self.clinical_intel.get_fda_approval(drug_name)
334
- if fda_data:
335
- st.subheader("FDA Approval Details")
336
- st.json(fda_data)
337
- else:
338
- st.warning("No FDA data found for the specified drug.")
339
 
340
- def _ai_strategist(self):
341
- """AI-driven drug strategy interface"""
342
- st.header("AI Drug Development Strategist")
343
- st.write("Leverage GPT-4 for innovative drug development strategies.")
344
- target = st.text_input("Enter Target Disease or Pathway:", placeholder="e.g., KRAS G12C mutation")
345
-
346
- if st.button("Generate AI Strategy"):
347
- with st.spinner("Generating AI-driven strategy..."):
348
- strategy = self.ai_innovator.generate_strategy(target, "First-in-class")
349
- st.markdown(strategy, unsafe_allow_html=True)
350
-
351
- # -----------------------------
352
- # MAIN EXECUTION
353
- # -----------------------------
354
- if __name__ == "__main__":
355
- interface = PharmaResearchInterface()
356
- interface.render()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  import streamlit as st
2
  import requests
 
 
3
  import pandas as pd
4
  import matplotlib.pyplot as plt
5
  import seaborn as sns
6
+ from rdkit import Chem
7
+ from rdkit.Chem import Draw
8
  from fpdf import FPDF
9
  import tempfile
10
  import logging
11
  import os
12
  import plotly.graph_objects as go
13
  import networkx as nx
14
+ from typing import Optional, Dict, List, Any
15
+ from datetime import datetime
16
 
17
+ # -------------------------------
18
+ # STREAMLIT CONFIGURATION & LOGGING
19
+ # -------------------------------
20
# Streamlit page setup and logging threshold for the whole app.
st.set_page_config(layout="wide", page_title="Pharma Research Expert Platform")
logging.basicConfig(level=logging.ERROR)

# Remote services queried by the helper functions in this file. Entries that
# contain "{}" are templates filled in by callers with str.format().
API_ENDPOINTS = dict(
    clinical_trials="https://clinicaltrials.gov/api/v2/studies",
    pubchem="https://pubchem.ncbi.nlm.nih.gov/rest/pug/compound/name/{}/JSON",
    pubmed="https://eutils.ncbi.nlm.nih.gov/entrez/eutils/esearch.fcgi",
    fda_drug_approval="https://api.fda.gov/drug/label.json",
    faers_adverse_events="https://api.fda.gov/drug/event.json",
    # PharmGKB: variants for a gene accession
    pharmgkb_gene_variants="https://api.pharmgkb.org/v1/data/gene/{}/variants",
    # BioPortal: ontology search
    bioportal_search="https://data.bioontology.org/search",
    # RxNorm / RxClass
    rxnorm_rxcui="https://rxnav.nlm.nih.gov/REST/rxcui.json",
    rxnorm_properties="https://rxnav.nlm.nih.gov/REST/rxcui/{}/properties.json",
    rxclass_by_drug="https://rxnav.nlm.nih.gov/REST/class/byDrugName.json",
)

# Lower-cased trade name -> generic name, used as a fallback before lookups.
# Add further pairs here as needed.
TRADE_TO_GENERIC = dict(
    tylenol="acetaminophen",
    advil="ibuprofen",
)
50
 
51
+ # -------------------------------
52
+ # SECRETS RETRIEVAL
53
+ # -------------------------------
54
# Credentials are read from Streamlit secrets; a missing entry yields None.
OPENAI_API_KEY = st.secrets.get("OPENAI_API_KEY")
BIOPORTAL_API_KEY = st.secrets.get("BIOPORTAL_API_KEY")
PUB_EMAIL = st.secrets.get("PUB_EMAIL")
OPENFDA_KEY = st.secrets.get("OPENFDA_KEY")

# Report every missing credential in the UI, in a fixed order, without
# stopping the app: features that do not need the missing key keep working.
for _secret_name, _secret_value in (
    ("PUB_EMAIL", PUB_EMAIL),
    ("BIOPORTAL_API_KEY", BIOPORTAL_API_KEY),
    ("OPENFDA_KEY", OPENFDA_KEY),
    ("OPENAI_API_KEY", OPENAI_API_KEY),
):
    if not _secret_value:
        st.error(f"{_secret_name} is not configured in secrets.")

# -------------------------------
# INITIALIZE OPENAI (GPT-4) CLIENT
# -------------------------------
from openai import OpenAI

# Constructing the client with no key at all raises, which would abort the
# script right after the messages above. Only build it when a key is
# available (secret first, then the OPENAI_API_KEY environment variable the
# SDK would otherwise have picked up itself); otherwise leave it as None so
# callers can fail gracefully.
_openai_key = OPENAI_API_KEY or os.environ.get("OPENAI_API_KEY")
openai_client = OpenAI(api_key=_openai_key) if _openai_key else None
73
+
74
def generate_ai_content(prompt: str) -> str:
    """Send ``prompt`` to the GPT-4 chat model and return its reply text.

    Args:
        prompt: Text sent as the single user message of the chat request.

    Returns:
        The reply with surrounding whitespace stripped, or the fixed string
        "AI content generation failed." when the client is unavailable, the
        request raises, or the reply carries no text. Failures are also shown
        in the Streamlit UI and logged.
    """
    failure_message = "AI content generation failed."

    # The module-level client may be absent when no API key is configured.
    if openai_client is None:
        st.error("GPT-4 generation error: OpenAI client is not configured.")
        return failure_message

    try:
        response = openai_client.chat.completions.create(
            model="gpt-4",
            messages=[{"role": "user", "content": prompt}],
            max_tokens=300,
        )
        content = response.choices[0].message.content
    except Exception as e:
        # Deliberate catch-all at the UI boundary: any SDK or network failure
        # is reported to the user instead of aborting the page.
        st.error(f"GPT-4 generation error: {e}")
        logging.exception("GPT-4 generation failed")
        return failure_message

    # The message content can be None; report that explicitly rather than
    # letting ``.strip()`` fail with an opaque AttributeError.
    if not content:
        st.error("GPT-4 generation error: the model returned no text.")
        return failure_message
    return content.strip()
87
+
88
+ # -------------------------------
89
+ # UTILITY FUNCTIONS WITH CACHING
90
+ # -------------------------------
91
@st.cache_data(show_spinner=False)
def _fetch_json(endpoint: str, params: Optional[Dict] = None, headers: Optional[Dict] = None) -> Dict:
    """GET ``endpoint`` and return the decoded JSON body; raise on any failure.

    Only successful responses reach the cache: when this function raises,
    nothing is stored, so the next call retries the request.
    """
    response = requests.get(endpoint, params=params, headers=headers, timeout=15)
    response.raise_for_status()
    return response.json()


def query_api(endpoint: str, params: Optional[Dict] = None, headers: Optional[Dict] = None) -> Optional[Dict]:
    """Wrapper for HTTP GET requests with caching and error handling.

    Args:
        endpoint: Full URL to request.
        params: Optional query-string parameters.
        headers: Optional HTTP headers.

    Returns:
        The decoded JSON response, or None if the request or decoding failed.
        Failures are shown in the Streamlit UI and logged; they are not
        cached, so a transient error does not stick for later calls.
    """
    try:
        return _fetch_json(endpoint, params, headers)
    except Exception as e:
        # Deliberate best-effort boundary: callers treat None as "no data".
        st.error(f"API error for {endpoint}: {e}")
        logging.error(f"API error for {endpoint}: {e}")
        return None
102
+
103
@st.cache_data(show_spinner=False)
def get_pubchem_smiles(drug_name: str) -> Optional[str]:
    """Retrieve a SMILES string for ``drug_name`` from PubChem.

    Args:
        drug_name: Compound name substituted into the PubChem name-lookup URL.

    Returns:
        The canonical SMILES of the first compound in the response when one
        is present; otherwise the first other SMILES property found; None if
        the lookup failed or the record carries no SMILES at all.
    """
    url = API_ENDPOINTS["pubchem"].format(drug_name)
    data = query_api(url)
    if not data or not data.get("PC_Compounds"):
        return None

    fallback = None
    for prop in data["PC_Compounds"][0].get("props", []):
        sval = prop.get("value", {}).get("sval")
        if sval is None:
            continue
        # Original flat-key check, kept for backward compatibility.
        if prop.get("name") == "Canonical SMILES":
            return sval
        # Property identity lives under "urn" (label + name), as the
        # drug-details lookup in this file already assumes.
        urn = prop.get("urn", {})
        if urn.get("label") == "SMILES":
            if urn.get("name") == "Canonical":
                return sval
            if fallback is None:
                # NOTE(review): non-canonical SMILES variant; used only when
                # the record has no canonical entry.
                fallback = sval
    return fallback
113
+
114
def draw_molecule(smiles: str) -> Optional[Any]:
    """Render ``smiles`` as a 2D molecule image with RDKit.

    Returns the image on success. When RDKit cannot parse the string, or any
    step raises, the problem is shown in the Streamlit UI and None is
    returned.
    """
    try:
        molecule = Chem.MolFromSmiles(smiles)
        if not molecule:
            st.error("Invalid SMILES string provided.")
            return None
        return Draw.MolToImage(molecule)
    except Exception as e:
        st.error(f"Error drawing molecule: {e}")
        logging.error(e)
    return None
126
+
127
@st.cache_data(show_spinner=False)
def get_pubchem_drug_details(drug_name: str) -> Optional[Dict[str, str]]:
    """Retrieve formula, IUPAC name and SMILES for ``drug_name`` from PubChem.

    Args:
        drug_name: Compound name substituted into the PubChem name-lookup URL.

    Returns:
        A dict that may contain the keys "Molecular Formula", "IUPAC Name"
        and "Canonical SMILES" (only those found in the first compound of the
        response), or None when the lookup failed or returned no compound.
    """
    url = API_ENDPOINTS["pubchem"].format(drug_name)
    data = query_api(url)
    if not data or not data.get("PC_Compounds"):
        return None

    details: Dict[str, str] = {}
    other_smiles = None
    for prop in data["PC_Compounds"][0].get("props", []):
        sval = prop.get("value", {}).get("sval")
        if sval is None:
            # Only string-valued properties are of interest here.
            continue
        urn = prop.get("urn", {})
        label = urn.get("label")
        name = urn.get("name")
        if label == "Molecular Formula":
            details["Molecular Formula"] = sval
        if name == "Preferred":
            details["IUPAC Name"] = sval
        # SMILES is identified through "urn" like the other properties; the
        # original flat-key check is kept for backward compatibility.
        if prop.get("name") == "Canonical SMILES" or (label == "SMILES" and name == "Canonical"):
            details["Canonical SMILES"] = sval
        elif label == "SMILES" and other_smiles is None:
            other_smiles = sval

    # NOTE(review): when the record has no canonical variant, fall back to
    # the first other SMILES entry so the field is not silently missing.
    if other_smiles is not None:
        details.setdefault("Canonical SMILES", other_smiles)
    return details
145
+
146
@st.cache_data(show_spinner=False)
def get_clinical_trials(query: str) -> Optional[Dict]:
    """Query the ClinicalTrials.gov v2 studies endpoint.

    Args:
        query: Either an NCT identifier ("NCT" followed by digits, any case)
            or free text to search for.

    Returns:
        The decoded JSON response from ``query_api`` (callers read its
        "studies" list), or None if the request failed.
    """
    if query.upper().startswith("NCT") and query[3:].isdigit():
        # v2 filters by identifier through "filter.ids"; this keeps the
        # response in the same {"studies": [...]} shape as a text search.
        params = {"filter.ids": query.upper(), "format": "json"}
    else:
        # v2 names: "pageSize" limits the page, "format" selects JSON.
        params = {"query.term": query, "pageSize": 10, "format": "json"}
    return query_api(API_ENDPOINTS["clinical_trials"], params)
154
+
155
@st.cache_data(show_spinner=False)
def get_pubmed(query: str) -> Optional[Dict]:
    """Run a PubMed search for ``query`` (at most 10 ids, JSON response).

    The configured PUB_EMAIL is sent along with the request. Returns whatever
    ``query_api`` returns: the decoded JSON, or None on failure.
    """
    search_params = dict(
        db="pubmed",
        term=query,
        retmax=10,
        retmode="json",
        email=PUB_EMAIL,
    )
    return query_api(API_ENDPOINTS["pubmed"], search_params)
160
+
161
@st.cache_data(show_spinner=False)
def get_fda_approval(drug_name: str) -> Optional[Dict]:
    """Look up ``drug_name`` as a brand name in the openFDA drug-label endpoint.

    Returns the first result record, or None when the OpenFDA key is missing
    (an error is shown in the UI), the request failed, or nothing matched.
    """
    if not OPENFDA_KEY:
        st.error("OpenFDA key not configured.")
        return None

    search_expr = f'openfda.brand_name:"{drug_name}"'
    payload = query_api(
        API_ENDPOINTS["fda_drug_approval"],
        {"api_key": OPENFDA_KEY, "search": search_expr, "limit": 1},
    )
    matches = payload.get("results") if payload else None
    return matches[0] if matches else None
173
 
174
@st.cache_data(show_spinner=False)
def analyze_adverse_events(drug_name: str, limit: int = 5) -> Optional[Dict]:
    """Fetch up to ``limit`` FAERS adverse-event reports naming ``drug_name``.

    Returns the decoded openFDA response, or None when the OpenFDA key is
    missing (an error is shown in the UI) or the request failed.
    """
    if OPENFDA_KEY:
        search_expr = f'patient.drug.medicinalproduct:"{drug_name}"'
        request_params = {"api_key": OPENFDA_KEY, "search": search_expr, "limit": limit}
        return query_api(API_ENDPOINTS["faers_adverse_events"], request_params)

    st.error("OpenFDA key not configured.")
    return None
183
+
184
@st.cache_data(show_spinner=False)
def get_pharmgkb_variants_for_gene(pharmgkb_gene_id: str) -> Optional[List[str]]:
    """List the variant ids PharmGKB reports for a gene accession.

    The accession must start with "PA"; otherwise a warning is shown and None
    is returned. None is also returned (with a warning) when the response has
    no "data" entries.
    """
    if pharmgkb_gene_id.startswith("PA"):
        variants_url = API_ENDPOINTS["pharmgkb_gene_variants"].format(pharmgkb_gene_id)
        payload = query_api(variants_url)
        if payload and payload.get("data"):
            variant_ids = []
            for entry in payload["data"]:
                variant_ids.append(entry["id"])
            return variant_ids
        st.warning(f"No variants found for PharmGKB gene {pharmgkb_gene_id}.")
    else:
        st.warning("Enter a valid PharmGKB gene accession (e.g., PA1234).")
    return None
196
+
197
@st.cache_data(show_spinner=False)
def get_rxnorm_rxcui(drug_name: str) -> Optional[str]:
    """Return the first RxCUI that RxNorm reports for ``drug_name``.

    Args:
        drug_name: Drug name sent as the ``name`` query parameter.

    Returns:
        The first id in ``idGroup.rxnormId``, or None (with a UI warning)
        when the request failed or no id was returned.
    """
    # Pass the name as a query parameter so the HTTP layer encodes it; names
    # containing spaces, "&", "#" or "+" would otherwise corrupt the URL.
    data = query_api(API_ENDPOINTS["rxnorm_rxcui"], {"name": drug_name})
    if data and "idGroup" in data and data["idGroup"].get("rxnormId"):
        return data["idGroup"]["rxnormId"][0]
    st.warning(f"No RxCUI found for {drug_name}.")
    return None
206
+
207
@st.cache_data(show_spinner=False)
def get_rxnorm_properties(rxcui: str) -> Optional[Dict]:
    """Fetch the RxNorm properties document for ``rxcui``.

    Returns the decoded JSON from ``query_api``, or None on failure.
    """
    properties_url = str.format(API_ENDPOINTS["rxnorm_properties"], rxcui)
    return query_api(properties_url)
212
+
213
@st.cache_data(show_spinner=False)
def get_rxclass_by_drug_name(drug_name: str) -> Optional[Dict]:
    """Return RxClass information for ``drug_name``.

    Args:
        drug_name: Drug name sent as the ``drugName`` query parameter.

    Returns:
        The decoded response when it carries class data, otherwise None.
    """
    # Pass the name as a query parameter so the HTTP layer encodes it.
    data = query_api(API_ENDPOINTS["rxclass_by_drug"], {"drugName": drug_name})
    if not data:
        return None
    # The byDrugName payload is wrapped in "rxclassDrugInfoList"; the
    # original "classMember" check is kept so any response that did satisfy
    # it is still accepted.
    if "rxclassDrugInfoList" in data or "classMember" in data:
        return data
    return None
221
+
222
+ # -------------------------------
223
+ # AI-DRIVEN DRUG INSIGHTS
224
+ # -------------------------------
225
def generate_drug_insights(drug_name: str) -> str:
    """Build a data-backed GPT-4 prompt for ``drug_name`` and return the reply.

    The name is first mapped through TRADE_TO_GENERIC (case-insensitive,
    falling back to the name itself). FDA label, PubChem, RxNorm and RxClass
    lookups are then run with that name, their results are embedded in the
    prompt, and the prompt is passed to ``generate_ai_content``.
    """
    query_name = TRADE_TO_GENERIC.get(drug_name.lower(), drug_name)

    # FDA: report the brand names on the label, if any were found.
    label = get_fda_approval(query_name)
    brand_names = label.get("openfda", {}).get("brand_name") if label else None
    fda_status = ", ".join(brand_names) if brand_names else "Not Approved"

    # PubChem: individual fields default to "N/A"; a failed lookup marks all
    # three as "Not Available".
    chem = get_pubchem_drug_details(query_name)
    if chem:
        formula, iupac, canon_smiles = (
            chem.get(field, "N/A")
            for field in ("Molecular Formula", "IUPAC Name", "Canonical SMILES")
        )
    else:
        formula = iupac = canon_smiles = "Not Available"

    # RxNorm: properties are fetched only when an RxCUI was found.
    rxcui = get_rxnorm_rxcui(query_name)
    if rxcui:
        rxnorm_info = f"RxCUI: {rxcui}\nProperties: {get_rxnorm_properties(rxcui)}"
    else:
        rxnorm_info = "No RxNorm data available."

    # RxClass
    rxclass_info = get_rxclass_by_drug_name(query_name) or "No RxClass data available."

    intro = (
        f"Please provide an advanced, innovative drug analysis report for '{drug_name}' "
        f"(generic: {query_name}).\n\n"
    )
    facts = (
        f"**FDA Approval Status:** {fda_status}\n"
        f"**PubChem Details:** Molecular Formula: {formula}, IUPAC Name: {iupac}, Canonical SMILES: {canon_smiles}\n"
        f"**RxNorm Info:** {rxnorm_info}\n"
        f"**RxClass Info:** {rxclass_info}\n\n"
    )
    asks = (
        "Include the following in bullet points:\n"
        "- Pharmacogenomic considerations (including genetic variants that might affect metabolism and toxicity).\n"
        "- Potential repurposing opportunities based on drug mechanism.\n"
        "- Regulatory insights and challenges, particularly for expanding indications or personalized medicine.\n"
        "- Innovative suggestions for future research and data integration approaches.\n"
    )
    return generate_ai_content(intro + facts + asks)
273
+
274
+ # -------------------------------
275
+ # STREAMLIT APP LAYOUT
276
+ # -------------------------------
277
+ tabs = st.tabs([
278
+ "💊 Drug Development",
279
+ "📊 Trial Analytics",
280
+ "🧬 Molecular Profiling",
281
+ "📜 Regulatory Intelligence",
282
+ "📚 Literature Search",
283
+ "📈 Dashboard",
284
+ "🧪 Drug Data Integration",
285
+ "🤖 AI Insights"
286
+ ])
287
 
288
+ # ----- Tab 1: Drug Development -----
289
+ with tabs[0]:
290
+ st.header("AI-Driven Drug Development Strategy")
291
+ target = st.text_input("Target Disease/Pathway:", placeholder="Enter disease mechanism or target")
292
+ target_gene = st.text_input("Target Gene (PharmGKB Accession):", placeholder="e.g., PA1234")
293
+ strategy = st.selectbox("Development Strategy:", ["First-in-class", "Me-too", "Repurposing", "Biologic"])
294
 
295
+ if st.button("Generate Development Plan"):
296
+ with st.spinner("Generating comprehensive development plan..."):
297
+ plan_prompt = (
298
+ f"Develop a detailed drug development plan for treating {target} using a {strategy} strategy. "
299
+ "Include sections on target validation, lead optimization, preclinical testing, clinical trial design, "
300
+ "regulatory strategy, market analysis, competitive landscape, and relevant pharmacogenomic considerations."
 
 
 
 
 
 
 
 
 
 
 
 
 
 
301
  )
302
+ plan = generate_ai_content(plan_prompt)
303
+ st.subheader("Comprehensive Development Plan")
304
+ st.markdown(plan)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
305
 
306
+ st.subheader("FDA Regulatory Insights")
307
+ if target:
308
+ fda_data = get_fda_approval(target.split()[0])
309
+ if fda_data:
310
+ st.json(fda_data)
311
+ else:
312
+ st.write("No FDA data found for the given target.")
 
 
 
313
 
314
+ st.subheader("Pharmacogenomic Considerations")
315
+ if target_gene:
316
+ if not target_gene.startswith("PA"):
317
+ st.warning("Enter a valid PharmGKB accession (e.g., PA1234).")
318
+ else:
319
+ variants = get_pharmgkb_variants_for_gene(target_gene)
320
+ if variants:
321
+ st.write("PharmGKB Variants:")
322
+ st.write(variants)
323
+ for vid in variants[:3]:
324
+ annotations = _get_pharmgkb_clinical_annotations(vid)
325
+ st.write(f"Annotations for Variant {vid}:")
326
+ st.json(annotations if annotations else {"message": "No annotations found."})
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
327
  else:
328
+ st.write("No variants found for the specified PharmGKB gene accession.")
329
+ else:
330
+ st.write("Provide a PharmGKB gene accession to retrieve pharmacogenomic data.")
331
 
332
+ # ----- Tab 2: Clinical Trial Analytics -----
333
+ with tabs[1]:
334
+ st.header("Clinical Trial Landscape Analytics")
335
+ trial_query = st.text_input("Search Clinical Trials:", placeholder="Enter condition, intervention, or NCT number")
336
+ if st.button("Analyze Trial Landscape"):
337
+ with st.spinner("Fetching trial data..."):
338
+ trials = get_clinical_trials(trial_query)
339
+ if trials and trials.get("studies"):
340
+ trial_data = []
341
+ for study in trials["studies"][:5]:
342
+ trial_data.append({
343
+ "Title": study.get("protocolSection", {}).get("identificationModule", {}).get("briefTitle", "N/A"),
344
+ "Status": study.get("protocolSection", {}).get("statusModule", {}).get("overallStatus", "N/A"),
345
+ "Phase": study.get("protocolSection", {}).get("designModule", {}).get("phases", ["Not Available"])[0],
346
+ "Enrollment": study.get("protocolSection", {}).get("designModule", {}).get("enrollmentInfo", {}).get("count", "N/A")
347
+ })
348
+ _display_dataframe(trial_data, list(trial_data[0].keys()))
349
+ else:
350
+ st.warning("No clinical trials found for the query.")
351
 
352
+ ae_data = analyze_adverse_events(trial_query)
353
+ if ae_data and ae_data.get("results"):
354
+ st.subheader("Adverse Event Profile (Top 5)")
355
+ ae_results = ae_data["results"][:5]
356
+ ae_df = pd.json_normalize(ae_results)
357
+ st.dataframe(ae_df)
358
+ if "patient.reaction.reactionmeddrapt" in ae_df.columns:
359
+ try:
360
+ reactions = ae_df["patient.reaction.reactionmeddrapt"].explode().dropna()
361
+ top_reactions = reactions.value_counts().nlargest(10)
362
+ fig, ax = plt.subplots(figsize=(10, 6))
363
+ sns.barplot(x=top_reactions.values, y=top_reactions.index, ax=ax)
364
+ ax.set_title("Top Adverse Reactions")
365
+ ax.set_xlabel("Frequency")
366
+ ax.set_ylabel("Reaction")
367
+ st.pyplot(fig)
368
+ except Exception as e:
369
+ st.error(f"Error visualizing adverse events: {e}")
370
+ else:
371
+ st.write("No adverse event data available.")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
372
 
373
+ # ----- Tab 3: Molecular Profiling -----
374
+ with tabs[2]:
375
+ st.header("Advanced Molecular Profiling")
376
+ compound_input = st.text_input("Compound Identifier:", placeholder="Enter drug name, SMILES, or INN")
377
+ if st.button("Analyze Compound"):
378
+ with st.spinner("Querying PubChem for molecular structure..."):
379
+ query_compound = TRADE_TO_GENERIC.get(compound_input.lower(), compound_input)
380
+ smiles = _get_pubchem_smiles(query_compound)
381
+ if smiles:
382
+ mol_image = draw_molecule(smiles)
383
+ if mol_image:
384
+ st.image(mol_image, caption="2D Molecular Structure")
385
+ else:
386
+ st.error("Molecular structure not found. Try a more specific compound name.")
387
+ pubchem_data = query_api(API_ENDPOINTS["pubchem"].format(query_compound))
388
+ if pubchem_data and pubchem_data.get("PC_Compounds"):
389
+ st.subheader("Physicochemical Properties")
390
+ props = pubchem_data["PC_Compounds"][0].get("props", [])
391
+ mw = next((prop["value"]["sval"] for prop in props if prop.get("name") == "Molecular Weight"), "N/A")
392
+ logp = next((prop["value"]["sval"] for prop in props if prop.get("name") == "LogP"), "N/A")
393
+ st.write(f"**Molecular Weight:** {mw}")
394
+ st.write(f"**LogP:** {logp}")
395
+ else:
396
+ st.error("Physicochemical properties not available.")
397
+
398
# ----- Tab 4: Regulatory Intelligence -----
# Combines the FDA lookup and PubChem drug details for one product into an
# on-screen summary and a downloadable PDF report.
with tabs[3]:
    st.header("Global Regulatory Monitoring")
    st.markdown("**Note:** Due to persistent issues with EMA/WHO/DailyMed APIs, this section focuses on FDA data and PubChem drug details.")
    drug_prod = st.text_input("Drug Product:", placeholder="Enter generic or brand name")
    if st.button("Generate Regulatory Report"):
        with st.spinner("Compiling regulatory data..."):
            fda_data = get_fda_approval(drug_prod)
            # Default shown when the FDA lookup returns no brand names.
            fda_status = "Not Approved"
            if fda_data and fda_data.get("openfda", {}).get("brand_name"):
                fda_status = ", ".join(fda_data["openfda"]["brand_name"])
            pubchem_details = get_pubchem_drug_details(drug_prod)
            if pubchem_details:
                formula = pubchem_details.get("Molecular Formula", "N/A")
                iupac = pubchem_details.get("IUPAC Name", "N/A")
                canon_smiles = pubchem_details.get("Canonical SMILES", "N/A")
            else:
                formula = iupac = canon_smiles = "Not Available"
            col1, col2 = st.columns(2)
            with col1:
                st.markdown("**FDA Status**")
                st.write(fda_status)
            with col2:
                st.markdown("**Drug Details (PubChem)**")
                st.write(f"**Molecular Formula:** {formula}")
                st.write(f"**IUPAC Name:** {iupac}")
                st.write(f"**Canonical SMILES:** {canon_smiles}")
            report_text = (
                f"### Regulatory Report for {drug_prod}\n\n"
                f"**FDA Status:** {fda_status}\n\n"
                f"**Molecular Formula:** {formula}\n\n"
                f"**IUPAC Name:** {iupac}\n\n"
                f"**Canonical SMILES:** {canon_smiles}\n"
            )
            # Reserve a temp path and close the handle right away so the PDF
            # writer is not competing with an open descriptor on the same file.
            with tempfile.NamedTemporaryFile(delete=False, suffix=".pdf") as tmp:
                tmp_path = tmp.name
            pdf_file = None
            pdf_bytes = None
            try:
                pdf_file = _save_pdf_report(report_text, tmp_path)
                if pdf_file:
                    with open(pdf_file, "rb") as f:
                        # Read into memory: the button must not depend on a
                        # handle that is closed (and a file that is deleted)
                        # immediately afterwards.
                        pdf_bytes = f.read()
            finally:
                # Always clean up, including when report generation failed or
                # returned nothing (the delete=False file was leaked before).
                leftovers = {tmp_path}
                if isinstance(pdf_file, str):
                    leftovers.add(pdf_file)
                for leftover in leftovers:
                    if os.path.exists(leftover):
                        os.remove(leftover)
            if pdf_bytes is not None:
                st.download_button("Download Regulatory Report (PDF)", data=pdf_bytes, file_name=f"{drug_prod}_report.pdf", mime="application/pdf")
438
+
439
# ----- Tab 5: Literature Search -----
# Two independent searches share this tab: a PubMed query that lists the
# returned PMIDs as links, and a BioPortal query against a chosen ontology.
with tabs[4]:
    st.header("Literature Search")
    lit_query = st.text_input("Enter search query for PubMed:", placeholder="e.g., Alzheimer's disease genetics")
    if st.button("Search PubMed"):
        with st.spinner("Searching PubMed..."):
            pubmed_results = get_pubmed(lit_query)
            # Pull the ID list out once; a missing/empty response yields None.
            id_list = pubmed_results.get("esearchresult", {}).get("idlist") if pubmed_results else None
            if not id_list:
                st.write("No PubMed results found.")
            else:
                st.subheader(f"Found {len(id_list)} PubMed Results")
                for pmid in id_list:
                    st.markdown(f"- [PMID: {pmid}](https://pubmed.ncbi.nlm.nih.gov/{pmid}/)")

    st.header("Ontology Search")
    ont_query = st.text_input("Enter search query for Ontology:", placeholder="e.g., Alzheimer's disease")
    ont_select = st.selectbox("Select Ontology", ["MESH", "NCIT", "GO", "SNOMEDCT"])
    if st.button("Search BioPortal"):
        with st.spinner("Searching BioPortal..."):
            bioportal_results = _get_bioportal_data(ont_select, ont_query)
            matches = bioportal_results.get("collection") if bioportal_results else None
            if not matches:
                st.write("No ontology results found.")
            else:
                st.subheader(f"BioPortal Results for {ont_select}")
                for item in matches:
                    # Fall back to "N/A" when an entry lacks a label or an id.
                    label = item.get("prefLabel", "N/A")
                    ont_id = item.get("@id", "N/A")
                    st.markdown(f"- **{label}** ({ont_id})")
467
+
468
# ----- Tab 6: Comprehensive Dashboard -----
# Static overview page: headline KPIs, a placeholder approvals trend line and
# a sample gene-variant-drug network.
with tabs[5]:
    st.header("Comprehensive Dashboard")
    # Example KPIs (these could later be replaced by dynamic queries)
    kpi_fda = 5000
    kpi_trials = 12000
    kpi_pubs = 250000
    col1, col2, col3 = st.columns(3)
    col1.metric("FDA Approved Drugs", kpi_fda)
    col2.metric("Ongoing Trials", kpi_trials)
    col3.metric("Publications", kpi_pubs)
    st.subheader("Trend Analysis")
    years = list(range(2000, 2026))
    # Placeholder series: the KPI total spread evenly over the years, so the
    # plotted line is flat.
    approvals = [kpi_fda // len(years)] * len(years)
    fig_trend, ax_trend = plt.subplots(figsize=(10, 6))
    try:
        sns.lineplot(x=years, y=approvals, marker="o", ax=ax_trend)
        ax_trend.set_title("FDA Approvals Over Time")
        ax_trend.set_xlabel("Year")
        ax_trend.set_ylabel("Number of Approvals")
        st.pyplot(fig_trend)
    finally:
        # pyplot keeps every figure it creates registered until it is closed;
        # without this, each script rerun would leave another figure open.
        plt.close(fig_trend)
    st.subheader("Gene-Variant-Drug Network (Sample)")
    sample_gene = "CYP2C19"
    sample_variants = ["rs4244285", "rs12248560"]
    sample_annots = {"rs4244285": ["Clopidogrel", "Omeprazole"], "rs12248560": ["Sertraline"]}
    try:
        net_fig = _create_variant_network(sample_gene, sample_variants, sample_annots)
        st.plotly_chart(net_fig, use_container_width=True)
    except Exception as e:
        # Best-effort rendering: report the failure in the UI instead of
        # aborting the rest of the page.
        st.error(f"Network graph error: {e}")
497
+
498
# ----- Tab 7: Drug Data Integration -----
# Looks one drug up in RxNorm, RxClass and PubChem and shows each source's
# result under its own sub-heading.
with tabs[6]:
    st.header("🧪 Drug Data Integration")
    drug_integration = st.text_input("Enter Drug Name for API Integration:", placeholder="e.g., aspirin")
    if st.button("Retrieve Drug Data"):
        with st.spinner("Fetching drug data from multiple sources..."):
            # Map a known trade name to its generic name; otherwise use the input as typed.
            query_drug = TRADE_TO_GENERIC.get(drug_integration.lower(), drug_integration)

            rxnorm_id = get_rxnorm_rxcui(query_drug)
            # Properties can only be fetched once an RxCUI is known.
            rx_props = None
            if rxnorm_id:
                rx_props = get_rxnorm_properties(rxnorm_id)
            rxclass_info = get_rxclass_by_drug_name(query_drug)

            st.subheader("RxNorm Data")
            if not rxnorm_id:
                st.write("No RxCUI found for the given drug name.")
            else:
                st.write(f"RxCUI for {drug_integration}: {rxnorm_id}")
                st.json(rx_props or {"message": "No RxNorm properties found."})

            st.subheader("RxClass Information")
            if not rxclass_info:
                st.write("No RxClass data found for the given drug.")
            else:
                st.json(rxclass_info)

            pubchem_info = get_pubchem_drug_details(query_drug)
            st.subheader("PubChem Drug Details")
            if not pubchem_info:
                st.write("No PubChem details found.")
            else:
                st.write(f"**Molecular Formula:** {pubchem_info.get('Molecular Formula', 'N/A')}")
                st.write(f"**IUPAC Name:** {pubchem_info.get('IUPAC Name', 'N/A')}")
                st.write(f"**Canonical SMILES:** {pubchem_info.get('Canonical SMILES', 'N/A')}")
527
+
528
# ----- Tab 8: AI Insights -----
# Sends one drug name to generate_drug_insights and renders the returned
# text as markdown.
with tabs[7]:
    st.header("🤖 AI Insights")
    ai_drug = st.text_input("Enter Drug Name for AI-Driven Analysis:", placeholder="e.g., tylenol")
    insights_requested = st.button("Generate AI Insights")
    if insights_requested:
        with st.spinner("Generating AI-driven insights..."):
            # Map a known trade name to its generic name; otherwise use the input as typed.
            lookup_key = ai_drug.lower()
            query_ai_drug = TRADE_TO_GENERIC.get(lookup_key, ai_drug)
            # Generate first so the sub-heading only appears once the call returned.
            insights_text = generate_drug_insights(query_ai_drug)
            st.subheader("AI-Driven Drug Analysis")
            st.markdown(insights_text)
538
+
539
# -------------------------------
# SIDEBAR INFORMATION
# -------------------------------
# Static "About" panel; the date shown is the date of the current script run.
st.sidebar.header("About")
last_updated = datetime.now().strftime("%Y-%m-%d")
about_template = """
**Pharma Research Expert Platform**

An innovative, AI-driven tool for advanced drug discovery, clinical research, and regulatory analysis.

**Developed by:** Your Name
**Contact:** [[email protected]](mailto:[email protected])
Last updated: {}
"""
st.sidebar.info(about_template.format(last_updated))