Update app.py
Browse files
app.py
CHANGED
@@ -6,7 +6,7 @@ from fpdf import FPDF
|
|
6 |
import tempfile
|
7 |
import time
|
8 |
import requests
|
9 |
-
|
10 |
import json
|
11 |
import pandas as pd
|
12 |
import matplotlib.pyplot as plt
|
@@ -14,108 +14,106 @@ import seaborn as sns
|
|
14 |
from typing import Optional, Dict, List, Any
|
15 |
import os
|
16 |
import logging
|
|
|
17 |
|
18 |
# Setup logging
|
19 |
-
logging.basicConfig(level=logging.ERROR)
|
20 |
|
21 |
# API Endpoints (Centralized Configuration)
|
22 |
API_ENDPOINTS = {
|
23 |
"clinical_trials": "https://clinicaltrials.gov/api/v2/studies",
|
24 |
"pubchem": "https://pubchem.ncbi.nlm.nih.gov/rest/pug/compound/name/{}/JSON",
|
25 |
-
|
26 |
"who_drugs": "https://health-products.canada.ca/api/drug/product",
|
27 |
-
|
28 |
-
"
|
29 |
-
"
|
30 |
-
"
|
31 |
-
|
|
|
|
|
32 |
}
|
33 |
|
34 |
-
#Email addresses
|
35 |
-
|
36 |
-
if "PUB_EMAIL" in st.secrets:
|
37 |
-
PUBMED_EMAIL = st.secrets["PUB_EMAIL"]
|
38 |
-
else:
|
39 |
-
PUBMED_EMAIL = None
|
40 |
-
st.error("PubMed email not found in secrets. Please add the PUB_EMAIL to secrets.")
|
41 |
CLINICALTRIALS_EMAIL = PUBMED_EMAIL
|
42 |
|
43 |
-
|
44 |
-
|
45 |
-
|
46 |
-
|
47 |
-
|
48 |
-
|
49 |
-
|
50 |
-
|
51 |
-
|
52 |
-
|
53 |
-
|
54 |
-
|
55 |
-
st.
|
56 |
-
|
57 |
# Initialize AI Agent (Context-aware)
|
58 |
content_agent = CodeAgent(tools=[DuckDuckGoSearchTool()], model=HfApiModel())
|
59 |
|
60 |
# --- Utility Functions ---
|
61 |
-
def _query_api(endpoint: str, params: Optional[Dict] = None) -> Optional[Dict]:
|
62 |
"""Handles API requests with robust error handling."""
|
63 |
try:
|
64 |
-
response = requests.get(endpoint, params=params, timeout=15)
|
65 |
response.raise_for_status() # Raise HTTPError for bad responses (4xx or 5xx)
|
66 |
return response.json()
|
67 |
-
except requests.exceptions.
|
68 |
-
st.error(f"
|
69 |
-
logging.error(f"
|
70 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
71 |
|
72 |
-
def
|
73 |
-
"""
|
74 |
-
|
75 |
-
|
76 |
-
|
77 |
-
|
78 |
-
params = {
|
79 |
-
"db": "pubmed",
|
80 |
-
"term": query,
|
81 |
-
"retmax": 10,
|
82 |
-
"retmode": "json",
|
83 |
-
"email": email
|
84 |
-
}
|
85 |
-
data = _query_api(API_ENDPOINTS["pubmed"], params)
|
86 |
-
if data and 'esearchresult' in data:
|
87 |
-
return data
|
88 |
else:
|
|
|
89 |
return None
|
90 |
|
91 |
-
def
|
92 |
-
"""
|
93 |
-
|
94 |
-
|
95 |
-
|
96 |
-
|
97 |
-
|
98 |
-
|
99 |
-
|
100 |
-
|
101 |
-
|
102 |
-
|
103 |
-
|
104 |
-
|
|
|
|
|
|
|
|
|
105 |
|
106 |
def _get_pubchem_smiles(drug_name: str) -> Optional[str]:
|
107 |
"""Retrieves SMILES from PubChem, returns None on failure."""
|
108 |
url = API_ENDPOINTS["pubchem"].format(drug_name)
|
109 |
data = _query_api(url)
|
110 |
-
if data and 'PC_Compounds' in data and data['PC_Compounds']
|
111 |
-
#Check if props exists and find SMILES value
|
112 |
-
for prop in data['PC_Compounds'][0]
|
113 |
if 'name' in prop and prop['name'] == 'Canonical SMILES':
|
114 |
return prop['value']['sval']
|
115 |
return None
|
116 |
|
117 |
-
|
118 |
-
def _draw_molecule(smiles: str) -> Optional[any]:
|
119 |
"""Generates a 2D molecule image from SMILES."""
|
120 |
try:
|
121 |
mol = Chem.MolFromSmiles(smiles)
|
@@ -130,8 +128,7 @@ def _draw_molecule(smiles: str) -> Optional[any]:
|
|
130 |
logging.error(f"Error generating molecule image: {str(e)}")
|
131 |
return None
|
132 |
|
133 |
-
|
134 |
-
def _get_clinical_trials(query: str, email:Optional[str] = CLINICALTRIALS_EMAIL) -> Optional[Dict]:
|
135 |
"""Queries clinicaltrials.gov with search term."""
|
136 |
if not email:
|
137 |
st.error("Clinical Trials email not configured.")
|
@@ -143,97 +140,298 @@ def _get_clinical_trials(query: str, email:Optional[str] = CLINICALTRIALS_EMAIL)
|
|
143 |
}
|
144 |
else:
|
145 |
params = {
|
146 |
-
"
|
147 |
-
"
|
148 |
-
|
|
|
149 |
}
|
150 |
return _query_api(API_ENDPOINTS["clinical_trials"], params)
|
151 |
|
152 |
-
|
153 |
-
def _get_fda_approval(drug_name: str, api_key:Optional[str] = OPENFDA_KEY) -> Optional[Dict]:
|
154 |
"""Retrieves FDA approval info."""
|
155 |
if not api_key:
|
156 |
-
|
157 |
-
|
158 |
-
|
159 |
-
|
160 |
-
|
|
|
|
|
|
|
|
|
|
|
161 |
return data['results'][0]
|
162 |
else:
|
163 |
-
|
164 |
|
165 |
-
def _analyze_adverse_events(drug_name: str, api_key:Optional[str] = OPENFDA_KEY, limit: int = 5) -> Optional[Dict]:
|
166 |
"""Fetches and analyzes adverse event reports from FAERS."""
|
167 |
if not api_key:
|
168 |
st.error("OpenFDA key not configured.")
|
169 |
return None
|
170 |
-
|
171 |
-
|
|
|
|
|
|
|
|
|
|
|
172 |
if data and 'results' in data:
|
173 |
return data
|
174 |
else:
|
175 |
return None
|
176 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
177 |
|
178 |
-
def
|
179 |
-
"""
|
180 |
-
|
181 |
-
|
182 |
-
|
183 |
-
|
184 |
-
|
185 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
186 |
|
187 |
def _get_bioportal_data(ontology: str, term: str) -> Optional[Dict]:
|
188 |
"""Fetches data from BioPortal."""
|
189 |
if not BIOPORTAL_API_KEY:
|
190 |
-
|
191 |
-
|
192 |
if not term:
|
193 |
-
|
194 |
-
|
195 |
|
196 |
headers = {
|
197 |
"Authorization": f"apikey token={BIOPORTAL_API_KEY}"
|
198 |
-
|
199 |
params = {
|
200 |
"q": term,
|
201 |
"ontologies": ontology
|
202 |
}
|
203 |
|
204 |
-
|
205 |
try:
|
206 |
-
|
207 |
-
response.raise_for_status()
|
208 |
-
data = response.json()
|
209 |
if data and 'collection' in data:
|
210 |
return data
|
211 |
else:
|
212 |
st.warning("No results found for the BioPortal query.")
|
213 |
return None
|
214 |
-
except
|
215 |
st.error(f"BioPortal API request failed: {e} Please check connectivity and ensure you have the correct API Key.")
|
216 |
logging.error(f"BioPortal API request failed: {e}")
|
217 |
return None
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
218 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
219 |
def _save_pdf_report(report_content: str, filename: str):
|
220 |
"""Saves content to a PDF file."""
|
221 |
-
|
222 |
-
|
223 |
-
|
224 |
-
|
225 |
-
|
226 |
-
|
227 |
-
|
228 |
-
|
|
|
|
|
|
|
|
|
|
|
229 |
"""Displays data in a dataframe format."""
|
230 |
if data:
|
231 |
df = pd.DataFrame(data, columns=columns)
|
232 |
st.dataframe(df)
|
233 |
return df
|
234 |
else:
|
235 |
-
|
236 |
-
|
237 |
|
238 |
# --- Streamlit App Configuration ---
|
239 |
st.set_page_config(page_title="Pharma Research Expert Platform", layout="wide")
|
@@ -241,7 +439,7 @@ st.title("🔬 Pharma Research Expert Platform")
|
|
241 |
st.markdown("An integrated platform for drug discovery, clinical research, and regulatory affairs.")
|
242 |
|
243 |
# --- Tabs ---
|
244 |
-
tabs = st.tabs(["💊 Drug Development", "📊 Trial Analytics", "🧬 Molecular Profiling", "📜 Regulatory Intelligence", "📚 Literature Search"])
|
245 |
|
246 |
# --- Tab 1: Drug Development ---
|
247 |
with tabs[0]:
|
@@ -254,7 +452,7 @@ with tabs[0]:
|
|
254 |
with st.spinner("Analyzing target and competitive landscape..."):
|
255 |
# AI-generated content with regulatory checks
|
256 |
plan_prompt = f"""Develop a comprehensive drug development plan for the treatment of {target} using a {strategy} strategy.
|
257 |
-
Include sections on target validation, lead optimization, preclinical testing, clinical trial design, regulatory submission strategy, market analysis, and competitive landscape. Highlight key milestones and potential challenges.
|
258 |
plan = content_agent.run(plan_prompt)
|
259 |
|
260 |
st.subheader("Comprehensive Development Plan")
|
@@ -270,16 +468,48 @@ with tabs[0]:
|
|
270 |
else:
|
271 |
st.write("No relevant FDA data found.")
|
272 |
else:
|
273 |
-
st.write("Please enter a target to get relevant FDA data")
|
274 |
|
275 |
# Pharmacogenomic integration
|
276 |
st.subheader("Pharmacogenomic Considerations")
|
277 |
-
|
278 |
-
|
279 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
280 |
else:
|
281 |
-
st.write("
|
282 |
-
|
283 |
|
284 |
# --- Tab 2: Clinical Trial Analytics ---
|
285 |
with tabs[1]:
|
@@ -289,17 +519,16 @@ with tabs[1]:
|
|
289 |
if st.button("Analyze Trial Landscape"):
|
290 |
with st.spinner("Aggregating global trial data..."):
|
291 |
trials = _get_clinical_trials(trial_query)
|
292 |
-
if trials and trials['studies']:
|
293 |
st.subheader("Recent Clinical Trials")
|
294 |
trial_data = []
|
295 |
for study in trials['studies'][:5]:
|
296 |
-
|
297 |
trial_data.append({
|
298 |
-
"Title": study
|
299 |
-
|
300 |
-
|
301 |
-
|
302 |
-
|
303 |
|
304 |
trial_df = _display_dataframe(trial_data, list(trial_data[0].keys())) if trial_data else None
|
305 |
|
@@ -307,51 +536,38 @@ with tabs[1]:
|
|
307 |
st.markdown("### Clinical Trial Summary (First 5 trials)")
|
308 |
st.dataframe(trial_df)
|
309 |
|
310 |
-
|
311 |
# Adverse events analysis
|
312 |
ae_data = _analyze_adverse_events(trial_query)
|
313 |
-
if ae_data and ae_data['results']:
|
314 |
st.subheader("Adverse Event Profile (Top 5 Reports)")
|
315 |
|
316 |
ae_results = ae_data['results'][:5]
|
317 |
-
ae_df = pd.
|
318 |
st.dataframe(ae_df)
|
319 |
|
320 |
-
#Visualization of adverse events
|
321 |
-
if 'patient' in ae_df and not ae_df.empty:
|
322 |
try:
|
323 |
-
|
324 |
-
|
325 |
-
|
326 |
-
|
327 |
-
|
328 |
-
|
329 |
-
|
330 |
-
|
331 |
-
|
332 |
-
|
333 |
-
#
|
334 |
-
|
335 |
-
|
336 |
-
|
337 |
-
fig, ax = plt.subplots(figsize=(10,6))
|
338 |
-
sns.barplot(x=top_reactions.index, y=top_reactions.values, ax=ax)
|
339 |
-
ax.set_xticklabels(ax.get_xticklabels(), rotation=45, ha="right")
|
340 |
-
plt.title('Top Adverse Reactions')
|
341 |
-
plt.xlabel('Adverse Reaction')
|
342 |
-
plt.ylabel('Frequency')
|
343 |
-
st.pyplot(fig)
|
344 |
-
|
345 |
-
#Display as dataframe
|
346 |
-
st.markdown("### Top 10 Adverse Reaction Summary")
|
347 |
-
st.dataframe(pd.DataFrame({'Reaction': top_reactions.index, 'Frequency': top_reactions.values}))
|
348 |
-
|
349 |
except Exception as e:
|
350 |
st.error(f"Error processing adverse events data: {e}")
|
|
|
|
|
351 |
else:
|
352 |
st.warning("No clinical trials found for the given search term.")
|
353 |
|
354 |
-
|
355 |
# --- Tab 3: Molecular Profiling ---
|
356 |
with tabs[2]:
|
357 |
st.header("Advanced Molecular Profiling")
|
@@ -360,8 +576,12 @@ with tabs[2]:
|
|
360 |
|
361 |
if st.button("Analyze Compound"):
|
362 |
with st.spinner("Querying global databases..."):
|
363 |
-
# SMILES
|
364 |
-
smiles =
|
|
|
|
|
|
|
|
|
365 |
|
366 |
if smiles:
|
367 |
img = _draw_molecule(smiles)
|
@@ -369,22 +589,24 @@ with tabs[2]:
|
|
369 |
st.image(img, caption="2D Structure")
|
370 |
else:
|
371 |
st.error("Compound structure not found in databases.")
|
372 |
-
|
373 |
# PubChem properties
|
374 |
-
|
375 |
-
|
376 |
-
|
377 |
-
|
378 |
-
|
379 |
-
|
380 |
-
|
381 |
-
|
382 |
-
|
383 |
-
|
384 |
-
|
|
|
|
|
|
|
385 |
else:
|
386 |
-
st.
|
387 |
-
|
388 |
|
389 |
# --- Tab 4: Regulatory Intelligence ---
|
390 |
with tabs[3]:
|
@@ -393,34 +615,70 @@ with tabs[3]:
|
|
393 |
|
394 |
if st.button("Generate Regulatory Report"):
|
395 |
with st.spinner("Compiling global regulatory status..."):
|
396 |
-
#
|
397 |
-
|
398 |
-
|
399 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
400 |
|
401 |
st.subheader("Regulatory Status")
|
402 |
-
col1, col2, col3 = st.columns(
|
403 |
with col1:
|
404 |
st.markdown("**FDA Status**")
|
405 |
-
st.write(
|
406 |
with col2:
|
407 |
st.markdown("**EMA Status**")
|
408 |
-
|
409 |
-
st.write("Not Available")
|
410 |
with col3:
|
411 |
st.markdown("**WHO Essential Medicine**")
|
412 |
-
st.write(
|
413 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
414 |
# Save the information to a PDF report
|
415 |
-
regulatory_content = f"### Regulatory Report
|
416 |
-
|
417 |
-
|
418 |
-
|
419 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
420 |
label="Download Regulatory Report (PDF)",
|
421 |
data=file,
|
422 |
file_name=f"{drug_name}_regulatory_report.pdf",
|
423 |
-
mime="application/pdf"
|
|
|
|
|
424 |
|
425 |
# --- Tab 5: Literature Search ---
|
426 |
with tabs[4]:
|
@@ -428,24 +686,131 @@ with tabs[4]:
|
|
428 |
search_term = st.text_input("Enter search query for PubMed:", placeholder="e.g., Alzheimer's disease genetics")
|
429 |
if st.button("Search PubMed"):
|
430 |
with st.spinner("Searching PubMed..."):
|
431 |
-
pubmed_data =
|
432 |
-
if pubmed_data and 'esearchresult' in pubmed_data and 'idlist' in pubmed_data['esearchresult'] and pubmed_data['esearchresult']['idlist']:
|
433 |
st.subheader("PubMed Search Results")
|
434 |
st.write(f"Found {len(pubmed_data['esearchresult']['idlist'])} results for '{search_term}':")
|
435 |
for article_id in pubmed_data['esearchresult']['idlist']:
|
436 |
-
|
437 |
else:
|
438 |
st.write("No results found for that term.")
|
439 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
440 |
st.header("Ontology Search")
|
441 |
ontology_search_term = st.text_input("Enter Search query for Ontology:", placeholder="Enter disease or ontology")
|
442 |
-
ontology_select = st.selectbox("Select Ontology", ["MESH","NCIT","GO", "SNOMEDCT"])
|
443 |
if st.button("Search BioPortal"):
|
444 |
with st.spinner("Searching Ontology..."):
|
445 |
bioportal_data = _get_bioportal_data(ontology_select, ontology_search_term)
|
446 |
-
if bioportal_data and 'collection' in bioportal_data:
|
447 |
st.subheader(f"BioPortal Search Results for {ontology_select}")
|
448 |
for result in bioportal_data['collection']:
|
449 |
-
|
|
|
|
|
450 |
else:
|
451 |
-
st.write("No results found")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
6 |
import tempfile
|
7 |
import time
|
8 |
import requests
|
9 |
+
from bs4 import BeautifulSoup
|
10 |
import json
|
11 |
import pandas as pd
|
12 |
import matplotlib.pyplot as plt
|
|
|
14 |
from typing import Optional, Dict, List, Any
|
15 |
import os
|
16 |
import logging
|
17 |
+
from transformers import pipeline
|
18 |
|
19 |
# Setup logging
|
20 |
+
logging.basicConfig(level=logging.ERROR) # Log only errors
|
21 |
|
22 |
# API Endpoints (Centralized Configuration)
|
23 |
API_ENDPOINTS = {
|
24 |
"clinical_trials": "https://clinicaltrials.gov/api/v2/studies",
|
25 |
"pubchem": "https://pubchem.ncbi.nlm.nih.gov/rest/pug/compound/name/{}/JSON",
|
26 |
+
"pubmed": "https://eutils.ncbi.nlm.nih.gov/entrez/eutils/esearch.fcgi",
|
27 |
"who_drugs": "https://health-products.canada.ca/api/drug/product",
|
28 |
+
"fda_drug_approval": "https://api.fda.gov/drug/label.json",
|
29 |
+
"faers_adverse_events": "https://api.fda.gov/drug/event.json",
|
30 |
+
"pharmgkb_variant_clinical_annotations": "https://api.pharmgkb.org/v1/data/variant/{}/clinicalAnnotations",
|
31 |
+
"pharmgkb_gene_variants": "https://api.pharmgkb.org/v1/data/gene/{}/variants",
|
32 |
+
"bioportal_search": "https://data.bioontology.org/search",
|
33 |
+
"dailymed": "https://dailymed.nlm.nih.gov/dailymed/services/v2/spls.json",
|
34 |
+
"drugbank": "https://go.drugbank.com/unearth/q?utf8=%E2%9C%93&searcher=drugs&query={}",
|
35 |
}
|
36 |
|
37 |
+
# Email addresses and API Keys
|
38 |
+
PUBMED_EMAIL = st.secrets.get("PUB_EMAIL")
|
|
|
|
|
|
|
|
|
|
|
39 |
CLINICALTRIALS_EMAIL = PUBMED_EMAIL
|
40 |
|
41 |
+
BIOPORTAL_API_KEY = st.secrets.get("BIOPORTAL_API_KEY")
|
42 |
+
SEMANTIC_SCHOLAR_API_KEY = st.secrets.get("SEMANTIC_SCHOLAR_API_KEY")
|
43 |
+
OPENFDA_KEY = st.secrets.get("OPENFDA_KEY")
|
44 |
+
|
45 |
+
# Check for essential secrets
|
46 |
+
if not PUBMED_EMAIL:
|
47 |
+
st.error("PubMed email not found in secrets. Please add PUB_EMAIL to secrets.")
|
48 |
+
if not BIOPORTAL_API_KEY:
|
49 |
+
st.error("BioPortal API key not found in secrets. Please add BIOPORTAL_API_KEY to secrets.")
|
50 |
+
if not OPENFDA_KEY:
|
51 |
+
st.error("OpenFDA API key not found in secrets. Please add OPENFDA_KEY to secrets.")
|
52 |
+
if not SEMANTIC_SCHOLAR_API_KEY:
|
53 |
+
st.warning("Semantic Scholar API key not found. Some features may be unavailable.")
|
54 |
+
|
55 |
# Initialize AI Agent (Context-aware)
|
56 |
content_agent = CodeAgent(tools=[DuckDuckGoSearchTool()], model=HfApiModel())
|
57 |
|
58 |
# --- Utility Functions ---
|
59 |
+
def _query_api(endpoint: str, params: Optional[Dict] = None, headers: Optional[Dict] = None) -> Optional[Dict]:
|
60 |
"""Handles API requests with robust error handling."""
|
61 |
try:
|
62 |
+
response = requests.get(endpoint, params=params, headers=headers, timeout=15)
|
63 |
response.raise_for_status() # Raise HTTPError for bad responses (4xx or 5xx)
|
64 |
return response.json()
|
65 |
+
except requests.exceptions.HTTPError as http_err:
|
66 |
+
st.error(f"HTTP error occurred: {http_err} for endpoint {endpoint}.")
|
67 |
+
logging.error(f"HTTP error occurred: {http_err} for endpoint {endpoint}.")
|
68 |
+
except requests.exceptions.RequestException as req_err:
|
69 |
+
st.error(f"Request exception: {req_err} for endpoint {endpoint}.")
|
70 |
+
logging.error(f"Request exception: {req_err} for endpoint {endpoint}.")
|
71 |
+
except Exception as e:
|
72 |
+
st.error(f"An unexpected error occurred: {e} for endpoint {endpoint}.")
|
73 |
+
logging.error(f"Unexpected error: {e} for endpoint {endpoint}.")
|
74 |
+
return None
|
75 |
|
76 |
+
def _get_pharmgkb_clinical_annotations(variant_id: str) -> Optional[Dict]:
|
77 |
+
"""Fetches clinical annotations for a given variant from PharmGKB."""
|
78 |
+
endpoint = API_ENDPOINTS["pharmgkb_variant_clinical_annotations"].format(variant_id)
|
79 |
+
data = _query_api(endpoint)
|
80 |
+
if data and 'data' in data:
|
81 |
+
return data
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
82 |
else:
|
83 |
+
st.write(f"No relevant pharmacogenomic data found for variant {variant_id}.")
|
84 |
return None
|
85 |
|
86 |
+
def _get_pharmgkb_variants_for_gene(gene_symbol: str) -> Optional[List[str]]:
|
87 |
+
"""Retrieves variant IDs associated with a given gene from PharmGKB."""
|
88 |
+
# First, get the PharmGKB gene ID from the gene symbol
|
89 |
+
gene_search_endpoint = "https://api.pharmgkb.org/v1/data/gene"
|
90 |
+
params = {
|
91 |
+
"name": gene_symbol
|
92 |
+
}
|
93 |
+
gene_data = _query_api(gene_search_endpoint, params)
|
94 |
+
if gene_data and 'data' in gene_data and len(gene_data['data']) > 0:
|
95 |
+
gene_id = gene_data['data'][0]['id']
|
96 |
+
# Now, retrieve variants for this gene
|
97 |
+
variants_endpoint = API_ENDPOINTS["pharmgkb_gene_variants"].format(gene_id)
|
98 |
+
variants_data = _query_api(variants_endpoint)
|
99 |
+
if variants_data and 'data' in variants_data:
|
100 |
+
variant_ids = [variant['id'] for variant in variants_data['data']]
|
101 |
+
return variant_ids
|
102 |
+
st.warning(f"No variants found for gene: {gene_symbol}")
|
103 |
+
return None
|
104 |
|
105 |
def _get_pubchem_smiles(drug_name: str) -> Optional[str]:
|
106 |
"""Retrieves SMILES from PubChem, returns None on failure."""
|
107 |
url = API_ENDPOINTS["pubchem"].format(drug_name)
|
108 |
data = _query_api(url)
|
109 |
+
if data and 'PC_Compounds' in data and len(data['PC_Compounds']) > 0:
|
110 |
+
# Check if 'props' exists and find SMILES value
|
111 |
+
for prop in data['PC_Compounds'][0].get('props', []):
|
112 |
if 'name' in prop and prop['name'] == 'Canonical SMILES':
|
113 |
return prop['value']['sval']
|
114 |
return None
|
115 |
|
116 |
+
def _draw_molecule(smiles: str) -> Optional[Any]:
|
|
|
117 |
"""Generates a 2D molecule image from SMILES."""
|
118 |
try:
|
119 |
mol = Chem.MolFromSmiles(smiles)
|
|
|
128 |
logging.error(f"Error generating molecule image: {str(e)}")
|
129 |
return None
|
130 |
|
131 |
+
def _get_clinical_trials(query: str, email: Optional[str] = CLINICALTRIALS_EMAIL) -> Optional[Dict]:
|
|
|
132 |
"""Queries clinicaltrials.gov with search term."""
|
133 |
if not email:
|
134 |
st.error("Clinical Trials email not configured.")
|
|
|
140 |
}
|
141 |
else:
|
142 |
params = {
|
143 |
+
"term": query,
|
144 |
+
"retmax": 10,
|
145 |
+
"retmode": "json",
|
146 |
+
"email": email
|
147 |
}
|
148 |
return _query_api(API_ENDPOINTS["clinical_trials"], params)
|
149 |
|
150 |
+
def _get_fda_approval(drug_name: str, api_key: Optional[str] = OPENFDA_KEY) -> Optional[Dict]:
|
|
|
151 |
"""Retrieves FDA approval info."""
|
152 |
if not api_key:
|
153 |
+
st.error("OpenFDA key not configured.")
|
154 |
+
return None
|
155 |
+
query = f'openfda.brand_name:"{drug_name}"'
|
156 |
+
params = {
|
157 |
+
"api_key": api_key,
|
158 |
+
"search": query,
|
159 |
+
"limit": 1
|
160 |
+
}
|
161 |
+
data = _query_api(API_ENDPOINTS["fda_drug_approval"], params)
|
162 |
+
if data and 'results' in data and len(data['results']) > 0:
|
163 |
return data['results'][0]
|
164 |
else:
|
165 |
+
return None
|
166 |
|
167 |
+
def _analyze_adverse_events(drug_name: str, api_key: Optional[str] = OPENFDA_KEY, limit: int = 5) -> Optional[Dict]:
|
168 |
"""Fetches and analyzes adverse event reports from FAERS."""
|
169 |
if not api_key:
|
170 |
st.error("OpenFDA key not configured.")
|
171 |
return None
|
172 |
+
query = f'patient.drug.medicinalproduct:"{drug_name}"'
|
173 |
+
params = {
|
174 |
+
"api_key": api_key,
|
175 |
+
"search": query,
|
176 |
+
"limit": limit
|
177 |
+
}
|
178 |
+
data = _query_api(API_ENDPOINTS["faers_adverse_events"], params)
|
179 |
if data and 'results' in data:
|
180 |
return data
|
181 |
else:
|
182 |
return None
|
183 |
|
184 |
+
def scrape_ema_drug_info(drug_name: str) -> Optional[Dict]:
|
185 |
+
"""
|
186 |
+
Scrapes EMA website for drug information based on the drug name.
|
187 |
+
"""
|
188 |
+
try:
|
189 |
+
# Construct the search URL
|
190 |
+
search_url = f"https://www.ema.europa.eu/en/search?text={drug_name.replace(' ', '+')}&type=Product"
|
191 |
+
|
192 |
+
# Fetch the search results page
|
193 |
+
response = requests.get(search_url, timeout=10)
|
194 |
+
response.raise_for_status()
|
195 |
+
|
196 |
+
# Parse the HTML content
|
197 |
+
soup = BeautifulSoup(response.text, 'html.parser')
|
198 |
+
|
199 |
+
# Find the first search result link
|
200 |
+
result = soup.find('a', class_='search-result__title')
|
201 |
+
if not result:
|
202 |
+
st.warning(f"No EMA data found for '{drug_name}'.")
|
203 |
+
return None
|
204 |
+
|
205 |
+
# Extract the href attribute for the drug's EMA page
|
206 |
+
drug_page_url = "https://www.ema.europa.eu" + result.get('href')
|
207 |
+
|
208 |
+
# Fetch the drug's EMA page
|
209 |
+
drug_response = requests.get(drug_page_url, timeout=10)
|
210 |
+
drug_response.raise_for_status()
|
211 |
+
|
212 |
+
drug_soup = BeautifulSoup(drug_response.text, 'html.parser')
|
213 |
+
|
214 |
+
# Extract relevant information (e.g., approval status, indications)
|
215 |
+
approval_status = drug_soup.find('span', class_='product-status').get_text(strip=True) if drug_soup.find('span', class_='product-status') else "Not Available"
|
216 |
+
indications = drug_soup.find('div', class_='indications').get_text(strip=True) if drug_soup.find('div', class_='indications') else "Not Available"
|
217 |
+
|
218 |
+
return {
|
219 |
+
"Drug Name": drug_name,
|
220 |
+
"EMA Approval Status": approval_status,
|
221 |
+
"Indications": indications,
|
222 |
+
"EMA Drug Page": drug_page_url
|
223 |
+
}
|
224 |
+
|
225 |
+
except requests.exceptions.HTTPError as http_err:
|
226 |
+
st.error(f"HTTP error occurred while scraping EMA: {http_err}")
|
227 |
+
logging.error(f"HTTP error: {http_err}")
|
228 |
+
except Exception as e:
|
229 |
+
st.error(f"An error occurred while scraping EMA: {e}")
|
230 |
+
logging.error(f"Scraping error: {e}")
|
231 |
+
|
232 |
+
return None
|
233 |
|
234 |
+
def _get_dailymed_label(drug_name: str) -> Optional[Dict]:
|
235 |
+
"""Retrieves drug label information from DailyMed."""
|
236 |
+
try:
|
237 |
+
base_url = API_ENDPOINTS["dailymed"]
|
238 |
+
params = {
|
239 |
+
"drug_name": drug_name,
|
240 |
+
"page": 1,
|
241 |
+
"pagesize": 1
|
242 |
+
}
|
243 |
+
data = _query_api(base_url, params)
|
244 |
+
if data and 'data' in data and len(data['data']) > 0:
|
245 |
+
label_url = data['data'][0]['url']
|
246 |
+
return {"Label URL": label_url}
|
247 |
+
else:
|
248 |
+
st.warning(f"No DailyMed label found for '{drug_name}'.")
|
249 |
+
return None
|
250 |
+
except Exception as e:
|
251 |
+
st.error(f"Error fetching DailyMed data: {e}")
|
252 |
+
logging.error(f"DailyMed error: {e}")
|
253 |
+
return None
|
254 |
|
255 |
def _get_bioportal_data(ontology: str, term: str) -> Optional[Dict]:
|
256 |
"""Fetches data from BioPortal."""
|
257 |
if not BIOPORTAL_API_KEY:
|
258 |
+
st.error("BioPortal API key not found. Please add the BIOPORTAL_API_KEY to secrets.")
|
259 |
+
return None
|
260 |
if not term:
|
261 |
+
st.error("Please provide a search term.")
|
262 |
+
return None
|
263 |
|
264 |
headers = {
|
265 |
"Authorization": f"apikey token={BIOPORTAL_API_KEY}"
|
266 |
+
}
|
267 |
params = {
|
268 |
"q": term,
|
269 |
"ontologies": ontology
|
270 |
}
|
271 |
|
272 |
+
endpoint = API_ENDPOINTS["bioportal_search"]
|
273 |
try:
|
274 |
+
data = _query_api(endpoint, params, headers)
|
|
|
|
|
275 |
if data and 'collection' in data:
|
276 |
return data
|
277 |
else:
|
278 |
st.warning("No results found for the BioPortal query.")
|
279 |
return None
|
280 |
+
except Exception as e:
|
281 |
st.error(f"BioPortal API request failed: {e} Please check connectivity and ensure you have the correct API Key.")
|
282 |
logging.error(f"BioPortal API request failed: {e}")
|
283 |
return None
|
284 |
+
|
285 |
+
def _get_semantic_scholar_papers(query: str, limit: int = 5) -> Optional[List[Dict]]:
|
286 |
+
"""Fetches papers from Semantic Scholar based on the search query."""
|
287 |
+
try:
|
288 |
+
headers = {
|
289 |
+
"x-api-key": SEMANTIC_SCHOLAR_API_KEY
|
290 |
+
}
|
291 |
+
if not headers["x-api-key"]:
|
292 |
+
st.warning("Semantic Scholar API key not found. Please add it to secrets.")
|
293 |
+
return None
|
294 |
+
base_url = "https://api.semanticscholar.org/graph/v1/paper/search"
|
295 |
+
params = {
|
296 |
+
"query": query,
|
297 |
+
"limit": limit,
|
298 |
+
"fields": "title,authors,year,abstract,url"
|
299 |
+
}
|
300 |
+
data = _query_api(base_url, params, headers)
|
301 |
+
if data and 'data' in data:
|
302 |
+
return data['data']
|
303 |
+
else:
|
304 |
+
st.warning("No papers found for the given query.")
|
305 |
+
return None
|
306 |
+
except Exception as e:
|
307 |
+
st.error(f"Error fetching Semantic Scholar data: {e}")
|
308 |
+
logging.error(f"Semantic Scholar error: {e}")
|
309 |
+
return None
|
310 |
+
|
311 |
+
def _get_pharmvar_haplotypes(gene_symbol: str) -> Optional[List[Dict]]:
|
312 |
+
"""Fetches haplotype information from PharmVar for a given gene."""
|
313 |
+
try:
|
314 |
+
base_url = f"https://api.pharmvar.org/v1/gene/{gene_symbol}/haplotypes"
|
315 |
+
data = _query_api(base_url)
|
316 |
+
if data and 'data' in data:
|
317 |
+
return data['data']
|
318 |
+
else:
|
319 |
+
st.warning(f"No haplotype data found for gene '{gene_symbol}'.")
|
320 |
+
return None
|
321 |
+
except Exception as e:
|
322 |
+
st.error(f"Error fetching PharmVar data: {e}")
|
323 |
+
logging.error(f"PharmVar error: {e}")
|
324 |
+
return None
|
325 |
+
|
326 |
+
def _create_variant_network(gene: str, variants: List[str], annotations: Dict):
|
327 |
+
"""Creates an interactive network graph of gene-variant-drug relationships."""
|
328 |
+
import networkx as nx
|
329 |
+
import plotly.graph_objects as go
|
330 |
+
|
331 |
+
G = nx.Graph()
|
332 |
+
G.add_node(gene, color='lightblue')
|
333 |
+
for variant in variants:
|
334 |
+
G.add_node(variant, color='lightgreen')
|
335 |
+
G.add_edge(gene, variant)
|
336 |
+
# Add connected drugs from annotations
|
337 |
+
for drug in annotations.get(variant, []):
|
338 |
+
if drug != 'N/A':
|
339 |
+
G.add_node(drug, color='lightcoral')
|
340 |
+
G.add_edge(variant, drug)
|
341 |
+
|
342 |
+
pos = nx.spring_layout(G)
|
343 |
+
edge_x = []
|
344 |
+
edge_y = []
|
345 |
+
for edge in G.edges():
|
346 |
+
x0, y0 = pos[edge[0]]
|
347 |
+
x1, y1 = pos[edge[1]]
|
348 |
+
edge_x.extend([x0, x1, None])
|
349 |
+
edge_y.extend([y0, y1, None])
|
350 |
+
|
351 |
+
edge_trace = go.Scatter(
|
352 |
+
x=edge_x, y=edge_y,
|
353 |
+
line=dict(width=0.5, color='#888'),
|
354 |
+
hoverinfo='none',
|
355 |
+
mode='lines'
|
356 |
+
)
|
357 |
+
|
358 |
+
node_x = []
|
359 |
+
node_y = []
|
360 |
+
node_text = []
|
361 |
+
node_color = []
|
362 |
+
for node in G.nodes():
|
363 |
+
x, y = pos[node]
|
364 |
+
node_x.append(x)
|
365 |
+
node_y.append(y)
|
366 |
+
node_text.append(node)
|
367 |
+
node_color.append(G.nodes[node]['color'])
|
368 |
+
|
369 |
+
node_trace = go.Scatter(
|
370 |
+
x=node_x, y=node_y,
|
371 |
+
mode='markers+text',
|
372 |
+
hoverinfo='text',
|
373 |
+
text=node_text,
|
374 |
+
textposition="bottom center",
|
375 |
+
marker=dict(
|
376 |
+
showscale=False,
|
377 |
+
colorscale='YlGnBu',
|
378 |
+
color=node_color,
|
379 |
+
size=10,
|
380 |
+
line_width=2
|
381 |
+
)
|
382 |
+
)
|
383 |
|
384 |
+
fig = go.Figure(data=[edge_trace, node_trace],
|
385 |
+
layout=go.Layout(
|
386 |
+
title='<br>Gene-Variant-Drug Network',
|
387 |
+
titlefont_size=16,
|
388 |
+
showlegend=False,
|
389 |
+
hovermode='closest',
|
390 |
+
margin=dict(b=20,l=5,r=5,t=40),
|
391 |
+
annotations=[ dict(
|
392 |
+
text="Network visualization of gene-variant-drug relationships",
|
393 |
+
showarrow=False,
|
394 |
+
xref="paper", yref="paper",
|
395 |
+
x=0.005, y=-0.002 ) ],
|
396 |
+
xaxis=dict(showgrid=False, zeroline=False, showticklabels=False),
|
397 |
+
yaxis=dict(showgrid=False, zeroline=False, showticklabels=False))
|
398 |
+
)
|
399 |
+
return fig
|
400 |
+
|
401 |
+
def _summarize_abstract(abstract: str) -> str:
|
402 |
+
"""Generates a summary of the given abstract using a pre-trained model."""
|
403 |
+
try:
|
404 |
+
summarizer = pipeline("summarization")
|
405 |
+
summary = summarizer(abstract, max_length=130, min_length=30, do_sample=False)
|
406 |
+
return summary[0]['summary_text']
|
407 |
+
except Exception as e:
|
408 |
+
st.error(f"Error summarizing abstract: {e}")
|
409 |
+
logging.error(f"NLP Summarization Error: {e}")
|
410 |
+
return "Summary not available."
|
411 |
+
|
412 |
def _save_pdf_report(report_content: str, filename: str):
|
413 |
"""Saves content to a PDF file."""
|
414 |
+
try:
|
415 |
+
pdf = FPDF()
|
416 |
+
pdf.add_page()
|
417 |
+
pdf.set_font("Arial", size=12)
|
418 |
+
pdf.multi_cell(0, 10, report_content)
|
419 |
+
pdf.output(filename)
|
420 |
+
return filename
|
421 |
+
except Exception as e:
|
422 |
+
st.error(f"Error saving PDF report: {e}")
|
423 |
+
logging.error(f"Error saving PDF report: {e}")
|
424 |
+
return None
|
425 |
+
|
426 |
+
def _display_dataframe(data: List[Dict[str, Any]], columns: List[str]):
    """Render *data* as a Streamlit table and return the backing DataFrame.

    Args:
        data: Records to display (list of dicts).
        columns: Column names/order for the DataFrame.

    Returns:
        The constructed DataFrame, or ``None`` when *data* is empty.
    """
    # Guard clause: nothing to show for an empty record list.
    if not data:
        st.warning("No data found for dataframe creation.")
        return None
    frame = pd.DataFrame(data, columns=columns)
    st.dataframe(frame)
    return frame
|
435 |
|
436 |
# --- Streamlit App Configuration ---
|
437 |
st.set_page_config(page_title="Pharma Research Expert Platform", layout="wide")
|
|
|
439 |
st.markdown("An integrated platform for drug discovery, clinical research, and regulatory affairs.")
|
440 |
|
441 |
# --- Tabs ---
|
442 |
+
tabs = st.tabs(["💊 Drug Development", "📊 Trial Analytics", "🧬 Molecular Profiling", "📜 Regulatory Intelligence", "📚 Literature Search", "📈 Dashboard"])
|
443 |
|
444 |
# --- Tab 1: Drug Development ---
|
445 |
with tabs[0]:
|
|
|
452 |
with st.spinner("Analyzing target and competitive landscape..."):
|
453 |
# AI-generated content with regulatory checks
|
454 |
plan_prompt = f"""Develop a comprehensive drug development plan for the treatment of {target} using a {strategy} strategy.
|
455 |
+
Include sections on target validation, lead optimization, preclinical testing, clinical trial design, regulatory submission strategy, market analysis, and competitive landscape. Highlight key milestones and potential challenges."""
|
456 |
plan = content_agent.run(plan_prompt)
|
457 |
|
458 |
st.subheader("Comprehensive Development Plan")
|
|
|
468 |
else:
|
469 |
st.write("No relevant FDA data found.")
|
470 |
else:
|
471 |
+
st.write("Please enter a target to get relevant FDA data.")
|
472 |
|
473 |
    # Pharmacogenomic integration: for the user-supplied gene, look up PharmGKB
    # variants/annotations and PharmVar haplotypes, then plot a variant network.
    st.subheader("Pharmacogenomic Considerations")
    if target_gene:
        variant_ids = _get_pharmgkb_variants_for_gene(target_gene)
        if variant_ids:
            # Map variant id -> list of associated drug names (obj2Name).
            annotations = {}
            for variant_id in variant_ids[:5]:  # Limit to first 5 variants for brevity
                pgx_data = _get_pharmgkb_clinical_annotations(variant_id)
                if pgx_data and 'data' in pgx_data:
                    annotations[variant_id] = [annotation.get('obj2Name', 'N/A') for annotation in pgx_data['data']]
                else:
                    annotations[variant_id] = []

            # Display annotations
            # NOTE(review): PharmGKB is queried a second time here for the same
            # variants — the `annotations` dict built above could be reused.
            for variant_id in variant_ids[:5]:
                st.write(f"### Clinical Annotations for Variant: {variant_id}")
                pgx_data = _get_pharmgkb_clinical_annotations(variant_id)
                if pgx_data and 'data' in pgx_data:
                    for annotation in pgx_data['data']:
                        drug = annotation.get('obj2Name', 'N/A')
                        if drug != 'N/A':
                            st.write(f"- **Drug:** {drug}")
                else:
                    st.write(f"No clinical annotations found for variant {variant_id}.")

            # Fetch PharmVar haplotypes
            haplotypes = _get_pharmvar_haplotypes(target_gene)
            if haplotypes:
                st.subheader("PharmVar Haplotypes")
                for hap in haplotypes:
                    st.write(f"- **Haplotype:** {hap.get('name', 'N/A')}")
                    st.write(f"  - **Variant IDs:** {', '.join(hap.get('variantIds', []))}")

            # Display variant network (gene -> variants -> drugs) via Plotly.
            fig = _create_variant_network(target_gene, variant_ids[:5], annotations)
            st.plotly_chart(fig, use_container_width=True)
        else:
            st.write("No variants found for the specified gene.")
    else:
        st.write("Please enter a target gene to retrieve pharmacogenomic data.")
|
|
|
513 |
|
514 |
# --- Tab 2: Clinical Trial Analytics ---
|
515 |
with tabs[1]:
|
|
|
519 |
if st.button("Analyze Trial Landscape"):
|
520 |
with st.spinner("Aggregating global trial data..."):
|
521 |
trials = _get_clinical_trials(trial_query)
|
522 |
+
if trials and 'studies' in trials and len(trials['studies']) > 0:
|
523 |
st.subheader("Recent Clinical Trials")
|
524 |
trial_data = []
|
525 |
for study in trials['studies'][:5]:
|
|
|
526 |
trial_data.append({
|
527 |
+
"Title": study.get('briefTitle', 'N/A'),
|
528 |
+
"Status": study.get('overallStatus', 'N/A'),
|
529 |
+
"Phase": study.get('phase', 'Not Available'),
|
530 |
+
"Enrollment": study.get('enrollmentCount', 'Not Available')
|
531 |
+
})
|
532 |
|
533 |
trial_df = _display_dataframe(trial_data, list(trial_data[0].keys())) if trial_data else None
|
534 |
|
|
|
536 |
st.markdown("### Clinical Trial Summary (First 5 trials)")
|
537 |
st.dataframe(trial_df)
|
538 |
|
|
|
539 |
                # Adverse events analysis: pull openFDA-style reports for the
                # same query and chart the most frequent reactions.
                ae_data = _analyze_adverse_events(trial_query)
                if ae_data and 'results' in ae_data and len(ae_data['results']) > 0:
                    st.subheader("Adverse Event Profile (Top 5 Reports)")
                    ae_results = ae_data['results'][:5]
                    # json_normalize flattens the nested report dicts into
                    # dotted column names (e.g. patient.reaction.*).
                    ae_df = pd.json_normalize(ae_results)
                    st.dataframe(ae_df)

                    # Visualization of adverse events
                    if 'patient.reaction.reactionmeddrapt' in ae_df.columns and not ae_df.empty:
                        try:
                            # Column may hold lists of reactions; explode to one row each.
                            reactions = ae_df['patient.reaction.reactionmeddrapt'].explode().dropna()
                            top_reactions = reactions.value_counts().nlargest(10)

                            fig, ax = plt.subplots(figsize=(10,6))
                            sns.barplot(x=top_reactions.values, y=top_reactions.index, ax=ax)
                            plt.title('Top Adverse Reactions')
                            plt.xlabel('Frequency')
                            plt.ylabel('Adverse Reaction')
                            st.pyplot(fig)

                            # Display as dataframe
                            st.markdown("### Top 10 Adverse Reaction Summary")
                            st.dataframe(pd.DataFrame({'Reaction': top_reactions.index, 'Frequency': top_reactions.values}))
                        except Exception as e:
                            st.error(f"Error processing adverse events data: {e}")
                    else:
                        st.write("No adverse event data found.")
            else:
                st.warning("No clinical trials found for the given search term.")
|
570 |
|
|
|
571 |
# --- Tab 3: Molecular Profiling ---
|
572 |
with tabs[2]:
|
573 |
st.header("Advanced Molecular Profiling")
|
|
|
576 |
|
577 |
if st.button("Analyze Compound"):
|
578 |
with st.spinner("Querying global databases..."):
|
579 |
+
# Determine if input is a SMILES string
|
580 |
+
smiles = None
|
581 |
+
if Chem.MolFromSmiles(compound_input):
|
582 |
+
smiles = compound_input
|
583 |
+
else:
|
584 |
+
smiles = _get_pubchem_smiles(compound_input)
|
585 |
|
586 |
if smiles:
|
587 |
img = _draw_molecule(smiles)
|
|
|
589 |
st.image(img, caption="2D Structure")
|
590 |
else:
|
591 |
st.error("Compound structure not found in databases.")
|
592 |
+
|
593 |
# PubChem properties
|
594 |
+
if compound_input:
|
595 |
+
pubchem_data = _query_api(API_ENDPOINTS["pubchem"].format(compound_input))
|
596 |
+
if pubchem_data and 'PC_Compounds' in pubchem_data and len(pubchem_data['PC_Compounds']) > 0:
|
597 |
+
st.subheader("Physicochemical Properties")
|
598 |
+
props = pubchem_data['PC_Compounds'][0].get('props', [])
|
599 |
+
mw = next((prop['value']['sval'] for prop in props if 'name' in prop and prop['name'] == 'Molecular Weight'), 'N/A')
|
600 |
+
logp = next((prop['value']['sval'] for prop in props if 'name' in prop and prop['name'] == 'LogP'), 'N/A')
|
601 |
+
|
602 |
+
st.write(f"""
|
603 |
+
**Molecular Weight:** {mw}
|
604 |
+
**LogP:** {logp}
|
605 |
+
""")
|
606 |
+
else:
|
607 |
+
st.error("Physicochemical properties not found.")
|
608 |
else:
|
609 |
+
st.warning("Please enter a compound identifier.")
|
|
|
610 |
|
611 |
# --- Tab 4: Regulatory Intelligence ---
|
612 |
with tabs[3]:
|
|
|
615 |
|
616 |
    if st.button("Generate Regulatory Report"):
        with st.spinner("Compiling global regulatory status..."):
            # FDA Status: brand names from openFDA, if the drug is approved.
            fda_info = _get_fda_approval(drug_name)
            fda_status = "Not Approved"
            if fda_info and 'openfda' in fda_info and 'brand_name' in fda_info['openfda']:
                fda_status = ', '.join(fda_info['openfda']['brand_name'])

            # EMA Status via Scraping (no public EMA API is used here).
            ema_info = scrape_ema_drug_info(drug_name)
            ema_status = ema_info['EMA Approval Status'] if ema_info else "Not Available"

            # WHO Essential Medicine
            # NOTE(review): any non-empty API response is treated as "Yes" —
            # confirm the endpoint returns empty/None for unknown drugs.
            who = _query_api(API_ENDPOINTS["who_drugs"], params={"name": drug_name})
            who_status = "Yes" if who else "No"

            # DailyMed Label
            dailymed_info = _get_dailymed_label(drug_name)
            dailymed_status = dailymed_info['Label URL'] if dailymed_info else "Not Available"

            # Render the four statuses side by side.
            st.subheader("Regulatory Status")
            col1, col2, col3, col4 = st.columns(4)
            with col1:
                st.markdown("**FDA Status**")
                st.write(fda_status)
            with col2:
                st.markdown("**EMA Status**")
                st.write(ema_status)
            with col3:
                st.markdown("**WHO Essential Medicine**")
                st.write(who_status)
            with col4:
                st.markdown("**DailyMed Label**")
                st.write(dailymed_status)
                if dailymed_info and 'Label URL' in dailymed_info:
                    st.markdown(f"[View DailyMed Label]({dailymed_info['Label URL']})")

            # Display EMA Drug Page Link if available
            if ema_info and 'EMA Drug Page' in ema_info:
                st.markdown(f"[View EMA Drug Page]({ema_info['EMA Drug Page']})")

            # Save the information to a PDF report
            regulatory_content = f"""### Regulatory Report

**FDA Status:** {fda_status}

**EMA Status:** {ema_status}

**WHO Essential Medicine:** {who_status}

**DailyMed Label:** {dailymed_status}
"""
            if ema_info and 'EMA Drug Page' in ema_info:
                regulatory_content += f"\n**EMA Drug Page:** {ema_info['EMA Drug Page']}\n"

            # Write the PDF into a named temp file so it can be offered for
            # download, then deleted once the download button has the bytes.
            with tempfile.NamedTemporaryFile(delete=False, suffix=".pdf") as tmp_file:
                report_file = _save_pdf_report(regulatory_content, tmp_file.name)
            if report_file:
                with open(report_file, "rb") as file:
                    st.download_button(
                        label="Download Regulatory Report (PDF)",
                        data=file,
                        file_name=f"{drug_name}_regulatory_report.pdf",
                        mime="application/pdf"
                    )
                os.remove(report_file)  # Clean up the temporary file
|
682 |
|
683 |
# --- Tab 5: Literature Search ---
|
684 |
with tabs[4]:
|
|
|
686 |
    # PubMed search: list matching PMIDs as links to pubmed.ncbi.nlm.nih.gov.
    search_term = st.text_input("Enter search query for PubMed:", placeholder="e.g., Alzheimer's disease genetics")
    if st.button("Search PubMed"):
        with st.spinner("Searching PubMed..."):
            pubmed_data = _get_pubmed(search_term)
            if pubmed_data and 'esearchresult' in pubmed_data and 'idlist' in pubmed_data['esearchresult'] and len(pubmed_data['esearchresult']['idlist']) > 0:
                st.subheader("PubMed Search Results")
                st.write(f"Found {len(pubmed_data['esearchresult']['idlist'])} results for '{search_term}':")
                for article_id in pubmed_data['esearchresult']['idlist']:
                    st.markdown(f"- [PMID: {article_id}](https://pubmed.ncbi.nlm.nih.gov/{article_id}/)")
            else:
                st.write("No results found for that term.")

    # Enhanced Literature with Semantic Scholar: shows title/authors/year and
    # an NLP-generated summary of each abstract.
    st.header("Semantic Scholar Literature Search")
    semantic_query = st.text_input("Enter search query for Semantic Scholar:", placeholder="e.g., Alzheimer's disease genetics")
    if st.button("Search Semantic Scholar"):
        with st.spinner("Searching Semantic Scholar..."):
            semantic_papers = _get_semantic_scholar_papers(semantic_query)
            if semantic_papers:
                st.subheader("Semantic Scholar Search Results")
                for paper in semantic_papers:
                    title = paper.get('title', 'N/A')
                    authors = ', '.join([author['name'] for author in paper.get('authors', [])])
                    year = paper.get('year', 'N/A')
                    url = paper.get('url', '#')
                    # NOTE(review): Semantic Scholar may return the 'abstract'
                    # key with a None value; the .get default only covers a
                    # missing key — confirm downstream handling.
                    abstract = paper.get('abstract', 'No abstract available.')
                    summary = _summarize_abstract(abstract) if abstract != 'No abstract available.' else "No summary available."

                    st.markdown(f"### [{title}]({url})")
                    st.write(f"**Authors:** {authors}")
                    st.write(f"**Year:** {year}")
                    st.write(f"**Abstract Summary:** {summary}")
                    st.write("---")
            else:
                st.write("No results found for that term.")

    # Ontology Search: term lookup via BioPortal in the selected ontology.
    st.header("Ontology Search")
    ontology_search_term = st.text_input("Enter Search query for Ontology:", placeholder="Enter disease or ontology")
    ontology_select = st.selectbox("Select Ontology", ["MESH", "NCIT", "GO", "SNOMEDCT"])
    if st.button("Search BioPortal"):
        with st.spinner("Searching Ontology..."):
            bioportal_data = _get_bioportal_data(ontology_select, ontology_search_term)
            if bioportal_data and 'collection' in bioportal_data and len(bioportal_data['collection']) > 0:
                st.subheader(f"BioPortal Search Results for {ontology_select}")
                for result in bioportal_data['collection']:
                    label = result.get('prefLabel', 'N/A')
                    ontology_id = result.get('@id', 'N/A')
                    st.markdown(f"- **{label}** ({ontology_id})")
            else:
                st.write("No results found.")
|
737 |
+
|
738 |
+
# --- Tab 6: Dashboard ---
# Summary dashboard: KPI cards, a placeholder trend chart, and a sample
# gene-variant-drug network. All counts below are hard-coded placeholders.
with tabs[5]:
    st.header("Comprehensive Dashboard")

    # Sample KPI Counts (Replace with actual data)
    fda_count = 5000  # Placeholder
    ema_count = 3000
    who_count = 1500
    clinical_trials_count = 12000
    publications_count = 250000

    def _create_kpi_dashboard(fda_count: int, ema_count: int, who_count: int, trials: int, publications: int) -> None:
        """Render five KPI metric cards in a single row of columns."""
        col1, col2, col3, col4, col5 = st.columns(5)
        col1.metric("FDA Approved Drugs", fda_count)
        col2.metric("EMA Approved Drugs", ema_count)
        col3.metric("WHO Essential Medicines", who_count)
        col4.metric("Ongoing Clinical Trials", trials)
        col5.metric("Publications Found", publications)

    _create_kpi_dashboard(fda_count, ema_count, who_count, clinical_trials_count, publications_count)

    # Trend Graphs
    st.subheader("Trends Over Time")
    # Example: Number of FDA approvals per year (flat placeholder series —
    # each of the 26 years gets fda_count // 26).
    years = list(range(2000, 2026))
    fda_approvals = [fda_count // 26] * len(years)  # Placeholder data

    fig, ax = plt.subplots(figsize=(10,6))
    sns.lineplot(x=years, y=fda_approvals, marker='o', ax=ax)
    ax.set_title('FDA Drug Approvals Over Years')
    ax.set_xlabel('Year')
    ax.set_ylabel('Number of Approvals')
    st.pyplot(fig)

    # Geographical Maps
    st.subheader("Geographical Distribution of Clinical Trials")
    # Placeholder: Actual implementation would require location data from ClinicalTrials.gov
    # Example: Display a static map
    st.write("Feature under development. Geographical data from clinical trials will be visualized here.")

    # Interactive Network Graph
    st.subheader("Gene-Variant-Drug Network")
    # Placeholder: Fetch sample gene, variants, and drugs
    sample_gene = "CYP2C19"
    sample_variants = ["rs4244285", "rs12248560"]
    sample_annotations = {
        "rs4244285": ["Clopidogrel", "Omeprazole"],
        "rs12248560": ["Sertraline"]
    }

    fig = _create_variant_network(sample_gene, sample_variants, sample_annotations)
    st.plotly_chart(fig, use_container_width=True)
|
791 |
+
|
792 |
+
# --- Additional Enhancements: User Authentication (Optional) ---
|
793 |
+
# Implement user authentication if needed using streamlit-authenticator or Auth0
|
794 |
+
|
795 |
+
# --- Additional Enhancements: Caching ---
|
796 |
+
@st.cache_data(ttl=3600)
def cached_get_pharmgkb_clinical_annotations(variant_id: str) -> Optional[Dict]:
    """Fetch PharmGKB clinical annotations for *variant_id*, memoized for one hour.

    NOTE(review): defined after the call sites above, which invoke the
    uncached helper directly — confirm intended usage.
    """
    annotations = _get_pharmgkb_clinical_annotations(variant_id)
    return annotations
|
800 |
+
|
801 |
+
@st.cache_data(ttl=3600)
def cached_get_pubchem_smiles(drug_name: str) -> Optional[str]:
    """Resolve *drug_name* to a SMILES string via PubChem, memoized for one hour.

    NOTE(review): defined after the call sites above, which invoke the
    uncached helper directly — confirm intended usage.
    """
    smiles = _get_pubchem_smiles(drug_name)
    return smiles
|
805 |
+
|
806 |
+
# --- Final Touches ---
# Static "About" panel shown in the sidebar on every page load.
st.sidebar.header("About")
st.sidebar.info("""
**Pharma Research Expert Platform**

An integrated platform for drug discovery, clinical research, and regulatory affairs.

**Developed by:** Your Name

**Contact:** [[email protected]](mailto:[email protected])
""")
|