Update app.py
Browse files
app.py
CHANGED
@@ -24,23 +24,20 @@ logging.basicConfig(level=logging.ERROR)
|
|
24 |
# API Endpoints (Centralized Configuration)
|
25 |
# -----------------------------
|
26 |
API_ENDPOINTS = {
|
27 |
-
"clinical_trials": "https://clinicaltrials.gov/api/v2/studies", # no email
|
28 |
"pubchem": "https://pubchem.ncbi.nlm.nih.gov/rest/pug/compound/name/{}/JSON",
|
29 |
"pubmed": "https://eutils.ncbi.nlm.nih.gov/entrez/eutils/esearch.fcgi",
|
30 |
-
"who_drugs": "https://health-products.canada.ca/api/drug/product",
|
31 |
"fda_drug_approval": "https://api.fda.gov/drug/label.json",
|
32 |
"faers_adverse_events": "https://api.fda.gov/drug/event.json",
|
33 |
-
# PharmGKB endpoints
|
34 |
"pharmgkb_variant_clinical_annotations": "https://api.pharmgkb.org/v1/data/variant/{}/clinicalAnnotations",
|
35 |
"pharmgkb_gene": "https://api.pharmgkb.org/v1/data/gene/{}",
|
36 |
"pharmgkb_gene_variants": "https://api.pharmgkb.org/v1/data/gene/{}/variants",
|
37 |
"bioportal_search": "https://data.bioontology.org/search",
|
38 |
-
#
|
39 |
-
"dailymed": "https://dailymed.nlm.nih.gov/dailymed/services/v2/spls.json",
|
40 |
-
# RxNorm endpoints (examples)
|
41 |
"rxnorm_rxcui": "https://rxnav.nlm.nih.gov/REST/rxcui.json",
|
42 |
"rxnorm_properties": "https://rxnav.nlm.nih.gov/REST/rxcui/{}/properties.json",
|
43 |
-
# RxClass endpoint example
|
44 |
"rxclass_by_drug": "https://rxnav.nlm.nih.gov/REST/class/byDrugName.json"
|
45 |
}
|
46 |
|
@@ -125,8 +122,27 @@ def _draw_molecule(smiles: str) -> Optional[Any]:
|
|
125 |
logging.error(f"Molecule drawing error: {e}")
|
126 |
return None
|
127 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
128 |
def _get_clinical_trials(query: str) -> Optional[Dict]:
|
129 |
-
"""Queries ClinicalTrials.gov using the 'query.term' parameter
|
130 |
if query.upper().startswith("NCT") and query[3:].isdigit():
|
131 |
params = {"id": query, "fmt": "json"}
|
132 |
else:
|
@@ -197,59 +213,6 @@ def get_pharmgkb_gene_data(pharmgkb_gene_id: str) -> Optional[Dict]:
|
|
197 |
st.write(f"No data found for PharmGKB gene {pharmgkb_gene_id}.")
|
198 |
return None
|
199 |
|
200 |
-
def scrape_ema_drug_info(drug_name: str) -> Optional[Dict]:
    """Scrape the EMA website for a product's approval status and indications.

    Runs an EMA site search for ``drug_name``, follows the first search-result
    link, and reads the approval status and indications from that product page.

    Args:
        drug_name: Generic or brand name to search for.

    Returns:
        A dict with the keys ``"Drug Name"``, ``"EMA Approval Status"``,
        ``"Indications"`` and ``"EMA Drug Page"``; or ``None`` when no search
        result (or no usable link) is found, or when any error occurs. Errors
        are reported through ``st.error`` and ``logging.error`` rather than
        raised.
    """
    # Function-scope import so the block does not depend on the file's import header.
    from urllib.parse import quote_plus, urljoin

    base_url = "https://www.ema.europa.eu"
    try:
        # quote_plus encodes spaces as '+' (as before) and also escapes characters
        # such as '&' or '#' that would otherwise corrupt the query string.
        search_url = f"{base_url}/en/search?text={quote_plus(drug_name)}&type=Product"
        headers = {
            "User-Agent": ("Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
                           "AppleWebKit/537.36 (KHTML, like Gecko) "
                           "Chrome/90.0.4430.93 Safari/537.36")
        }
        response = requests.get(search_url, headers=headers, timeout=10)
        response.raise_for_status()
        soup = BeautifulSoup(response.text, "html.parser")
        result = soup.find("a", class_="search-result__title")
        # A result anchor without an href is as useless as no result at all;
        # previously this case surfaced as a TypeError from str + None.
        href = result.get("href") if result else None
        if not href:
            st.warning(f"No EMA data found for '{drug_name}'.")
            return None

        # urljoin handles relative, root-relative and absolute hrefs alike.
        drug_page_url = urljoin(base_url, href)
        drug_response = requests.get(drug_page_url, headers=headers, timeout=10)
        drug_response.raise_for_status()
        drug_soup = BeautifulSoup(drug_response.text, "html.parser")

        approval_elem = drug_soup.find("span", class_="product-status")
        approval_status = approval_elem.get_text(strip=True) if approval_elem else "Not Available"
        indications_elem = drug_soup.find("div", class_="indications")
        indications = indications_elem.get_text(strip=True) if indications_elem else "Not Available"
        return {
            "Drug Name": drug_name,
            "EMA Approval Status": approval_status,
            "Indications": indications,
            "EMA Drug Page": drug_page_url
        }
    except Exception as e:
        # UI boundary: report the failure to the user and the log, never raise.
        st.error(f"Error scraping EMA data: {e}")
        logging.error(f"EMA scraping error: {e}")
        return None
|
234 |
-
|
235 |
-
def _get_dailymed_label(drug_name: str) -> Optional[Dict]:
    """Look up the DailyMed label URL for a drug via the v2 JSON API.

    Queries the ``"dailymed"`` endpoint for a single result (page 1, page
    size 1) and returns ``{"Label URL": <url>}`` for the first entry.
    Returns ``None`` (after a Streamlit warning) when no entry or no URL is
    present, and ``None`` (after a Streamlit error plus a log record) when
    anything raises.
    """
    try:
        payload = _query_api(
            API_ENDPOINTS["dailymed"],
            {"drug_name": drug_name, "page": 1, "pagesize": 1},
        )
        # Guard clause: nothing usable came back from the service.
        if not payload or not payload.get("data") or len(payload["data"]) <= 0:
            st.warning(f"No DailyMed label found for '{drug_name}'.")
            return None

        first_url = payload["data"][0].get("url")
        if first_url:
            return {"Label URL": first_url}

        st.warning("DailyMed label URL not found.")
        return None
    except Exception as err:
        st.error(f"Error fetching DailyMed data: {err}")
        logging.error(f"DailyMed error: {err}")
        return None
|
252 |
-
|
253 |
def _get_bioportal_data(ontology: str, term: str) -> Optional[Dict]:
|
254 |
"""Fetches ontology data from BioPortal."""
|
255 |
if not BIOPORTAL_API_KEY:
|
@@ -371,10 +334,31 @@ def get_rxclass_by_drug_name(drug_name: str) -> Optional[Dict]:
|
|
371 |
url = f"{API_ENDPOINTS['rxclass_by_drug']}?drugName={drug_name}"
|
372 |
return _query_api(url)
|
373 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
374 |
# -----------------------------
|
375 |
# Streamlit App Layout and Tabs
|
376 |
# -----------------------------
|
377 |
-
#
|
378 |
tabs = st.tabs([
|
379 |
"π Drug Development",
|
380 |
"π Trial Analytics",
|
@@ -521,6 +505,7 @@ with tabs[2]:
|
|
521 |
# -----------------------------
|
522 |
with tabs[3]:
|
523 |
st.header("Global Regulatory Monitoring")
|
|
|
524 |
drug_name = st.text_input("Drug Product:", placeholder="Enter generic or brand name")
|
525 |
|
526 |
if st.button("Generate Regulatory Report"):
|
@@ -531,47 +516,33 @@ with tabs[3]:
|
|
531 |
if fda_info and fda_info.get("openfda", {}).get("brand_name"):
|
532 |
fda_status = ", ".join(fda_info["openfda"]["brand_name"])
|
533 |
|
534 |
-
#
|
535 |
-
|
536 |
-
|
537 |
-
|
538 |
-
|
539 |
-
|
540 |
-
|
541 |
-
|
542 |
-
# DailyMed Label
|
543 |
-
dailymed_info = _get_dailymed_label(drug_name)
|
544 |
-
dailymed_status = dailymed_info.get("Label URL") if dailymed_info else "Not Available"
|
545 |
|
546 |
st.subheader("Regulatory Status")
|
547 |
-
col1, col2
|
548 |
with col1:
|
549 |
st.markdown("**FDA Status**")
|
550 |
st.write(fda_status)
|
551 |
with col2:
|
552 |
-
st.markdown("**
|
553 |
-
st.write(
|
554 |
-
|
555 |
-
st.
|
556 |
-
st.write(who_status)
|
557 |
-
with col4:
|
558 |
-
st.markdown("**DailyMed Label**")
|
559 |
-
st.write(dailymed_status)
|
560 |
-
if dailymed_info and dailymed_info.get("Label URL"):
|
561 |
-
st.markdown(f"[View DailyMed Label]({dailymed_info['Label URL']})")
|
562 |
-
if ema_info and ema_info.get("EMA Drug Page"):
|
563 |
-
st.markdown(f"[View EMA Drug Page]({ema_info['EMA Drug Page']})")
|
564 |
|
565 |
regulatory_content = (
|
566 |
f"### Regulatory Report\n\n"
|
567 |
f"**FDA Status:** {fda_status}\n\n"
|
568 |
-
f"**
|
569 |
-
f"**
|
570 |
-
f"**
|
571 |
)
|
572 |
-
if ema_info and ema_info.get("EMA Drug Page"):
|
573 |
-
regulatory_content += f"\n**EMA Drug Page:** {ema_info['EMA Drug Page']}\n"
|
574 |
-
|
575 |
with tempfile.NamedTemporaryFile(delete=False, suffix=".pdf") as tmp_file:
|
576 |
report_file = _save_pdf_report(regulatory_content, tmp_file.name)
|
577 |
if report_file:
|
@@ -624,20 +595,18 @@ with tabs[5]:
|
|
624 |
|
625 |
# Placeholder KPI counts (replace with real aggregated data if available)
|
626 |
fda_count = 5000 # Example value
|
627 |
-
ema_count = 3000 # Example value
|
628 |
-
who_count = 1500 # Example value
|
629 |
trials_count = 12000 # Example value
|
630 |
pub_count = 250000 # Example value
|
631 |
|
632 |
-
def _create_kpi_dashboard(fda: int,
|
633 |
-
col1, col2, col3
|
634 |
col1.metric("FDA Approved Drugs", fda)
|
635 |
-
col2.metric("
|
636 |
-
col3.metric("
|
637 |
-
col4.metric("Ongoing Trials", trials)
|
638 |
-
col5.metric("Publications", pubs)
|
639 |
|
640 |
-
_create_kpi_dashboard(fda_count,
|
641 |
|
642 |
st.subheader("Trend Analysis")
|
643 |
years = list(range(2000, 2026))
|
@@ -671,13 +640,6 @@ with tabs[6]:
|
|
671 |
|
672 |
if st.button("Retrieve Drug Data"):
|
673 |
with st.spinner("Fetching drug data from multiple sources..."):
|
674 |
-
# DailyMed - using our existing DailyMed label function
|
675 |
-
dailymed_label = _get_dailymed_label(drug_query)
|
676 |
-
if dailymed_label:
|
677 |
-
dm_label = dailymed_label.get("Label URL", "Not Available")
|
678 |
-
else:
|
679 |
-
dm_label = "Not Available"
|
680 |
-
|
681 |
# RxNorm - Get RxCUI and then properties
|
682 |
rxnorm_id = get_rxnorm_rxcui(drug_query)
|
683 |
if rxnorm_id:
|
@@ -688,9 +650,6 @@ with tabs[6]:
|
|
688 |
# RxClass - Get classes related to the drug
|
689 |
rxclass_data = get_rxclass_by_drug_name(drug_query)
|
690 |
|
691 |
-
st.subheader("DailyMed Label")
|
692 |
-
st.write(f"DailyMed Label URL: {dm_label}")
|
693 |
-
|
694 |
st.subheader("RxNorm Data")
|
695 |
if rxnorm_id:
|
696 |
st.write(f"RxCUI for {drug_query}: {rxnorm_id}")
|
@@ -702,9 +661,19 @@ with tabs[6]:
|
|
702 |
st.write("No RxCUI found for the given drug name.")
|
703 |
|
704 |
st.subheader("RxClass Information")
|
705 |
-
if rxclass_data:
|
706 |
st.json(rxclass_data)
|
707 |
else:
|
708 |
st.write("No RxClass data found for the given drug.")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
709 |
|
710 |
|
|
|
24 |
# API Endpoints (Centralized Configuration)
|
25 |
# -----------------------------
|
26 |
API_ENDPOINTS = {
|
27 |
+
"clinical_trials": "https://clinicaltrials.gov/api/v2/studies", # no email required
|
28 |
"pubchem": "https://pubchem.ncbi.nlm.nih.gov/rest/pug/compound/name/{}/JSON",
|
29 |
"pubmed": "https://eutils.ncbi.nlm.nih.gov/entrez/eutils/esearch.fcgi",
|
|
|
30 |
"fda_drug_approval": "https://api.fda.gov/drug/label.json",
|
31 |
"faers_adverse_events": "https://api.fda.gov/drug/event.json",
|
32 |
+
# PharmGKB endpoints (these require a PharmGKB accession such as "PA1234")
|
33 |
"pharmgkb_variant_clinical_annotations": "https://api.pharmgkb.org/v1/data/variant/{}/clinicalAnnotations",
|
34 |
"pharmgkb_gene": "https://api.pharmgkb.org/v1/data/gene/{}",
|
35 |
"pharmgkb_gene_variants": "https://api.pharmgkb.org/v1/data/gene/{}/variants",
|
36 |
"bioportal_search": "https://data.bioontology.org/search",
|
37 |
+
# RxNorm endpoints
|
|
|
|
|
38 |
"rxnorm_rxcui": "https://rxnav.nlm.nih.gov/REST/rxcui.json",
|
39 |
"rxnorm_properties": "https://rxnav.nlm.nih.gov/REST/rxcui/{}/properties.json",
|
40 |
+
# RxClass endpoint (example)
|
41 |
"rxclass_by_drug": "https://rxnav.nlm.nih.gov/REST/class/byDrugName.json"
|
42 |
}
|
43 |
|
|
|
122 |
logging.error(f"Molecule drawing error: {e}")
|
123 |
return None
|
124 |
|
125 |
+
def _get_pubchem_drug_details(drug_name: str) -> Optional[Dict[str, str]]:
|
126 |
+
"""Retrieves drug details such as molecular formula and IUPAC name from PubChem."""
|
127 |
+
url = API_ENDPOINTS["pubchem"].format(drug_name)
|
128 |
+
data = _query_api(url)
|
129 |
+
if data and data.get("PC_Compounds"):
|
130 |
+
compound = data["PC_Compounds"][0]
|
131 |
+
details = {}
|
132 |
+
for prop in compound.get("props", []):
|
133 |
+
urn = prop.get("urn", {})
|
134 |
+
if urn.get("label") == "Molecular Formula":
|
135 |
+
details["Molecular Formula"] = prop["value"]["sval"]
|
136 |
+
elif urn.get("name") in ["Preferred", "Systematic"]:
|
137 |
+
# Use Preferred IUPAC name if available
|
138 |
+
details["IUPAC Name"] = prop["value"]["sval"]
|
139 |
+
elif prop.get("name") == "Canonical SMILES":
|
140 |
+
details["Canonical SMILES"] = prop["value"]["sval"]
|
141 |
+
return details
|
142 |
+
return None
|
143 |
+
|
144 |
def _get_clinical_trials(query: str) -> Optional[Dict]:
|
145 |
+
"""Queries ClinicalTrials.gov using the 'query.term' parameter."""
|
146 |
if query.upper().startswith("NCT") and query[3:].isdigit():
|
147 |
params = {"id": query, "fmt": "json"}
|
148 |
else:
|
|
|
213 |
st.write(f"No data found for PharmGKB gene {pharmgkb_gene_id}.")
|
214 |
return None
|
215 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
216 |
def _get_bioportal_data(ontology: str, term: str) -> Optional[Dict]:
|
217 |
"""Fetches ontology data from BioPortal."""
|
218 |
if not BIOPORTAL_API_KEY:
|
|
|
334 |
url = f"{API_ENDPOINTS['rxclass_by_drug']}?drugName={drug_name}"
|
335 |
return _query_api(url)
|
336 |
|
337 |
+
# -----------------------------
|
338 |
+
# New Function: PubChem Drug Details
|
339 |
+
# -----------------------------
|
340 |
+
def _get_pubchem_drug_details(drug_name: str) -> Optional[Dict[str, str]]:
|
341 |
+
"""Retrieves generic drug details (molecular formula, IUPAC name, and canonical SMILES) from PubChem."""
|
342 |
+
url = API_ENDPOINTS["pubchem"].format(drug_name)
|
343 |
+
data = _query_api(url)
|
344 |
+
details = {}
|
345 |
+
if data and data.get("PC_Compounds"):
|
346 |
+
compound = data["PC_Compounds"][0]
|
347 |
+
for prop in compound.get("props", []):
|
348 |
+
urn = prop.get("urn", {})
|
349 |
+
if urn.get("label") == "Molecular Formula":
|
350 |
+
details["Molecular Formula"] = prop["value"]["sval"]
|
351 |
+
if urn.get("name") == "Preferred":
|
352 |
+
details["IUPAC Name"] = prop["value"]["sval"]
|
353 |
+
if prop.get("name") == "Canonical SMILES":
|
354 |
+
details["Canonical SMILES"] = prop["value"]["sval"]
|
355 |
+
return details
|
356 |
+
return None
|
357 |
+
|
358 |
# -----------------------------
|
359 |
# Streamlit App Layout and Tabs
|
360 |
# -----------------------------
|
361 |
+
# Define tabs including a new "Drug Data Integration" tab.
|
362 |
tabs = st.tabs([
|
363 |
"π Drug Development",
|
364 |
"π Trial Analytics",
|
|
|
505 |
# -----------------------------
|
506 |
with tabs[3]:
|
507 |
st.header("Global Regulatory Monitoring")
|
508 |
+
st.markdown("**Note:** EMA, WHO, and DailyMed endpoints have been removed due to persistent errors. Instead, we provide FDA data and generic drug details (formula, IUPAC name, structure) from PubChem.")
|
509 |
drug_name = st.text_input("Drug Product:", placeholder="Enter generic or brand name")
|
510 |
|
511 |
if st.button("Generate Regulatory Report"):
|
|
|
516 |
if fda_info and fda_info.get("openfda", {}).get("brand_name"):
|
517 |
fda_status = ", ".join(fda_info["openfda"]["brand_name"])
|
518 |
|
519 |
+
# PubChem Drug Details for Generic/Formula Info
|
520 |
+
pubchem_details = _get_pubchem_drug_details(drug_name)
|
521 |
+
if pubchem_details:
|
522 |
+
formula = pubchem_details.get("Molecular Formula", "N/A")
|
523 |
+
iupac = pubchem_details.get("IUPAC Name", "N/A")
|
524 |
+
smiles = pubchem_details.get("Canonical SMILES", "N/A")
|
525 |
+
else:
|
526 |
+
formula = iupac = smiles = "Not Available"
|
|
|
|
|
|
|
527 |
|
528 |
st.subheader("Regulatory Status")
|
529 |
+
col1, col2 = st.columns(2)
|
530 |
with col1:
|
531 |
st.markdown("**FDA Status**")
|
532 |
st.write(fda_status)
|
533 |
with col2:
|
534 |
+
st.markdown("**Generic/Formula Details (PubChem)**")
|
535 |
+
st.write(f"**Molecular Formula:** {formula}")
|
536 |
+
st.write(f"**IUPAC Name:** {iupac}")
|
537 |
+
st.write(f"**Canonical SMILES:** {smiles}")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
538 |
|
539 |
regulatory_content = (
|
540 |
f"### Regulatory Report\n\n"
|
541 |
f"**FDA Status:** {fda_status}\n\n"
|
542 |
+
f"**Molecular Formula:** {formula}\n\n"
|
543 |
+
f"**IUPAC Name:** {iupac}\n\n"
|
544 |
+
f"**Canonical SMILES:** {smiles}\n"
|
545 |
)
|
|
|
|
|
|
|
546 |
with tempfile.NamedTemporaryFile(delete=False, suffix=".pdf") as tmp_file:
|
547 |
report_file = _save_pdf_report(regulatory_content, tmp_file.name)
|
548 |
if report_file:
|
|
|
595 |
|
596 |
# Placeholder KPI counts (replace with real aggregated data if available)
|
597 |
fda_count = 5000 # Example value
|
598 |
+
ema_count = 3000 # Example value (not used now)
|
599 |
+
who_count = 1500 # Example value (not used now)
|
600 |
trials_count = 12000 # Example value
|
601 |
pub_count = 250000 # Example value
|
602 |
|
603 |
+
def _create_kpi_dashboard(fda: int, trials: int, pubs: int):
    """Render the three headline KPIs side by side as Streamlit metrics.

    Args:
        fda: Value shown under "FDA Approved Drugs".
        trials: Value shown under "Ongoing Trials".
        pubs: Value shown under "Publications".
    """
    kpis = (
        ("FDA Approved Drugs", fda),
        ("Ongoing Trials", trials),
        ("Publications", pubs),
    )
    # One column per KPI, filled left to right.
    for column, (label, value) in zip(st.columns(len(kpis)), kpis):
        column.metric(label, value)
|
|
|
|
|
608 |
|
609 |
+
_create_kpi_dashboard(fda_count, trials_count, pub_count)
|
610 |
|
611 |
st.subheader("Trend Analysis")
|
612 |
years = list(range(2000, 2026))
|
|
|
640 |
|
641 |
if st.button("Retrieve Drug Data"):
|
642 |
with st.spinner("Fetching drug data from multiple sources..."):
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
643 |
# RxNorm - Get RxCUI and then properties
|
644 |
rxnorm_id = get_rxnorm_rxcui(drug_query)
|
645 |
if rxnorm_id:
|
|
|
650 |
# RxClass - Get classes related to the drug
|
651 |
rxclass_data = get_rxclass_by_drug_name(drug_query)
|
652 |
|
|
|
|
|
|
|
653 |
st.subheader("RxNorm Data")
|
654 |
if rxnorm_id:
|
655 |
st.write(f"RxCUI for {drug_query}: {rxnorm_id}")
|
|
|
661 |
st.write("No RxCUI found for the given drug name.")
|
662 |
|
663 |
st.subheader("RxClass Information")
|
664 |
+
if rxclass_data and rxclass_data.get("classMember"):
|
665 |
st.json(rxclass_data)
|
666 |
else:
|
667 |
st.write("No RxClass data found for the given drug.")
|
668 |
+
|
669 |
+
# Additionally, show generic drug details from PubChem
|
670 |
+
pubchem_details = _get_pubchem_drug_details(drug_query)
|
671 |
+
st.subheader("PubChem Drug Details")
|
672 |
+
if pubchem_details:
|
673 |
+
st.write(f"**Molecular Formula:** {pubchem_details.get('Molecular Formula', 'N/A')}")
|
674 |
+
st.write(f"**IUPAC Name:** {pubchem_details.get('IUPAC Name', 'N/A')}")
|
675 |
+
st.write(f"**Canonical SMILES:** {pubchem_details.get('Canonical SMILES', 'N/A')}")
|
676 |
+
else:
|
677 |
+
st.write("No PubChem details found for the given drug.")
|
678 |
|
679 |
|