mgbam committed on
Commit
5d65f21
·
verified ·
1 Parent(s): 8bcce34

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +81 -112
app.py CHANGED
@@ -24,23 +24,20 @@ logging.basicConfig(level=logging.ERROR)
24
  # API Endpoints (Centralized Configuration)
25
  # -----------------------------
26
  API_ENDPOINTS = {
27
- "clinical_trials": "https://clinicaltrials.gov/api/v2/studies", # no email needed
28
  "pubchem": "https://pubchem.ncbi.nlm.nih.gov/rest/pug/compound/name/{}/JSON",
29
  "pubmed": "https://eutils.ncbi.nlm.nih.gov/entrez/eutils/esearch.fcgi",
30
- "who_drugs": "https://health-products.canada.ca/api/drug/product",
31
  "fda_drug_approval": "https://api.fda.gov/drug/label.json",
32
  "faers_adverse_events": "https://api.fda.gov/drug/event.json",
33
- # PharmGKB endpoints: these require a PharmGKB accession (e.g., PA1234)
34
  "pharmgkb_variant_clinical_annotations": "https://api.pharmgkb.org/v1/data/variant/{}/clinicalAnnotations",
35
  "pharmgkb_gene": "https://api.pharmgkb.org/v1/data/gene/{}",
36
  "pharmgkb_gene_variants": "https://api.pharmgkb.org/v1/data/gene/{}/variants",
37
  "bioportal_search": "https://data.bioontology.org/search",
38
- # DailyMed: note the base URL now uses HTTPS and version v2
39
- "dailymed": "https://dailymed.nlm.nih.gov/dailymed/services/v2/spls.json",
40
- # RxNorm endpoints (examples)
41
  "rxnorm_rxcui": "https://rxnav.nlm.nih.gov/REST/rxcui.json",
42
  "rxnorm_properties": "https://rxnav.nlm.nih.gov/REST/rxcui/{}/properties.json",
43
- # RxClass endpoint example
44
  "rxclass_by_drug": "https://rxnav.nlm.nih.gov/REST/class/byDrugName.json"
45
  }
46
 
@@ -125,8 +122,27 @@ def _draw_molecule(smiles: str) -> Optional[Any]:
125
  logging.error(f"Molecule drawing error: {e}")
126
  return None
127
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
128
  def _get_clinical_trials(query: str) -> Optional[Dict]:
129
- """Queries ClinicalTrials.gov using the 'query.term' parameter (no email required)."""
130
  if query.upper().startswith("NCT") and query[3:].isdigit():
131
  params = {"id": query, "fmt": "json"}
132
  else:
@@ -197,59 +213,6 @@ def get_pharmgkb_gene_data(pharmgkb_gene_id: str) -> Optional[Dict]:
197
  st.write(f"No data found for PharmGKB gene {pharmgkb_gene_id}.")
198
  return None
199
 
200
- def scrape_ema_drug_info(drug_name: str) -> Optional[Dict]:
201
- """Scrapes EMA website for drug information using browser-like headers."""
202
- try:
203
- search_url = f"https://www.ema.europa.eu/en/search?text={drug_name.replace(' ', '+')}&type=Product"
204
- headers = {
205
- "User-Agent": ("Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
206
- "AppleWebKit/537.36 (KHTML, like Gecko) "
207
- "Chrome/90.0.4430.93 Safari/537.36")
208
- }
209
- response = requests.get(search_url, headers=headers, timeout=10)
210
- response.raise_for_status()
211
- soup = BeautifulSoup(response.text, "html.parser")
212
- result = soup.find("a", class_="search-result__title")
213
- if not result:
214
- st.warning(f"No EMA data found for '{drug_name}'.")
215
- return None
216
- drug_page_url = "https://www.ema.europa.eu" + result.get("href")
217
- drug_response = requests.get(drug_page_url, headers=headers, timeout=10)
218
- drug_response.raise_for_status()
219
- drug_soup = BeautifulSoup(drug_response.text, "html.parser")
220
- approval_elem = drug_soup.find("span", class_="product-status")
221
- approval_status = approval_elem.get_text(strip=True) if approval_elem else "Not Available"
222
- indications_elem = drug_soup.find("div", class_="indications")
223
- indications = indications_elem.get_text(strip=True) if indications_elem else "Not Available"
224
- return {
225
- "Drug Name": drug_name,
226
- "EMA Approval Status": approval_status,
227
- "Indications": indications,
228
- "EMA Drug Page": drug_page_url
229
- }
230
- except Exception as e:
231
- st.error(f"Error scraping EMA data: {e}")
232
- logging.error(f"EMA scraping error: {e}")
233
- return None
234
-
235
- def _get_dailymed_label(drug_name: str) -> Optional[Dict]:
236
- """Retrieves DailyMed label info using the v2 API (returns JSON)."""
237
- try:
238
- params = {"drug_name": drug_name, "page": 1, "pagesize": 1}
239
- data = _query_api(API_ENDPOINTS["dailymed"], params)
240
- if data and data.get("data") and len(data["data"]) > 0:
241
- label_url = data["data"][0].get("url")
242
- if not label_url:
243
- st.warning("DailyMed label URL not found.")
244
- return None
245
- return {"Label URL": label_url}
246
- st.warning(f"No DailyMed label found for '{drug_name}'.")
247
- return None
248
- except Exception as e:
249
- st.error(f"Error fetching DailyMed data: {e}")
250
- logging.error(f"DailyMed error: {e}")
251
- return None
252
-
253
  def _get_bioportal_data(ontology: str, term: str) -> Optional[Dict]:
254
  """Fetches ontology data from BioPortal."""
255
  if not BIOPORTAL_API_KEY:
@@ -371,10 +334,31 @@ def get_rxclass_by_drug_name(drug_name: str) -> Optional[Dict]:
371
  url = f"{API_ENDPOINTS['rxclass_by_drug']}?drugName={drug_name}"
372
  return _query_api(url)
373
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
374
  # -----------------------------
375
  # Streamlit App Layout and Tabs
376
  # -----------------------------
377
- # Add a new tab "🧪 Drug Data Integration" to bring together various drug APIs.
378
  tabs = st.tabs([
379
  "💊 Drug Development",
380
  "📊 Trial Analytics",
@@ -521,6 +505,7 @@ with tabs[2]:
521
  # -----------------------------
522
  with tabs[3]:
523
  st.header("Global Regulatory Monitoring")
 
524
  drug_name = st.text_input("Drug Product:", placeholder="Enter generic or brand name")
525
 
526
  if st.button("Generate Regulatory Report"):
@@ -531,47 +516,33 @@ with tabs[3]:
531
  if fda_info and fda_info.get("openfda", {}).get("brand_name"):
532
  fda_status = ", ".join(fda_info["openfda"]["brand_name"])
533
 
534
- # EMA Data via Scraping
535
- ema_info = scrape_ema_drug_info(drug_name)
536
- ema_status = ema_info.get("EMA Approval Status") if ema_info else "Not Available"
537
-
538
- # WHO Data (Health Canada API)
539
- who = _query_api(API_ENDPOINTS["who_drugs"], params={"name": drug_name})
540
- who_status = "Yes" if who else "No"
541
-
542
- # DailyMed Label
543
- dailymed_info = _get_dailymed_label(drug_name)
544
- dailymed_status = dailymed_info.get("Label URL") if dailymed_info else "Not Available"
545
 
546
  st.subheader("Regulatory Status")
547
- col1, col2, col3, col4 = st.columns(4)
548
  with col1:
549
  st.markdown("**FDA Status**")
550
  st.write(fda_status)
551
  with col2:
552
- st.markdown("**EMA Status**")
553
- st.write(ema_status)
554
- with col3:
555
- st.markdown("**WHO Essential Medicine**")
556
- st.write(who_status)
557
- with col4:
558
- st.markdown("**DailyMed Label**")
559
- st.write(dailymed_status)
560
- if dailymed_info and dailymed_info.get("Label URL"):
561
- st.markdown(f"[View DailyMed Label]({dailymed_info['Label URL']})")
562
- if ema_info and ema_info.get("EMA Drug Page"):
563
- st.markdown(f"[View EMA Drug Page]({ema_info['EMA Drug Page']})")
564
 
565
  regulatory_content = (
566
  f"### Regulatory Report\n\n"
567
  f"**FDA Status:** {fda_status}\n\n"
568
- f"**EMA Status:** {ema_status}\n\n"
569
- f"**WHO Essential Medicine:** {who_status}\n\n"
570
- f"**DailyMed Label:** {dailymed_status}\n"
571
  )
572
- if ema_info and ema_info.get("EMA Drug Page"):
573
- regulatory_content += f"\n**EMA Drug Page:** {ema_info['EMA Drug Page']}\n"
574
-
575
  with tempfile.NamedTemporaryFile(delete=False, suffix=".pdf") as tmp_file:
576
  report_file = _save_pdf_report(regulatory_content, tmp_file.name)
577
  if report_file:
@@ -624,20 +595,18 @@ with tabs[5]:
624
 
625
  # Placeholder KPI counts (replace with real aggregated data if available)
626
  fda_count = 5000 # Example value
627
- ema_count = 3000 # Example value
628
- who_count = 1500 # Example value
629
  trials_count = 12000 # Example value
630
  pub_count = 250000 # Example value
631
 
632
- def _create_kpi_dashboard(fda: int, ema: int, who: int, trials: int, pubs: int):
633
- col1, col2, col3, col4, col5 = st.columns(5)
634
  col1.metric("FDA Approved Drugs", fda)
635
- col2.metric("EMA Approved Drugs", ema)
636
- col3.metric("WHO Essential Medicines", who)
637
- col4.metric("Ongoing Trials", trials)
638
- col5.metric("Publications", pubs)
639
 
640
- _create_kpi_dashboard(fda_count, ema_count, who_count, trials_count, pub_count)
641
 
642
  st.subheader("Trend Analysis")
643
  years = list(range(2000, 2026))
@@ -671,13 +640,6 @@ with tabs[6]:
671
 
672
  if st.button("Retrieve Drug Data"):
673
  with st.spinner("Fetching drug data from multiple sources..."):
674
- # DailyMed – using our existing DailyMed label function
675
- dailymed_label = _get_dailymed_label(drug_query)
676
- if dailymed_label:
677
- dm_label = dailymed_label.get("Label URL", "Not Available")
678
- else:
679
- dm_label = "Not Available"
680
-
681
  # RxNorm – Get RxCUI and then properties
682
  rxnorm_id = get_rxnorm_rxcui(drug_query)
683
  if rxnorm_id:
@@ -688,9 +650,6 @@ with tabs[6]:
688
  # RxClass – Get classes related to the drug
689
  rxclass_data = get_rxclass_by_drug_name(drug_query)
690
 
691
- st.subheader("DailyMed Label")
692
- st.write(f"DailyMed Label URL: {dm_label}")
693
-
694
  st.subheader("RxNorm Data")
695
  if rxnorm_id:
696
  st.write(f"RxCUI for {drug_query}: {rxnorm_id}")
@@ -702,9 +661,19 @@ with tabs[6]:
702
  st.write("No RxCUI found for the given drug name.")
703
 
704
  st.subheader("RxClass Information")
705
- if rxclass_data:
706
  st.json(rxclass_data)
707
  else:
708
  st.write("No RxClass data found for the given drug.")
 
 
 
 
 
 
 
 
 
 
709
 
710
 
 
24
  # API Endpoints (Centralized Configuration)
25
  # -----------------------------
26
  API_ENDPOINTS = {
27
+ "clinical_trials": "https://clinicaltrials.gov/api/v2/studies", # no email required
28
  "pubchem": "https://pubchem.ncbi.nlm.nih.gov/rest/pug/compound/name/{}/JSON",
29
  "pubmed": "https://eutils.ncbi.nlm.nih.gov/entrez/eutils/esearch.fcgi",
 
30
  "fda_drug_approval": "https://api.fda.gov/drug/label.json",
31
  "faers_adverse_events": "https://api.fda.gov/drug/event.json",
32
+ # PharmGKB endpoints (these require a PharmGKB accession such as "PA1234")
33
  "pharmgkb_variant_clinical_annotations": "https://api.pharmgkb.org/v1/data/variant/{}/clinicalAnnotations",
34
  "pharmgkb_gene": "https://api.pharmgkb.org/v1/data/gene/{}",
35
  "pharmgkb_gene_variants": "https://api.pharmgkb.org/v1/data/gene/{}/variants",
36
  "bioportal_search": "https://data.bioontology.org/search",
37
+ # RxNorm endpoints
 
 
38
  "rxnorm_rxcui": "https://rxnav.nlm.nih.gov/REST/rxcui.json",
39
  "rxnorm_properties": "https://rxnav.nlm.nih.gov/REST/rxcui/{}/properties.json",
40
+ # RxClass endpoint (example)
41
  "rxclass_by_drug": "https://rxnav.nlm.nih.gov/REST/class/byDrugName.json"
42
  }
43
 
 
122
  logging.error(f"Molecule drawing error: {e}")
123
  return None
124
 
125
+ def _get_pubchem_drug_details(drug_name: str) -> Optional[Dict[str, str]]:
126
+ """Retrieves drug details such as molecular formula and IUPAC name from PubChem."""
127
+ url = API_ENDPOINTS["pubchem"].format(drug_name)
128
+ data = _query_api(url)
129
+ if data and data.get("PC_Compounds"):
130
+ compound = data["PC_Compounds"][0]
131
+ details = {}
132
+ for prop in compound.get("props", []):
133
+ urn = prop.get("urn", {})
134
+ if urn.get("label") == "Molecular Formula":
135
+ details["Molecular Formula"] = prop["value"]["sval"]
136
+ elif urn.get("name") in ["Preferred", "Systematic"]:
137
+ # Use Preferred IUPAC name if available
138
+ details["IUPAC Name"] = prop["value"]["sval"]
139
+ elif prop.get("name") == "Canonical SMILES":
140
+ details["Canonical SMILES"] = prop["value"]["sval"]
141
+ return details
142
+ return None
143
+
144
  def _get_clinical_trials(query: str) -> Optional[Dict]:
145
+ """Queries ClinicalTrials.gov using the 'query.term' parameter."""
146
  if query.upper().startswith("NCT") and query[3:].isdigit():
147
  params = {"id": query, "fmt": "json"}
148
  else:
 
213
  st.write(f"No data found for PharmGKB gene {pharmgkb_gene_id}.")
214
  return None
215
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
216
  def _get_bioportal_data(ontology: str, term: str) -> Optional[Dict]:
217
  """Fetches ontology data from BioPortal."""
218
  if not BIOPORTAL_API_KEY:
 
334
  url = f"{API_ENDPOINTS['rxclass_by_drug']}?drugName={drug_name}"
335
  return _query_api(url)
336
 
337
+ # -----------------------------
338
+ # New Function: PubChem Drug Details
339
+ # -----------------------------
340
+ def _get_pubchem_drug_details(drug_name: str) -> Optional[Dict[str, str]]:
341
+ """Retrieves generic drug details (molecular formula, IUPAC name, and canonical SMILES) from PubChem."""
342
+ url = API_ENDPOINTS["pubchem"].format(drug_name)
343
+ data = _query_api(url)
344
+ details = {}
345
+ if data and data.get("PC_Compounds"):
346
+ compound = data["PC_Compounds"][0]
347
+ for prop in compound.get("props", []):
348
+ urn = prop.get("urn", {})
349
+ if urn.get("label") == "Molecular Formula":
350
+ details["Molecular Formula"] = prop["value"]["sval"]
351
+ if urn.get("name") == "Preferred":
352
+ details["IUPAC Name"] = prop["value"]["sval"]
353
+ if prop.get("name") == "Canonical SMILES":
354
+ details["Canonical SMILES"] = prop["value"]["sval"]
355
+ return details
356
+ return None
357
+
358
  # -----------------------------
359
  # Streamlit App Layout and Tabs
360
  # -----------------------------
361
+ # Define tabs including a new "Drug Data Integration" tab.
362
  tabs = st.tabs([
363
  "💊 Drug Development",
364
  "📊 Trial Analytics",
 
505
  # -----------------------------
506
  with tabs[3]:
507
  st.header("Global Regulatory Monitoring")
508
+ st.markdown("**Note:** EMA, WHO, and DailyMed endpoints have been removed due to persistent errors. Instead, we provide FDA data and generic drug details (formula, IUPAC name, structure) from PubChem.")
509
  drug_name = st.text_input("Drug Product:", placeholder="Enter generic or brand name")
510
 
511
  if st.button("Generate Regulatory Report"):
 
516
  if fda_info and fda_info.get("openfda", {}).get("brand_name"):
517
  fda_status = ", ".join(fda_info["openfda"]["brand_name"])
518
 
519
+ # PubChem Drug Details for Generic/Formula Info
520
+ pubchem_details = _get_pubchem_drug_details(drug_name)
521
+ if pubchem_details:
522
+ formula = pubchem_details.get("Molecular Formula", "N/A")
523
+ iupac = pubchem_details.get("IUPAC Name", "N/A")
524
+ smiles = pubchem_details.get("Canonical SMILES", "N/A")
525
+ else:
526
+ formula = iupac = smiles = "Not Available"
 
 
 
527
 
528
  st.subheader("Regulatory Status")
529
+ col1, col2 = st.columns(2)
530
  with col1:
531
  st.markdown("**FDA Status**")
532
  st.write(fda_status)
533
  with col2:
534
+ st.markdown("**Generic/Formula Details (PubChem)**")
535
+ st.write(f"**Molecular Formula:** {formula}")
536
+ st.write(f"**IUPAC Name:** {iupac}")
537
+ st.write(f"**Canonical SMILES:** {smiles}")
 
 
 
 
 
 
 
 
538
 
539
  regulatory_content = (
540
  f"### Regulatory Report\n\n"
541
  f"**FDA Status:** {fda_status}\n\n"
542
+ f"**Molecular Formula:** {formula}\n\n"
543
+ f"**IUPAC Name:** {iupac}\n\n"
544
+ f"**Canonical SMILES:** {smiles}\n"
545
  )
 
 
 
546
  with tempfile.NamedTemporaryFile(delete=False, suffix=".pdf") as tmp_file:
547
  report_file = _save_pdf_report(regulatory_content, tmp_file.name)
548
  if report_file:
 
595
 
596
  # Placeholder KPI counts (replace with real aggregated data if available)
597
  fda_count = 5000 # Example value
598
+ ema_count = 3000 # Example value (not used now)
599
+ who_count = 1500 # Example value (not used now)
600
  trials_count = 12000 # Example value
601
  pub_count = 250000 # Example value
602
 
603
+ def _create_kpi_dashboard(fda: int, trials: int, pubs: int):
604
+ col1, col2, col3 = st.columns(3)
605
  col1.metric("FDA Approved Drugs", fda)
606
+ col2.metric("Ongoing Trials", trials)
607
+ col3.metric("Publications", pubs)
 
 
608
 
609
+ _create_kpi_dashboard(fda_count, trials_count, pub_count)
610
 
611
  st.subheader("Trend Analysis")
612
  years = list(range(2000, 2026))
 
640
 
641
  if st.button("Retrieve Drug Data"):
642
  with st.spinner("Fetching drug data from multiple sources..."):
 
 
 
 
 
 
 
643
  # RxNorm – Get RxCUI and then properties
644
  rxnorm_id = get_rxnorm_rxcui(drug_query)
645
  if rxnorm_id:
 
650
  # RxClass – Get classes related to the drug
651
  rxclass_data = get_rxclass_by_drug_name(drug_query)
652
 
 
 
 
653
  st.subheader("RxNorm Data")
654
  if rxnorm_id:
655
  st.write(f"RxCUI for {drug_query}: {rxnorm_id}")
 
661
  st.write("No RxCUI found for the given drug name.")
662
 
663
  st.subheader("RxClass Information")
664
+ if rxclass_data and rxclass_data.get("classMember"):
665
  st.json(rxclass_data)
666
  else:
667
  st.write("No RxClass data found for the given drug.")
668
+
669
+ # Additionally, show generic drug details from PubChem
670
+ pubchem_details = _get_pubchem_drug_details(drug_query)
671
+ st.subheader("PubChem Drug Details")
672
+ if pubchem_details:
673
+ st.write(f"**Molecular Formula:** {pubchem_details.get('Molecular Formula', 'N/A')}")
674
+ st.write(f"**IUPAC Name:** {pubchem_details.get('IUPAC Name', 'N/A')}")
675
+ st.write(f"**Canonical SMILES:** {pubchem_details.get('Canonical SMILES', 'N/A')}")
676
+ else:
677
+ st.write("No PubChem details found for the given drug.")
678
 
679