Spaces:

mgbam
/

drugapp

Running

App Files Files Community

mgbam committed on Jan 31

Commit

5d65f21

verified ·

1 Parent(s): 8bcce34

Update app.py

Browse files

Files changed (1) hide show

app.py +81 -112

app.py CHANGED Viewed

@@ -24,23 +24,20 @@ logging.basicConfig(level=logging.ERROR)
 # API Endpoints (Centralized Configuration)
 # -----------------------------
 API_ENDPOINTS = {
-    "clinical_trials": "https://clinicaltrials.gov/api/v2/studies",  # no email needed
     "pubchem": "https://pubchem.ncbi.nlm.nih.gov/rest/pug/compound/name/{}/JSON",
     "pubmed": "https://eutils.ncbi.nlm.nih.gov/entrez/eutils/esearch.fcgi",
-    "who_drugs": "https://health-products.canada.ca/api/drug/product",
     "fda_drug_approval": "https://api.fda.gov/drug/label.json",
     "faers_adverse_events": "https://api.fda.gov/drug/event.json",
-    # PharmGKB endpoints: these require a PharmGKB accession (e.g., PA1234)
     "pharmgkb_variant_clinical_annotations": "https://api.pharmgkb.org/v1/data/variant/{}/clinicalAnnotations",
     "pharmgkb_gene": "https://api.pharmgkb.org/v1/data/gene/{}",
     "pharmgkb_gene_variants": "https://api.pharmgkb.org/v1/data/gene/{}/variants",
     "bioportal_search": "https://data.bioontology.org/search",
-    # DailyMed: note the base URL now uses HTTPS and version v2
-    "dailymed": "https://dailymed.nlm.nih.gov/dailymed/services/v2/spls.json",
-    # RxNorm endpoints (examples)
     "rxnorm_rxcui": "https://rxnav.nlm.nih.gov/REST/rxcui.json",
     "rxnorm_properties": "https://rxnav.nlm.nih.gov/REST/rxcui/{}/properties.json",
-    # RxClass endpoint example
     "rxclass_by_drug": "https://rxnav.nlm.nih.gov/REST/class/byDrugName.json"
 }
@@ -125,8 +122,27 @@ def _draw_molecule(smiles: str) -> Optional[Any]:
         logging.error(f"Molecule drawing error: {e}")
         return None
 def _get_clinical_trials(query: str) -> Optional[Dict]:
-    """Queries ClinicalTrials.gov using the 'query.term' parameter (no email required)."""
     if query.upper().startswith("NCT") and query[3:].isdigit():
         params = {"id": query, "fmt": "json"}
     else:
@@ -197,59 +213,6 @@ def get_pharmgkb_gene_data(pharmgkb_gene_id: str) -> Optional[Dict]:
     st.write(f"No data found for PharmGKB gene {pharmgkb_gene_id}.")
     return None
-def scrape_ema_drug_info(drug_name: str) -> Optional[Dict]:
-    """Scrapes EMA website for drug information using browser-like headers."""
-    try:
-        search_url = f"https://www.ema.europa.eu/en/search?text={drug_name.replace(' ', '+')}&type=Product"
-        headers = {
-            "User-Agent": ("Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
-                           "AppleWebKit/537.36 (KHTML, like Gecko) "
-                           "Chrome/90.0.4430.93 Safari/537.36")
-        }
-        response = requests.get(search_url, headers=headers, timeout=10)
-        response.raise_for_status()
-        soup = BeautifulSoup(response.text, "html.parser")
-        result = soup.find("a", class_="search-result__title")
-        if not result:
-            st.warning(f"No EMA data found for '{drug_name}'.")
-            return None
-        drug_page_url = "https://www.ema.europa.eu" + result.get("href")
-        drug_response = requests.get(drug_page_url, headers=headers, timeout=10)
-        drug_response.raise_for_status()
-        drug_soup = BeautifulSoup(drug_response.text, "html.parser")
-        approval_elem = drug_soup.find("span", class_="product-status")
-        approval_status = approval_elem.get_text(strip=True) if approval_elem else "Not Available"
-        indications_elem = drug_soup.find("div", class_="indications")
-        indications = indications_elem.get_text(strip=True) if indications_elem else "Not Available"
-        return {
-            "Drug Name": drug_name,
-            "EMA Approval Status": approval_status,
-            "Indications": indications,
-            "EMA Drug Page": drug_page_url
-        }
-    except Exception as e:
-        st.error(f"Error scraping EMA data: {e}")
-        logging.error(f"EMA scraping error: {e}")
-        return None
-def _get_dailymed_label(drug_name: str) -> Optional[Dict]:
-    """Retrieves DailyMed label info using the v2 API (returns JSON)."""
-    try:
-        params = {"drug_name": drug_name, "page": 1, "pagesize": 1}
-        data = _query_api(API_ENDPOINTS["dailymed"], params)
-        if data and data.get("data") and len(data["data"]) > 0:
-            label_url = data["data"][0].get("url")
-            if not label_url:
-                st.warning("DailyMed label URL not found.")
-                return None
-            return {"Label URL": label_url}
-        st.warning(f"No DailyMed label found for '{drug_name}'.")
-        return None
-    except Exception as e:
-        st.error(f"Error fetching DailyMed data: {e}")
-        logging.error(f"DailyMed error: {e}")
-        return None
 def _get_bioportal_data(ontology: str, term: str) -> Optional[Dict]:
     """Fetches ontology data from BioPortal."""
     if not BIOPORTAL_API_KEY:
@@ -371,10 +334,31 @@ def get_rxclass_by_drug_name(drug_name: str) -> Optional[Dict]:
     url = f"{API_ENDPOINTS['rxclass_by_drug']}?drugName={drug_name}"
     return _query_api(url)
 # -----------------------------
 # Streamlit App Layout and Tabs
 # -----------------------------
-# Add a new tab "🧪 Drug Data Integration" to bring together various drug APIs.
 tabs = st.tabs([
     "💊 Drug Development",
     "📊 Trial Analytics",
@@ -521,6 +505,7 @@ with tabs[2]:
 # -----------------------------
 with tabs[3]:
     st.header("Global Regulatory Monitoring")
     drug_name = st.text_input("Drug Product:", placeholder="Enter generic or brand name")
     if st.button("Generate Regulatory Report"):
@@ -531,47 +516,33 @@ with tabs[3]:
             if fda_info and fda_info.get("openfda", {}).get("brand_name"):
                 fda_status = ", ".join(fda_info["openfda"]["brand_name"])
-            # EMA Data via Scraping
-            ema_info = scrape_ema_drug_info(drug_name)
-            ema_status = ema_info.get("EMA Approval Status") if ema_info else "Not Available"
-            # WHO Data (Health Canada API)
-            who = _query_api(API_ENDPOINTS["who_drugs"], params={"name": drug_name})
-            who_status = "Yes" if who else "No"
-            # DailyMed Label
-            dailymed_info = _get_dailymed_label(drug_name)
-            dailymed_status = dailymed_info.get("Label URL") if dailymed_info else "Not Available"
             st.subheader("Regulatory Status")
-            col1, col2, col3, col4 = st.columns(4)
             with col1:
                 st.markdown("**FDA Status**")
                 st.write(fda_status)
             with col2:
-                st.markdown("**EMA Status**")
-                st.write(ema_status)
-            with col3:
-                st.markdown("**WHO Essential Medicine**")
-                st.write(who_status)
-            with col4:
-                st.markdown("**DailyMed Label**")
-                st.write(dailymed_status)
-                if dailymed_info and dailymed_info.get("Label URL"):
-                    st.markdown(f"[View DailyMed Label]({dailymed_info['Label URL']})")
-            if ema_info and ema_info.get("EMA Drug Page"):
-                st.markdown(f"[View EMA Drug Page]({ema_info['EMA Drug Page']})")
             regulatory_content = (
                 f"### Regulatory Report\n\n"
                 f"**FDA Status:** {fda_status}\n\n"
-                f"**EMA Status:** {ema_status}\n\n"
-                f"**WHO Essential Medicine:** {who_status}\n\n"
-                f"**DailyMed Label:** {dailymed_status}\n"
             )
-            if ema_info and ema_info.get("EMA Drug Page"):
-                regulatory_content += f"\n**EMA Drug Page:** {ema_info['EMA Drug Page']}\n"
             with tempfile.NamedTemporaryFile(delete=False, suffix=".pdf") as tmp_file:
                 report_file = _save_pdf_report(regulatory_content, tmp_file.name)
                 if report_file:
@@ -624,20 +595,18 @@ with tabs[5]:
     # Placeholder KPI counts (replace with real aggregated data if available)
     fda_count = 5000       # Example value
-    ema_count = 3000       # Example value
-    who_count = 1500       # Example value
     trials_count = 12000   # Example value
     pub_count = 250000     # Example value
-    def _create_kpi_dashboard(fda: int, ema: int, who: int, trials: int, pubs: int):
-        col1, col2, col3, col4, col5 = st.columns(5)
         col1.metric("FDA Approved Drugs", fda)
-        col2.metric("EMA Approved Drugs", ema)
-        col3.metric("WHO Essential Medicines", who)
-        col4.metric("Ongoing Trials", trials)
-        col5.metric("Publications", pubs)
-    _create_kpi_dashboard(fda_count, ema_count, who_count, trials_count, pub_count)
     st.subheader("Trend Analysis")
     years = list(range(2000, 2026))
@@ -671,13 +640,6 @@ with tabs[6]:
     if st.button("Retrieve Drug Data"):
         with st.spinner("Fetching drug data from multiple sources..."):
-            # DailyMed – using our existing DailyMed label function
-            dailymed_label = _get_dailymed_label(drug_query)
-            if dailymed_label:
-                dm_label = dailymed_label.get("Label URL", "Not Available")
-            else:
-                dm_label = "Not Available"
             # RxNorm – Get RxCUI and then properties
             rxnorm_id = get_rxnorm_rxcui(drug_query)
             if rxnorm_id:
@@ -688,9 +650,6 @@ with tabs[6]:
             # RxClass – Get classes related to the drug
             rxclass_data = get_rxclass_by_drug_name(drug_query)
-            st.subheader("DailyMed Label")
-            st.write(f"DailyMed Label URL: {dm_label}")
             st.subheader("RxNorm Data")
             if rxnorm_id:
                 st.write(f"RxCUI for {drug_query}: {rxnorm_id}")
@@ -702,9 +661,19 @@ with tabs[6]:
                 st.write("No RxCUI found for the given drug name.")
             st.subheader("RxClass Information")
-            if rxclass_data:
                 st.json(rxclass_data)
             else:
                 st.write("No RxClass data found for the given drug.")

 # API Endpoints (Centralized Configuration)
 # -----------------------------
 API_ENDPOINTS = {
+    "clinical_trials": "https://clinicaltrials.gov/api/v2/studies",  # no email required
     "pubchem": "https://pubchem.ncbi.nlm.nih.gov/rest/pug/compound/name/{}/JSON",
     "pubmed": "https://eutils.ncbi.nlm.nih.gov/entrez/eutils/esearch.fcgi",
     "fda_drug_approval": "https://api.fda.gov/drug/label.json",
     "faers_adverse_events": "https://api.fda.gov/drug/event.json",
+    # PharmGKB endpoints (these require a PharmGKB accession such as "PA1234")
     "pharmgkb_variant_clinical_annotations": "https://api.pharmgkb.org/v1/data/variant/{}/clinicalAnnotations",
     "pharmgkb_gene": "https://api.pharmgkb.org/v1/data/gene/{}",
     "pharmgkb_gene_variants": "https://api.pharmgkb.org/v1/data/gene/{}/variants",
     "bioportal_search": "https://data.bioontology.org/search",
+    # RxNorm endpoints
     "rxnorm_rxcui": "https://rxnav.nlm.nih.gov/REST/rxcui.json",
     "rxnorm_properties": "https://rxnav.nlm.nih.gov/REST/rxcui/{}/properties.json",
+    # RxClass endpoint (example)
     "rxclass_by_drug": "https://rxnav.nlm.nih.gov/REST/class/byDrugName.json"
 }
         logging.error(f"Molecule drawing error: {e}")
         return None
def _get_pubchem_drug_details(drug_name: str) -> Optional[Dict[str, str]]:
    """Retrieve molecular formula, IUPAC name and canonical SMILES from PubChem.

    Args:
        drug_name: Compound name to look up. It is stripped and URL-escaped
            before being substituted into the ``pubchem`` endpoint template.

    Returns:
        A dict holding whichever of the keys ``"Molecular Formula"``,
        ``"IUPAC Name"`` and ``"Canonical SMILES"`` could be read from the
        first compound record (the dict may be empty), or ``None`` when the
        name is blank or the response carries no ``PC_Compounds``.
    """
    # NOTE(review): this function is defined a second time further down the
    # file; the later definition is the one bound at import time. Consider
    # keeping only one of them.

    # Imported locally so the block does not rely on the file's import header.
    from urllib.parse import quote

    name = (drug_name or "").strip()
    if not name:
        # Nothing to look up; avoid issuing a malformed ".../name//JSON" request.
        return None

    # Escape the name so spaces, "/" or "?" cannot alter the URL path.
    url = API_ENDPOINTS["pubchem"].format(quote(name, safe=""))
    data = _query_api(url)
    if not data or not data.get("PC_Compounds"):
        return None

    details: Dict[str, str] = {}
    systematic_name = None
    for prop in data["PC_Compounds"][0].get("props", []):
        urn = prop.get("urn", {})
        label = urn.get("label")
        kind = urn.get("name")
        # Not every property is string-valued; skip those without "sval".
        text = prop.get("value", {}).get("sval")
        if text is None:
            continue

        if label == "Molecular Formula":
            details["Molecular Formula"] = text
        elif label == "IUPAC Name" and kind == "Preferred":
            details["IUPAC Name"] = text
        elif label == "IUPAC Name" and kind == "Systematic":
            # Held back as a fallback so it can never overwrite "Preferred".
            systematic_name = text
        elif label == "SMILES" and kind in ("Canonical", "Connectivity"):
            # PubChem identifies properties via urn label/name.
            # NOTE(review): "Connectivity" is believed to be the newer name
            # for the canonical SMILES entry - confirm against a live record.
            details.setdefault("Canonical SMILES", text)
        elif prop.get("name") == "Canonical SMILES":
            # Legacy check kept from the previous implementation.
            details.setdefault("Canonical SMILES", text)

    if "IUPAC Name" not in details and systematic_name is not None:
        details["IUPAC Name"] = systematic_name
    return details
 def _get_clinical_trials(query: str) -> Optional[Dict]:
+    """Queries ClinicalTrials.gov using the 'query.term' parameter."""
     if query.upper().startswith("NCT") and query[3:].isdigit():
         params = {"id": query, "fmt": "json"}
     else:
     st.write(f"No data found for PharmGKB gene {pharmgkb_gene_id}.")
     return None
 def _get_bioportal_data(ontology: str, term: str) -> Optional[Dict]:
     """Fetches ontology data from BioPortal."""
     if not BIOPORTAL_API_KEY:
     url = f"{API_ENDPOINTS['rxclass_by_drug']}?drugName={drug_name}"
     return _query_api(url)
+# -----------------------------
+# New Function: PubChem Drug Details
+# -----------------------------
def _get_pubchem_drug_details(drug_name: str) -> Optional[Dict[str, str]]:
    """Retrieve generic drug details from PubChem.

    Looks the compound up by name and reads the molecular formula, the
    preferred IUPAC name and the canonical SMILES string from the first
    compound record in the response.

    Args:
        drug_name: Compound name to look up. It is stripped and URL-escaped
            before being substituted into the ``pubchem`` endpoint template.

    Returns:
        A dict holding whichever of the keys ``"Molecular Formula"``,
        ``"IUPAC Name"`` and ``"Canonical SMILES"`` could be read (the dict
        may be empty), or ``None`` when the name is blank or the response
        carries no ``PC_Compounds``.
    """
    # Imported locally so the block does not rely on the file's import header.
    from urllib.parse import quote

    name = (drug_name or "").strip()
    if not name:
        # Nothing to look up; avoid issuing a malformed ".../name//JSON" request.
        return None

    # Escape the name so spaces, "/" or "?" cannot alter the URL path.
    url = API_ENDPOINTS["pubchem"].format(quote(name, safe=""))
    data = _query_api(url)
    if not data or not data.get("PC_Compounds"):
        return None

    details: Dict[str, str] = {}
    for prop in data["PC_Compounds"][0].get("props", []):
        urn = prop.get("urn", {})
        label = urn.get("label")
        kind = urn.get("name")
        # Not every property is string-valued; skip those without "sval".
        text = prop.get("value", {}).get("sval")
        if text is None:
            continue

        if label == "Molecular Formula":
            details["Molecular Formula"] = text
        elif label == "IUPAC Name" and kind == "Preferred":
            details["IUPAC Name"] = text
        elif label == "SMILES" and kind in ("Canonical", "Connectivity"):
            # PubChem identifies properties via urn label/name.
            # NOTE(review): "Connectivity" is believed to be the newer name
            # for the canonical SMILES entry - confirm against a live record.
            details.setdefault("Canonical SMILES", text)
        elif prop.get("name") == "Canonical SMILES":
            # Legacy check kept from the previous implementation.
            details.setdefault("Canonical SMILES", text)
    return details
 # -----------------------------
 # Streamlit App Layout and Tabs
 # -----------------------------
+# Define tabs including a new "Drug Data Integration" tab.
 tabs = st.tabs([
     "💊 Drug Development",
     "📊 Trial Analytics",
 # -----------------------------
 with tabs[3]:
     st.header("Global Regulatory Monitoring")
+    st.markdown("**Note:** EMA, WHO, and DailyMed endpoints have been removed due to persistent errors. Instead, we provide FDA data and generic drug details (formula, IUPAC name, structure) from PubChem.")
     drug_name = st.text_input("Drug Product:", placeholder="Enter generic or brand name")
     if st.button("Generate Regulatory Report"):
             if fda_info and fda_info.get("openfda", {}).get("brand_name"):
                 fda_status = ", ".join(fda_info["openfda"]["brand_name"])
+            # PubChem Drug Details for Generic/Formula Info
+            pubchem_details = _get_pubchem_drug_details(drug_name)
+            if pubchem_details:
+                formula = pubchem_details.get("Molecular Formula", "N/A")
+                iupac = pubchem_details.get("IUPAC Name", "N/A")
+                smiles = pubchem_details.get("Canonical SMILES", "N/A")
+            else:
+                formula = iupac = smiles = "Not Available"
             st.subheader("Regulatory Status")
+            col1, col2 = st.columns(2)
             with col1:
                 st.markdown("**FDA Status**")
                 st.write(fda_status)
             with col2:
+                st.markdown("**Generic/Formula Details (PubChem)**")
+                st.write(f"**Molecular Formula:** {formula}")
+                st.write(f"**IUPAC Name:** {iupac}")
+                st.write(f"**Canonical SMILES:** {smiles}")
             regulatory_content = (
                 f"### Regulatory Report\n\n"
                 f"**FDA Status:** {fda_status}\n\n"
+                f"**Molecular Formula:** {formula}\n\n"
+                f"**IUPAC Name:** {iupac}\n\n"
+                f"**Canonical SMILES:** {smiles}\n"
             )
             with tempfile.NamedTemporaryFile(delete=False, suffix=".pdf") as tmp_file:
                 report_file = _save_pdf_report(regulatory_content, tmp_file.name)
                 if report_file:
     # Placeholder KPI counts (replace with real aggregated data if available)
     fda_count = 5000       # Example value
+    ema_count = 3000       # Example value (not used now)
+    who_count = 1500       # Example value (not used now)
     trials_count = 12000   # Example value
     pub_count = 250000     # Example value
+    def _create_kpi_dashboard(fda: int, trials: int, pubs: int):
+        col1, col2, col3 = st.columns(3)
         col1.metric("FDA Approved Drugs", fda)
+        col2.metric("Ongoing Trials", trials)
+        col3.metric("Publications", pubs)
+    _create_kpi_dashboard(fda_count, trials_count, pub_count)
     st.subheader("Trend Analysis")
     years = list(range(2000, 2026))
     if st.button("Retrieve Drug Data"):
         with st.spinner("Fetching drug data from multiple sources..."):
             # RxNorm – Get RxCUI and then properties
             rxnorm_id = get_rxnorm_rxcui(drug_query)
             if rxnorm_id:
             # RxClass – Get classes related to the drug
             rxclass_data = get_rxclass_by_drug_name(drug_query)
             st.subheader("RxNorm Data")
             if rxnorm_id:
                 st.write(f"RxCUI for {drug_query}: {rxnorm_id}")
                 st.write("No RxCUI found for the given drug name.")
             st.subheader("RxClass Information")
+            if rxclass_data and rxclass_data.get("classMember"):
                 st.json(rxclass_data)
             else:
                 st.write("No RxClass data found for the given drug.")
+            # Additionally, show generic drug details from PubChem
+            pubchem_details = _get_pubchem_drug_details(drug_query)
+            st.subheader("PubChem Drug Details")
+            if pubchem_details:
+                st.write(f"**Molecular Formula:** {pubchem_details.get('Molecular Formula', 'N/A')}")
+                st.write(f"**IUPAC Name:** {pubchem_details.get('IUPAC Name', 'N/A')}")
+                st.write(f"**Canonical SMILES:** {pubchem_details.get('Canonical SMILES', 'N/A')}")
+            else:
+                st.write("No PubChem details found for the given drug.")