Update app.py
Browse files
app.py
CHANGED
@@ -24,18 +24,24 @@ logging.basicConfig(level=logging.ERROR)
|
|
24 |
# API Endpoints (Centralized Configuration)
|
25 |
# -----------------------------
|
26 |
API_ENDPOINTS = {
|
27 |
-
"clinical_trials": "https://clinicaltrials.gov/api/v2/studies",
|
28 |
"pubchem": "https://pubchem.ncbi.nlm.nih.gov/rest/pug/compound/name/{}/JSON",
|
29 |
"pubmed": "https://eutils.ncbi.nlm.nih.gov/entrez/eutils/esearch.fcgi",
|
30 |
"who_drugs": "https://health-products.canada.ca/api/drug/product",
|
31 |
"fda_drug_approval": "https://api.fda.gov/drug/label.json",
|
32 |
"faers_adverse_events": "https://api.fda.gov/drug/event.json",
|
33 |
-
# PharmGKB endpoints require a PharmGKB accession (e.g., PA1234)
|
34 |
"pharmgkb_variant_clinical_annotations": "https://api.pharmgkb.org/v1/data/variant/{}/clinicalAnnotations",
|
35 |
-
"pharmgkb_gene": "https://api.pharmgkb.org/v1/data/gene/{}",
|
36 |
"pharmgkb_gene_variants": "https://api.pharmgkb.org/v1/data/gene/{}/variants",
|
37 |
"bioportal_search": "https://data.bioontology.org/search",
|
|
|
38 |
"dailymed": "https://dailymed.nlm.nih.gov/dailymed/services/v2/spls.json",
|
|
|
|
|
|
|
|
|
|
|
39 |
}
|
40 |
|
41 |
# -----------------------------------
|
@@ -43,7 +49,7 @@ API_ENDPOINTS = {
|
|
43 |
# -----------------------------------
|
44 |
OPENAI_API_KEY = st.secrets.get("OPENAI_API_KEY")
|
45 |
BIOPORTAL_API_KEY = st.secrets.get("BIOPORTAL_API_KEY")
|
46 |
-
PUB_EMAIL = st.secrets.get("PUB_EMAIL")
|
47 |
OPENFDA_KEY = st.secrets.get("OPENFDA_KEY")
|
48 |
|
49 |
if not PUB_EMAIL:
|
@@ -62,7 +68,7 @@ from openai import OpenAI
|
|
62 |
client = OpenAI(api_key=OPENAI_API_KEY)
|
63 |
|
64 |
def generate_content(prompt: str) -> str:
|
65 |
-
"""Generate content using GPT
|
66 |
try:
|
67 |
completion = client.chat.completions.create(
|
68 |
model="gpt-4",
|
@@ -99,7 +105,7 @@ def _get_pubchem_smiles(drug_name: str) -> Optional[str]:
|
|
99 |
"""Retrieves a drug's SMILES string from PubChem."""
|
100 |
url = API_ENDPOINTS["pubchem"].format(drug_name)
|
101 |
data = _query_api(url)
|
102 |
-
if data and "PC_Compounds" in data and data["PC_Compounds"]:
|
103 |
for prop in data["PC_Compounds"][0].get("props", []):
|
104 |
if prop.get("name") == "Canonical SMILES":
|
105 |
return prop["value"]["sval"]
|
@@ -120,41 +126,37 @@ def _draw_molecule(smiles: str) -> Optional[Any]:
|
|
120 |
return None
|
121 |
|
122 |
def _get_clinical_trials(query: str) -> Optional[Dict]:
|
123 |
-
"""Queries
|
124 |
-
|
125 |
-
|
126 |
-
|
127 |
-
data = _query_api(API_ENDPOINTS["clinical_trials"], params)
|
128 |
-
if not data:
|
129 |
-
# Fallback to alternative parameter name:
|
130 |
params = {"query.term": query, "retmax": 10, "retmode": "json"}
|
131 |
-
|
132 |
-
return data
|
133 |
|
134 |
def _get_pubmed(query: str) -> Optional[Dict]:
|
135 |
"""Queries PubMed using E-utilities."""
|
136 |
params = {"db": "pubmed", "term": query, "retmax": 10, "retmode": "json", "email": PUB_EMAIL}
|
137 |
return _query_api(API_ENDPOINTS["pubmed"], params)
|
138 |
|
139 |
-
def _get_fda_approval(drug_name: str
|
140 |
"""Retrieves FDA drug label approval info."""
|
141 |
-
if not
|
142 |
st.error("OpenFDA API key not configured.")
|
143 |
return None
|
144 |
query = f'openfda.brand_name:"{drug_name}"'
|
145 |
-
params = {"api_key":
|
146 |
data = _query_api(API_ENDPOINTS["fda_drug_approval"], params)
|
147 |
if data and data.get("results"):
|
148 |
return data["results"][0]
|
149 |
return None
|
150 |
|
151 |
-
def _analyze_adverse_events(drug_name: str,
|
152 |
"""Fetches adverse event reports from FAERS."""
|
153 |
-
if not
|
154 |
st.error("OpenFDA API key not configured.")
|
155 |
return None
|
156 |
query = f'patient.drug.medicinalproduct:"{drug_name}"'
|
157 |
-
params = {"api_key":
|
158 |
return _query_api(API_ENDPOINTS["faers_adverse_events"], params)
|
159 |
|
160 |
def _get_pharmgkb_clinical_annotations(variant_id: str) -> Optional[Dict]:
|
@@ -169,10 +171,10 @@ def _get_pharmgkb_clinical_annotations(variant_id: str) -> Optional[Dict]:
|
|
169 |
def _get_pharmgkb_variants_for_gene(pharmgkb_gene_id: str) -> Optional[List[str]]:
|
170 |
"""
|
171 |
Retrieves variant IDs for a gene using its PharmGKB accession.
|
172 |
-
If
|
173 |
"""
|
174 |
if not pharmgkb_gene_id.startswith("PA"):
|
175 |
-
st.warning("Please provide a valid PharmGKB accession ID
|
176 |
return None
|
177 |
endpoint = API_ENDPOINTS["pharmgkb_gene_variants"].format(pharmgkb_gene_id)
|
178 |
data = _query_api(endpoint)
|
@@ -182,7 +184,9 @@ def _get_pharmgkb_variants_for_gene(pharmgkb_gene_id: str) -> Optional[List[str]
|
|
182 |
return None
|
183 |
|
184 |
def get_pharmgkb_gene_data(pharmgkb_gene_id: str) -> Optional[Dict]:
|
185 |
-
"""
|
|
|
|
|
186 |
if not pharmgkb_gene_id.startswith("PA"):
|
187 |
st.warning("Please enter a valid PharmGKB gene accession ID (e.g., PA1234).")
|
188 |
return None
|
@@ -194,7 +198,7 @@ def get_pharmgkb_gene_data(pharmgkb_gene_id: str) -> Optional[Dict]:
|
|
194 |
return None
|
195 |
|
196 |
def scrape_ema_drug_info(drug_name: str) -> Optional[Dict]:
|
197 |
-
"""Scrapes
|
198 |
try:
|
199 |
search_url = f"https://www.ema.europa.eu/en/search?text={drug_name.replace(' ', '+')}&type=Product"
|
200 |
headers = {
|
@@ -229,7 +233,7 @@ def scrape_ema_drug_info(drug_name: str) -> Optional[Dict]:
|
|
229 |
return None
|
230 |
|
231 |
def _get_dailymed_label(drug_name: str) -> Optional[Dict]:
|
232 |
-
"""Retrieves DailyMed label info
|
233 |
try:
|
234 |
params = {"drug_name": drug_name, "page": 1, "pagesize": 1}
|
235 |
data = _query_api(API_ENDPOINTS["dailymed"], params)
|
@@ -346,18 +350,39 @@ def _create_variant_network(gene: str, variants: List[str], annotations: Dict) -
|
|
346 |
return fig
|
347 |
|
348 |
# -----------------------------
|
349 |
-
#
|
350 |
# -----------------------------
|
351 |
-
|
352 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
353 |
|
|
|
|
|
|
|
|
|
354 |
tabs = st.tabs([
|
355 |
"π Drug Development",
|
356 |
"π Trial Analytics",
|
357 |
"𧬠Molecular Profiling",
|
358 |
"π Regulatory Intelligence",
|
359 |
"π Literature Search",
|
360 |
-
"π Dashboard"
|
|
|
361 |
])
|
362 |
|
363 |
# -----------------------------
|
@@ -469,7 +494,7 @@ with tabs[2]:
|
|
469 |
if st.button("Analyze Compound"):
|
470 |
with st.spinner("Querying PubChem..."):
|
471 |
smiles = None
|
472 |
-
# If
|
473 |
if Chem.MolFromSmiles(compound_input):
|
474 |
smiles = compound_input
|
475 |
else:
|
@@ -510,7 +535,7 @@ with tabs[3]:
|
|
510 |
ema_info = scrape_ema_drug_info(drug_name)
|
511 |
ema_status = ema_info.get("EMA Approval Status") if ema_info else "Not Available"
|
512 |
|
513 |
-
# WHO Data
|
514 |
who = _query_api(API_ENDPOINTS["who_drugs"], params={"name": drug_name})
|
515 |
who_status = "Yes" if who else "No"
|
516 |
|
@@ -637,3 +662,60 @@ with tabs[5]:
|
|
637 |
except Exception as e:
|
638 |
st.error(f"Error generating network graph: {e}")
|
639 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
24 |
# API Endpoints (Centralized Configuration)
|
25 |
# -----------------------------
|
26 |
API_ENDPOINTS = {
|
27 |
+
"clinical_trials": "https://clinicaltrials.gov/api/v2/studies", # no email needed
|
28 |
"pubchem": "https://pubchem.ncbi.nlm.nih.gov/rest/pug/compound/name/{}/JSON",
|
29 |
"pubmed": "https://eutils.ncbi.nlm.nih.gov/entrez/eutils/esearch.fcgi",
|
30 |
"who_drugs": "https://health-products.canada.ca/api/drug/product",
|
31 |
"fda_drug_approval": "https://api.fda.gov/drug/label.json",
|
32 |
"faers_adverse_events": "https://api.fda.gov/drug/event.json",
|
33 |
+
# PharmGKB endpoints: these require a PharmGKB accession (e.g., PA1234)
|
34 |
"pharmgkb_variant_clinical_annotations": "https://api.pharmgkb.org/v1/data/variant/{}/clinicalAnnotations",
|
35 |
+
"pharmgkb_gene": "https://api.pharmgkb.org/v1/data/gene/{}",
|
36 |
"pharmgkb_gene_variants": "https://api.pharmgkb.org/v1/data/gene/{}/variants",
|
37 |
"bioportal_search": "https://data.bioontology.org/search",
|
38 |
+
# DailyMed: note the base URL now uses HTTPS and version v2
|
39 |
"dailymed": "https://dailymed.nlm.nih.gov/dailymed/services/v2/spls.json",
|
40 |
+
# RxNorm endpoints (examples)
|
41 |
+
"rxnorm_rxcui": "https://rxnav.nlm.nih.gov/REST/rxcui.json",
|
42 |
+
"rxnorm_properties": "https://rxnav.nlm.nih.gov/REST/rxcui/{}/properties.json",
|
43 |
+
# RxClass endpoint example
|
44 |
+
"rxclass_by_drug": "https://rxnav.nlm.nih.gov/REST/class/byDrugName.json"
|
45 |
}
|
46 |
|
47 |
# -----------------------------------
|
|
|
49 |
# -----------------------------------
|
50 |
OPENAI_API_KEY = st.secrets.get("OPENAI_API_KEY")
|
51 |
BIOPORTAL_API_KEY = st.secrets.get("BIOPORTAL_API_KEY")
|
52 |
+
PUB_EMAIL = st.secrets.get("PUB_EMAIL")
|
53 |
OPENFDA_KEY = st.secrets.get("OPENFDA_KEY")
|
54 |
|
55 |
if not PUB_EMAIL:
|
|
|
68 |
client = OpenAI(api_key=OPENAI_API_KEY)
|
69 |
|
70 |
def generate_content(prompt: str) -> str:
|
71 |
+
"""Generate content using GPT-4 via the new OpenAI SDK."""
|
72 |
try:
|
73 |
completion = client.chat.completions.create(
|
74 |
model="gpt-4",
|
|
|
105 |
"""Retrieves a drug's SMILES string from PubChem."""
|
106 |
url = API_ENDPOINTS["pubchem"].format(drug_name)
|
107 |
data = _query_api(url)
|
108 |
+
if data and "PC_Compounds" in data and len(data["PC_Compounds"]) > 0:
|
109 |
for prop in data["PC_Compounds"][0].get("props", []):
|
110 |
if prop.get("name") == "Canonical SMILES":
|
111 |
return prop["value"]["sval"]
|
|
|
126 |
return None
|
127 |
|
128 |
def _get_clinical_trials(query: str) -> Optional[Dict]:
    """Query the ClinicalTrials.gov v2 ``/studies`` endpoint.

    Args:
        query: Either an NCT identifier ("NCT" followed only by digits,
            matched case-insensitively) or a free-text search term.

    Returns:
        Whatever ``_query_api`` returns for the request (the parsed response,
        or ``None`` when the helper reports a failure).
    """
    term = query.strip()
    if term.upper().startswith("NCT") and term[3:].isdigit():
        # Exact-study lookup. The v2 API filters by NCT id through
        # ``filter.ids``; ids are upper-cased so "nct01234567" also matches.
        params = {"filter.ids": term.upper(), "format": "json"}
    else:
        # Free-text search. ``pageSize``/``format`` are the v2 parameter
        # names; ``retmax``/``retmode`` belong to NCBI E-utilities, not to
        # ClinicalTrials.gov.
        params = {"query.term": term, "pageSize": 10, "format": "json"}
    return _query_api(API_ENDPOINTS["clinical_trials"], params)
|
|
|
135 |
|
136 |
def _get_pubmed(query: str) -> Optional[Dict]:
    """Run a PubMed ESearch for ``query`` and return the helper's result.

    The request asks for at most 10 hits in JSON and passes the configured
    contact e-mail (``PUB_EMAIL``) along with the search term.
    """
    esearch_url = API_ENDPOINTS["pubmed"]
    esearch_args = dict(
        db="pubmed",
        term=query,
        retmax=10,
        retmode="json",
        email=PUB_EMAIL,
    )
    return _query_api(esearch_url, esearch_args)
|
140 |
|
141 |
+
def _get_fda_approval(drug_name: str) -> Optional[Dict]:
    """Look up the first openFDA drug-label record for a brand name.

    Args:
        drug_name: Brand name matched against ``openfda.brand_name``.

    Returns:
        The first entry of the response's ``results`` list, or ``None`` when
        no API key is configured, the request yields nothing, or the list is
        empty/missing.
    """
    if OPENFDA_KEY:
        request_args = {
            "api_key": OPENFDA_KEY,
            "search": f'openfda.brand_name:"{drug_name}"',
            "limit": 1,
        }
        payload = _query_api(API_ENDPOINTS["fda_drug_approval"], request_args)
        labels = payload.get("results") if payload else None
        return labels[0] if labels else None

    # No key configured: surface the problem in the UI and bail out.
    st.error("OpenFDA API key not configured.")
    return None
|
152 |
|
153 |
+
def _analyze_adverse_events(drug_name: str, limit: int = 5) -> Optional[Dict]:
    """Fetch FAERS adverse-event reports that mention a medicinal product.

    Args:
        drug_name: Value matched against ``patient.drug.medicinalproduct``.
        limit: Maximum number of reports requested (default 5).

    Returns:
        The result of ``_query_api`` for the request, or ``None`` when no
        openFDA API key is configured.
    """
    if OPENFDA_KEY:
        search_expr = f'patient.drug.medicinalproduct:"{drug_name}"'
        request_args = {"api_key": OPENFDA_KEY, "search": search_expr, "limit": limit}
        events_url = API_ENDPOINTS["faers_adverse_events"]
        return _query_api(events_url, request_args)

    # No key configured: surface the problem in the UI and bail out.
    st.error("OpenFDA API key not configured.")
    return None
|
161 |
|
162 |
def _get_pharmgkb_clinical_annotations(variant_id: str) -> Optional[Dict]:
|
|
|
171 |
def _get_pharmgkb_variants_for_gene(pharmgkb_gene_id: str) -> Optional[List[str]]:
|
172 |
"""
|
173 |
Retrieves variant IDs for a gene using its PharmGKB accession.
|
174 |
+
If the accession does not start with "PA", warn the user.
|
175 |
"""
|
176 |
if not pharmgkb_gene_id.startswith("PA"):
|
177 |
+
st.warning("Please provide a valid PharmGKB accession ID (e.g., PA1234).")
|
178 |
return None
|
179 |
endpoint = API_ENDPOINTS["pharmgkb_gene_variants"].format(pharmgkb_gene_id)
|
180 |
data = _query_api(endpoint)
|
|
|
184 |
return None
|
185 |
|
186 |
def get_pharmgkb_gene_data(pharmgkb_gene_id: str) -> Optional[Dict]:
|
187 |
+
"""
|
188 |
+
Retrieves PharmGKB gene data using a PharmGKB accession.
|
189 |
+
"""
|
190 |
if not pharmgkb_gene_id.startswith("PA"):
|
191 |
st.warning("Please enter a valid PharmGKB gene accession ID (e.g., PA1234).")
|
192 |
return None
|
|
|
198 |
return None
|
199 |
|
200 |
def scrape_ema_drug_info(drug_name: str) -> Optional[Dict]:
|
201 |
+
"""Scrapes EMA website for drug information using browser-like headers."""
|
202 |
try:
|
203 |
search_url = f"https://www.ema.europa.eu/en/search?text={drug_name.replace(' ', '+')}&type=Product"
|
204 |
headers = {
|
|
|
233 |
return None
|
234 |
|
235 |
def _get_dailymed_label(drug_name: str) -> Optional[Dict]:
|
236 |
+
"""Retrieves DailyMed label info using the v2 API (returns JSON)."""
|
237 |
try:
|
238 |
params = {"drug_name": drug_name, "page": 1, "pagesize": 1}
|
239 |
data = _query_api(API_ENDPOINTS["dailymed"], params)
|
|
|
350 |
return fig
|
351 |
|
352 |
# -----------------------------
|
353 |
+
# New Functions: RxNorm and RxClass Integration
|
354 |
# -----------------------------
|
355 |
+
def get_rxnorm_rxcui(drug_name: str) -> Optional[str]:
    """Retrieve the RxNorm Concept Unique Identifier (RxCUI) for a drug name.

    Args:
        drug_name: Name to look up; sent as the ``name`` query parameter.

    Returns:
        The first id in the response's ``idGroup.rxnormId`` list, or ``None``
        (after showing a Streamlit warning) when no id is returned.
    """
    # Pass the name as a query parameter instead of splicing it into the URL,
    # so names containing spaces, '&', '#' or '+' are percent-encoded rather
    # than corrupting the query string. Assumes `_query_api` forwards `params`
    # to the HTTP client as the query string, as its other callers rely on.
    data = _query_api(API_ENDPOINTS["rxnorm_rxcui"], params={"name": drug_name})
    if data and "idGroup" in data and data["idGroup"].get("rxnormId"):
        return data["idGroup"]["rxnormId"][0]
    st.warning(f"No RxCUI found for {drug_name}.")
    return None
|
363 |
+
|
364 |
+
def get_rxnorm_properties(rxcui: str) -> Optional[Dict]:
    """Fetch the RxNorm properties record for the given RxCUI.

    The RxCUI is substituted into the ``rxnorm_properties`` URL template and
    the resulting URL is handed to ``_query_api``, whose result is returned.
    """
    url_template = API_ENDPOINTS["rxnorm_properties"]
    return _query_api(url_template.format(rxcui))
|
368 |
+
|
369 |
+
def get_rxclass_by_drug_name(drug_name: str) -> Optional[Dict]:
    """Retrieve RxClass information for a drug by name.

    Args:
        drug_name: Name to look up; sent as the ``drugName`` query parameter.

    Returns:
        Whatever ``_query_api`` returns for the request.
    """
    # Pass the name as a query parameter instead of splicing it into the URL,
    # so names containing spaces, '&', '#' or '+' are percent-encoded rather
    # than corrupting the query string. Assumes `_query_api` forwards `params`
    # to the HTTP client as the query string, as its other callers rely on.
    return _query_api(API_ENDPOINTS["rxclass_by_drug"], params={"drugName": drug_name})
|
373 |
|
374 |
+
# -----------------------------
|
375 |
+
# Streamlit App Layout and Tabs
|
376 |
+
# -----------------------------
|
377 |
+
# Add a new tab "🧪 Drug Data Integration" to bring together various drug APIs.
|
378 |
tabs = st.tabs([
|
379 |
"π Drug Development",
|
380 |
"π Trial Analytics",
|
381 |
"𧬠Molecular Profiling",
|
382 |
"π Regulatory Intelligence",
|
383 |
"π Literature Search",
|
384 |
+
"π Dashboard",
|
385 |
+
"π§ͺ Drug Data Integration"
|
386 |
])
|
387 |
|
388 |
# -----------------------------
|
|
|
494 |
if st.button("Analyze Compound"):
|
495 |
with st.spinner("Querying PubChem..."):
|
496 |
smiles = None
|
497 |
+
# If input is a valid SMILES, use it; otherwise attempt to retrieve from PubChem.
|
498 |
if Chem.MolFromSmiles(compound_input):
|
499 |
smiles = compound_input
|
500 |
else:
|
|
|
535 |
ema_info = scrape_ema_drug_info(drug_name)
|
536 |
ema_status = ema_info.get("EMA Approval Status") if ema_info else "Not Available"
|
537 |
|
538 |
+
# WHO Data (Health Canada API)
|
539 |
who = _query_api(API_ENDPOINTS["who_drugs"], params={"name": drug_name})
|
540 |
who_status = "Yes" if who else "No"
|
541 |
|
|
|
662 |
except Exception as e:
|
663 |
st.error(f"Error generating network graph: {e}")
|
664 |
|
665 |
+
# -----------------------------
|
666 |
+
# Tab 7: Drug Data Integration
|
667 |
+
# -----------------------------
|
668 |
+
with tabs[6]:
    # Tab 7: look up one drug name in DailyMed, RxNorm and RxClass and show
    # the three results one below the other.
    # The header emoji was mis-decoded in this copy of the file; restored.
    st.header("🧪 Drug Data Integration")
    drug_query = st.text_input("Enter Drug Name for API Integration:", placeholder="e.g., aspirin")

    if st.button("Retrieve Drug Data"):
        drug_query = drug_query.strip()
        if not drug_query:
            # Avoid firing three remote lookups for an empty name.
            st.warning("Please enter a drug name.")
        else:
            with st.spinner("Fetching drug data from multiple sources..."):
                # DailyMed - reuse the existing DailyMed label function.
                dailymed_label = _get_dailymed_label(drug_query)
                if dailymed_label:
                    dm_label = dailymed_label.get("Label URL", "Not Available")
                else:
                    dm_label = "Not Available"

                # RxNorm - resolve the RxCUI first, then fetch its properties.
                rxnorm_id = get_rxnorm_rxcui(drug_query)
                rx_properties = get_rxnorm_properties(rxnorm_id) if rxnorm_id else None

                # RxClass - classes related to the drug.
                rxclass_data = get_rxclass_by_drug_name(drug_query)

                st.subheader("DailyMed Label")
                st.write(f"DailyMed Label URL: {dm_label}")

                st.subheader("RxNorm Data")
                if rxnorm_id:
                    st.write(f"RxCUI for {drug_query}: {rxnorm_id}")
                    if rx_properties:
                        st.json(rx_properties)
                    else:
                        st.write("No RxNorm properties found.")
                else:
                    st.write("No RxCUI found for the given drug name.")

                st.subheader("RxClass Information")
                if rxclass_data:
                    st.json(rxclass_data)
                else:
                    st.write("No RxClass data found for the given drug.")
|
709 |
+
|
710 |
+
# -----------------------------
|
711 |
+
# Sidebar Information
|
712 |
+
# -----------------------------
|
713 |
+
st.sidebar.header("About")
|
714 |
+
st.sidebar.info("""
|
715 |
+
**Pharma Research Expert Platform**
|
716 |
+
|
717 |
+
An integrated tool for drug discovery, clinical research, and regulatory affairs.
|
718 |
+
|
719 |
+
**Developed by:** Your Name
|
720 |
+
**Contact:** [[email protected]](mailto:[email protected])
|
721 |
+
""")
|