mgbam committed on
Commit
c1e3091
·
verified ·
1 Parent(s): 60db7a5

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +225 -577
app.py CHANGED
@@ -1,3 +1,11 @@
 
 
 
 
 
 
 
 
1
  import streamlit as st
2
  import requests
3
  from rdkit import Chem
@@ -11,619 +19,259 @@ import logging
11
  import os
12
  import plotly.graph_objects as go
13
  import networkx as nx
14
- from typing import Optional, Dict, List, Any
 
15
 
16
- # -----------------------------
17
- # SETUP
18
- # -----------------------------
19
- # Must be the very first Streamlit command
20
- st.set_page_config(page_title="Pharma Research Expert Platform", layout="wide")
21
- logging.basicConfig(level=logging.ERROR)
 
22
 
23
  # -----------------------------
24
- # API ENDPOINTS
25
  # -----------------------------
26
  API_ENDPOINTS = {
27
- "clinical_trials": "https://clinicaltrials.gov/api/v2/studies", # no email required
28
- "pubchem": "https://pubchem.ncbi.nlm.nih.gov/rest/pug/compound/name/{}/JSON",
29
- "pubmed": "https://eutils.ncbi.nlm.nih.gov/entrez/eutils/esearch.fcgi",
30
  "fda_drug_approval": "https://api.fda.gov/drug/label.json",
31
  "faers_adverse_events": "https://api.fda.gov/drug/event.json",
32
- # PharmGKB endpoints – expecting a PharmGKB accession (e.g., PA1234)
 
 
 
 
 
33
  "pharmgkb_variant_clinical_annotations": "https://api.pharmgkb.org/v1/data/variant/{}/clinicalAnnotations",
34
  "pharmgkb_gene": "https://api.pharmgkb.org/v1/data/gene/{}",
35
  "pharmgkb_gene_variants": "https://api.pharmgkb.org/v1/data/gene/{}/variants",
 
 
36
  "bioportal_search": "https://data.bioontology.org/search",
37
- # RxNorm endpoints
 
38
  "rxnorm_rxcui": "https://rxnav.nlm.nih.gov/REST/rxcui.json",
39
  "rxnorm_properties": "https://rxnav.nlm.nih.gov/REST/rxcui/{}/properties.json",
40
- # RxClass endpoint – note: this endpoint sometimes returns 404 if no data are available.
41
  "rxclass_by_drug": "https://rxnav.nlm.nih.gov/REST/class/byDrugName.json"
42
  }
43
 
44
- # -----------------------------
45
- # Retrieve Secrets
46
- # -----------------------------
47
- OPENAI_API_KEY = st.secrets.get("OPENAI_API_KEY")
48
- BIOPORTAL_API_KEY = st.secrets.get("BIOPORTAL_API_KEY")
49
- PUB_EMAIL = st.secrets.get("PUB_EMAIL")
50
- OPENFDA_KEY = st.secrets.get("OPENFDA_KEY")
51
-
52
- if not PUB_EMAIL:
53
- st.error("PubMed email (PUB_EMAIL) is not configured.")
54
- if not BIOPORTAL_API_KEY:
55
- st.error("BioPortal API key (BIOPORTAL_API_KEY) is not configured.")
56
- if not OPENFDA_KEY:
57
- st.error("OpenFDA API key (OPENFDA_KEY) is not configured.")
58
- if not OPENAI_API_KEY:
59
- st.error("OpenAI API key (OPENAI_API_KEY) is not configured.")
60
 
61
  # -----------------------------
62
- # Initialize OpenAI Client (GPT-4)
63
  # -----------------------------
64
- from openai import OpenAI
65
- openai_client = OpenAI(api_key=OPENAI_API_KEY)
66
-
67
- def generate_content(prompt: str) -> str:
68
- """Generate content using GPT-4 via the OpenAI API."""
69
- try:
70
- completion = openai_client.chat.completions.create(
71
- model="gpt-4",
72
- messages=[{"role": "user", "content": prompt}],
73
- max_tokens=300
74
- )
75
- return completion.choices[0].message.content.strip()
76
- except Exception as e:
77
- st.error(f"Error generating content: {e}")
78
- logging.error(f"OpenAI generation error: {e}")
79
- return "Content generation failed."
 
80
 
81
  # -----------------------------
82
- # Utility Functions
83
  # -----------------------------
84
- def _query_api(endpoint: str, params: Optional[Dict] = None, headers: Optional[Dict] = None) -> Optional[Dict]:
85
- """Handles API requests with error handling."""
86
- try:
87
- response = requests.get(endpoint, params=params, headers=headers, timeout=15)
88
- response.raise_for_status()
89
- return response.json()
90
- except Exception as e:
91
- st.error(f"API error for {endpoint}: {e}")
92
- logging.error(f"Error for {endpoint}: {e}")
93
- return None
94
-
95
- def _get_pubchem_smiles(drug_name: str) -> Optional[str]:
96
- """Retrieve canonical SMILES string from PubChem."""
97
- url = API_ENDPOINTS["pubchem"].format(drug_name)
98
- data = _query_api(url)
99
- if data and data.get("PC_Compounds"):
100
- for prop in data["PC_Compounds"][0].get("props", []):
101
- if prop.get("name") == "Canonical SMILES":
102
- return prop["value"]["sval"]
103
- return None
104
-
105
- def _draw_molecule(smiles: str) -> Optional[Any]:
106
- """Draw a molecule image using RDKit."""
107
- try:
108
- mol = Chem.MolFromSmiles(smiles)
109
- if mol:
110
- return Draw.MolToImage(mol)
111
- else:
112
- st.error("Invalid SMILES provided.")
113
- except Exception as e:
114
- st.error(f"Error drawing molecule: {e}")
115
- logging.error(e)
116
- return None
117
-
118
- def _get_pubchem_drug_details(drug_name: str) -> Optional[Dict[str, str]]:
119
- """Retrieve drug details (molecular formula, IUPAC name, canonical SMILES) from PubChem."""
120
- url = API_ENDPOINTS["pubchem"].format(drug_name)
121
- data = _query_api(url)
122
- details = {}
123
- if data and data.get("PC_Compounds"):
124
- compound = data["PC_Compounds"][0]
125
- for prop in compound.get("props", []):
126
- urn = prop.get("urn", {})
127
- if urn.get("label") == "Molecular Formula":
128
- details["Molecular Formula"] = prop["value"]["sval"]
129
- if urn.get("name") == "Preferred":
130
- details["IUPAC Name"] = prop["value"]["sval"]
131
- if prop.get("name") == "Canonical SMILES":
132
- details["Canonical SMILES"] = prop["value"]["sval"]
133
- return details
134
- return None
135
-
136
- def _get_clinical_trials(query: str) -> Optional[Dict]:
137
- """Query ClinicalTrials.gov (no email parameter needed)."""
138
- if query.upper().startswith("NCT") and query[3:].isdigit():
139
- params = {"id": query, "fmt": "json"}
140
- else:
141
- params = {"query.term": query, "retmax": 10, "retmode": "json"}
142
- return _query_api(API_ENDPOINTS["clinical_trials"], params)
143
-
144
- def _get_pubmed(query: str) -> Optional[Dict]:
145
- """Query PubMed using E-utilities."""
146
- params = {"db": "pubmed", "term": query, "retmax": 10, "retmode": "json", "email": PUB_EMAIL}
147
- return _query_api(API_ENDPOINTS["pubmed"], params)
148
-
149
- def _get_fda_approval(drug_name: str) -> Optional[Dict]:
150
- """Retrieve FDA drug approval info using openFDA."""
151
- if not OPENFDA_KEY:
152
- st.error("OpenFDA API key not configured.")
153
- return None
154
- query = f'openfda.brand_name:"{drug_name}"'
155
- params = {"api_key": OPENFDA_KEY, "search": query, "limit": 1}
156
- data = _query_api(API_ENDPOINTS["fda_drug_approval"], params)
157
- if data and data.get("results"):
158
- return data["results"][0]
159
- return None
160
-
161
- def _analyze_adverse_events(drug_name: str, limit: int = 5) -> Optional[Dict]:
162
- """Fetch adverse events from FAERS."""
163
- if not OPENFDA_KEY:
164
- st.error("OpenFDA API key not configured.")
165
- return None
166
- query = f'patient.drug.medicinalproduct:"{drug_name}"'
167
- params = {"api_key": OPENFDA_KEY, "search": query, "limit": limit}
168
- return _query_api(API_ENDPOINTS["faers_adverse_events"], params)
169
-
170
- def _get_pharmgkb_clinical_annotations(variant_id: str) -> Optional[Dict]:
171
- """Get clinical annotations for a PharmGKB variant."""
172
- endpoint = API_ENDPOINTS["pharmgkb_variant_clinical_annotations"].format(variant_id)
173
- data = _query_api(endpoint)
174
- if data and data.get("data"):
175
- return data
176
- st.write(f"No clinical annotations found for variant {variant_id}.")
177
- return None
178
-
179
- def _get_pharmgkb_variants_for_gene(pharmgkb_gene_id: str) -> Optional[List[str]]:
180
- """Retrieve variant IDs for a PharmGKB gene accession (e.g., PA1234)."""
181
- if not pharmgkb_gene_id.startswith("PA"):
182
- st.warning("Please provide a valid PharmGKB accession (e.g., PA1234).")
183
- return None
184
- endpoint = API_ENDPOINTS["pharmgkb_gene_variants"].format(pharmgkb_gene_id)
185
- data = _query_api(endpoint)
186
- if data and data.get("data"):
187
- return [variant["id"] for variant in data["data"]]
188
- st.warning(f"No variants found for PharmGKB gene {pharmgkb_gene_id}.")
189
- return None
190
-
191
- def get_pharmgkb_gene_data(pharmgkb_gene_id: str) -> Optional[Dict]:
192
- """Retrieve PharmGKB gene data."""
193
- if not pharmgkb_gene_id.startswith("PA"):
194
- st.warning("Please enter a valid PharmGKB gene accession (e.g., PA1234).")
195
- return None
196
- endpoint = API_ENDPOINTS["pharmgkb_gene"].format(pharmgkb_gene_id)
197
- data = _query_api(endpoint)
198
- if data and data.get("data"):
199
- return data["data"][0]
200
- st.write(f"No data found for PharmGKB gene {pharmgkb_gene_id}.")
201
- return None
202
-
203
- def _get_bioportal_data(ontology: str, term: str) -> Optional[Dict]:
204
- """Query BioPortal for ontology data."""
205
- if not BIOPORTAL_API_KEY:
206
- st.error("BioPortal API key not configured.")
207
- return None
208
- if not term:
209
- st.error("Please provide a term for ontology search.")
210
  return None
211
- headers = {"Authorization": f"apikey token={BIOPORTAL_API_KEY}"}
212
- params = {"q": term, "ontologies": ontology}
213
- data = _query_api(API_ENDPOINTS["bioportal_search"], params, headers)
214
- if data and data.get("collection"):
215
- return data
216
- st.warning("No BioPortal results found.")
217
- return None
218
-
219
- def _save_pdf_report(report_content: str, filename: str):
220
- """Save report content as a PDF."""
221
- try:
222
- pdf = FPDF()
223
- pdf.add_page()
224
- pdf.set_font("Arial", size=12)
225
- pdf.multi_cell(0, 10, report_content)
226
- pdf.output(filename)
227
- return filename
228
- except Exception as e:
229
- st.error(f"Error saving PDF: {e}")
230
- logging.error(e)
231
- return None
232
 
233
- def _display_dataframe(data: List[Dict[str, Any]], columns: List[str]):
234
- """Display a dataframe in Streamlit."""
235
- if data:
236
- df = pd.DataFrame(data, columns=columns)
237
- st.dataframe(df)
238
- return df
239
- st.warning("No data available.")
240
- return None
241
-
242
- def _create_variant_network(gene: str, variants: List[str], annotations: Dict) -> go.Figure:
243
- """Create a network graph (gene-variant-drug)."""
244
- G = nx.Graph()
245
- G.add_node(gene, color="lightblue")
246
- for variant in variants:
247
- G.add_node(variant, color="lightgreen")
248
- G.add_edge(gene, variant)
249
- for drug in annotations.get(variant, []):
250
- if drug and drug != "N/A":
251
- G.add_node(drug, color="lightcoral")
252
- G.add_edge(variant, drug)
253
- pos = nx.spring_layout(G)
254
- edge_x, edge_y = [], []
255
- for edge in G.edges():
256
- x0, y0 = pos[edge[0]]
257
- x1, y1 = pos[edge[1]]
258
- edge_x.extend([x0, x1, None])
259
- edge_y.extend([y0, y1, None])
260
- edge_trace = go.Scatter(
261
- x=edge_x, y=edge_y, line=dict(width=0.5, color="#888"),
262
- hoverinfo="none", mode="lines"
263
- )
264
- node_x, node_y, node_text, node_color = [], [], [], []
265
- for node in G.nodes():
266
- x, y = pos[node]
267
- node_x.append(x)
268
- node_y.append(y)
269
- node_text.append(node)
270
- node_color.append(G.nodes[node]["color"])
271
- node_trace = go.Scatter(
272
- x=node_x, y=node_y, mode="markers+text", hoverinfo="text",
273
- text=node_text, textposition="bottom center",
274
- marker=dict(showscale=False, colorscale="YlGnBu",
275
- color=node_color, size=10, line_width=2)
276
- )
277
- fig = go.Figure(
278
- data=[edge_trace, node_trace],
279
- layout=go.Layout(
280
- title=dict(text="Gene-Variant-Drug Network", font=dict(size=16)),
281
- showlegend=False, hovermode="closest",
282
- margin=dict(b=20, l=5, r=5, t=40),
283
- xaxis=dict(showgrid=False, zeroline=False, showticklabels=False),
284
- yaxis=dict(showgrid=False, zeroline=False, showticklabels=False)
285
  )
286
- )
287
- return fig
288
-
289
- # -----------------------------
290
- # RxNorm & RxClass Functions
291
- # -----------------------------
292
- def get_rxnorm_rxcui(drug_name: str) -> Optional[str]:
293
- """Retrieve the RxCUI for a drug name."""
294
- url = f"{API_ENDPOINTS['rxnorm_rxcui']}?name={drug_name}"
295
- data = _query_api(url)
296
- if data and "idGroup" in data and data["idGroup"].get("rxnormId"):
297
- return data["idGroup"]["rxnormId"][0]
298
- st.warning(f"No RxCUI found for {drug_name}.")
299
- return None
300
-
301
- def get_rxnorm_properties(rxcui: str) -> Optional[Dict]:
302
- """Retrieve RxNorm properties for a given RxCUI."""
303
- url = API_ENDPOINTS["rxnorm_properties"].format(rxcui)
304
- return _query_api(url)
305
-
306
- def get_rxclass_by_drug_name(drug_name: str) -> Optional[Dict]:
307
- """Retrieve RxClass info for a drug by name; gracefully handle if not found."""
308
- url = f"{API_ENDPOINTS['rxclass_by_drug']}?drugName={drug_name}"
309
- data = _query_api(url)
310
- if data and "classMember" in data:
311
- return data
312
- return None
313
 
314
  # -----------------------------
315
- # AI-Driven Drug Insights
316
  # -----------------------------
317
- def generate_drug_insights(drug_name: str) -> str:
318
- """Gathers data from FDA, PubChem, RxNorm, and RxClass then uses GPT-4 to generate an innovative analysis."""
319
- # FDA Data
320
- fda_info = _get_fda_approval(drug_name)
321
- fda_status = "Not Approved"
322
- if fda_info and fda_info.get("openfda", {}).get("brand_name"):
323
- fda_status = ", ".join(fda_info["openfda"]["brand_name"])
324
 
325
- # PubChem Details
326
- pubchem_details = _get_pubchem_drug_details(drug_name)
327
- if pubchem_details:
328
- formula = pubchem_details.get("Molecular Formula", "N/A")
329
- iupac = pubchem_details.get("IUPAC Name", "N/A")
330
- canonical_smiles = pubchem_details.get("Canonical SMILES", "N/A")
331
- else:
332
- formula = iupac = canonical_smiles = "Not Available"
333
 
334
- # RxNorm Data
335
- rxnorm_id = get_rxnorm_rxcui(drug_name)
336
- if rxnorm_id:
337
- rx_properties = get_rxnorm_properties(rxnorm_id)
338
- rxnorm_info = f"RxCUI: {rxnorm_id}. Properties: {rx_properties}"
339
- else:
340
- rxnorm_info = "No RxNorm data available."
 
341
 
342
- # RxClass Data
343
- rxclass_data = get_rxclass_by_drug_name(drug_name)
344
- rxclass_info = rxclass_data if rxclass_data else "No RxClass data available."
345
-
346
- # Construct a prompt for GPT-4
347
- prompt = (
348
- f"Drug Analysis Report for '{drug_name}':\n\n"
349
- f"**FDA Approval Status:** {fda_status}\n\n"
350
- f"**PubChem Details:**\n"
351
- f" - Molecular Formula: {formula}\n"
352
- f" - IUPAC Name: {iupac}\n"
353
- f" - Canonical SMILES: {canonical_smiles}\n\n"
354
- f"**RxNorm Data:** {rxnorm_info}\n\n"
355
- f"**RxClass Data:** {rxclass_info}\n\n"
356
- f"As an innovative pharmacogenomics researcher and AI expert, please provide a comprehensive analysis of '{drug_name}', "
357
- f"including pharmacogenomic considerations, potential repurposing opportunities, regulatory insights, and suggestions for further research. "
358
- f"Present your answer in bullet points."
359
- )
360
-
361
- insights = generate_content(prompt)
362
- return insights
 
 
 
 
363
 
364
  # -----------------------------
365
- # STREAMLIT APP TABS
366
  # -----------------------------
367
- tabs = st.tabs([
368
- "💊 Drug Development",
369
- "📊 Trial Analytics",
370
- "🧬 Molecular Profiling",
371
- "📜 Regulatory Intelligence",
372
- "📚 Literature Search",
373
- "📈 Dashboard",
374
- "🧪 Drug Data Integration",
375
- "🤖 AI Insights"
376
- ])
377
-
378
- # ----- Tab 1: Drug Development -----
379
- with tabs[0]:
380
- st.header("AI-Driven Drug Development Strategy")
381
- target = st.text_input("Target Disease/Pathway:", placeholder="Enter disease mechanism or target")
382
- target_gene = st.text_input("Target Gene (PharmGKB Accession):", placeholder="e.g., PA1234")
383
- strategy = st.selectbox("Development Strategy:", ["First-in-class", "Me-too", "Repurposing", "Biologic"])
384
 
385
- if st.button("Generate Development Plan"):
386
- with st.spinner("Generating plan..."):
387
- plan_prompt = (
388
- f"Develop a comprehensive drug development plan for treating {target} using a {strategy} strategy. "
389
- "Include sections on target validation, lead optimization, preclinical testing, clinical trial design, "
390
- "regulatory submission strategy, market analysis, and competitive landscape. Highlight key milestones and challenges."
391
- )
392
- plan = generate_content(plan_prompt)
393
- st.subheader("Comprehensive Development Plan")
394
- st.markdown(plan)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
395
 
396
- st.subheader("FDA Regulatory Insights")
397
- if target:
398
- fda_info = _get_fda_approval(target.split()[0])
399
- if fda_info:
400
- st.json(fda_info)
401
- else:
402
- st.write("No FDA data found for the given target.")
 
 
403
 
404
- st.subheader("Pharmacogenomic Considerations")
405
- if target_gene:
406
- if not target_gene.startswith("PA"):
407
- st.warning("Please provide a valid PharmGKB accession (e.g., PA1234).")
408
- else:
409
- variant_ids = _get_pharmgkb_variants_for_gene(target_gene)
410
- if variant_ids:
411
- annotations = {}
412
- for vid in variant_ids[:5]:
413
- pgx = _get_pharmgkb_clinical_annotations(vid)
414
- annotations[vid] = [anno.get("obj2Name", "N/A") for anno in pgx.get("data", [])] if pgx else []
415
- st.write(f"### Annotations for Variant: {vid}")
416
- st.json(pgx if pgx else {"message": "No annotations found."})
417
- else:
418
- st.write("No variants found for the specified PharmGKB gene accession.")
419
- else:
420
- st.write("Enter a PharmGKB gene accession to retrieve pharmacogenomic data.")
421
-
422
- # ----- Tab 2: Clinical Trial Analytics -----
423
- with tabs[1]:
424
- st.header("Clinical Trial Landscape Analytics")
425
- trial_query = st.text_input("Search Clinical Trials:", placeholder="Enter condition, intervention, or NCT number")
426
- if st.button("Analyze Trial Landscape"):
427
- with st.spinner("Fetching trial data..."):
428
- trials = _get_clinical_trials(trial_query)
429
- if trials and trials.get("studies"):
430
- trial_data = []
431
- for study in trials["studies"][:5]:
432
- trial_data.append({
433
- "Title": study.get("protocolSection", {}).get("identificationModule", {}).get("briefTitle", "N/A"),
434
- "Status": study.get("protocolSection", {}).get("statusModule", {}).get("overallStatus", "N/A"),
435
- "Phase": study.get("protocolSection", {}).get("designModule", {}).get("phases", ["Not Available"])[0],
436
- "Enrollment": study.get("protocolSection", {}).get("designModule", {}).get("enrollmentInfo", {}).get("count", "N/A")
437
- })
438
- _display_dataframe(trial_data, list(trial_data[0].keys()))
439
- else:
440
- st.warning("No clinical trials found for the query.")
441
 
442
- ae_data = _analyze_adverse_events(trial_query)
443
- if ae_data and ae_data.get("results"):
444
- st.subheader("Adverse Event Profile (Top 5)")
445
- ae_results = ae_data["results"][:5]
446
- ae_df = pd.json_normalize(ae_results)
447
- st.dataframe(ae_df)
448
- if "patient.reaction.reactionmeddrapt" in ae_df.columns:
449
- try:
450
- reactions = ae_df["patient.reaction.reactionmeddrapt"].explode().dropna()
451
- top_reactions = reactions.value_counts().nlargest(10)
452
- fig, ax = plt.subplots(figsize=(10, 6))
453
- sns.barplot(x=top_reactions.values, y=top_reactions.index, ax=ax)
454
- ax.set_title("Top Adverse Reactions")
455
- ax.set_xlabel("Frequency")
456
- ax.set_ylabel("Reaction")
457
- st.pyplot(fig)
458
- except Exception as e:
459
- st.error(f"Error visualizing adverse events: {e}")
460
- else:
461
- st.write("No adverse event data available.")
462
-
463
- # ----- Tab 3: Molecular Profiling -----
464
- with tabs[2]:
465
- st.header("Advanced Molecular Profiling")
466
- compound_input = st.text_input("Compound Identifier:", placeholder="Enter drug name, SMILES, or INN")
467
- if st.button("Analyze Compound"):
468
- with st.spinner("Querying PubChem..."):
469
- if Chem.MolFromSmiles(compound_input):
470
- smiles = compound_input
471
- else:
472
- smiles = _get_pubchem_smiles(compound_input)
473
- if smiles:
474
- img = _draw_molecule(smiles)
475
- if img:
476
- st.image(img, caption="2D Molecular Structure")
477
- else:
478
- st.error("Structure not found. Please try a more specific compound name.")
479
- pubchem_data = _query_api(API_ENDPOINTS["pubchem"].format(compound_input))
480
- if pubchem_data and pubchem_data.get("PC_Compounds"):
481
- st.subheader("Physicochemical Properties")
482
- props = pubchem_data["PC_Compounds"][0].get("props", [])
483
- mw = next((prop["value"]["sval"] for prop in props if prop.get("name") == "Molecular Weight"), "N/A")
484
- logp = next((prop["value"]["sval"] for prop in props if prop.get("name") == "LogP"), "N/A")
485
- st.write(f"**Molecular Weight:** {mw} \n**LogP:** {logp}")
486
- else:
487
- st.error("Physicochemical properties not available.")
488
-
489
- # ----- Tab 4: Regulatory Intelligence -----
490
- with tabs[3]:
491
- st.header("Global Regulatory Monitoring")
492
- st.markdown("**Note:** This section now focuses on FDA data and generic drug details from PubChem.")
493
- drug_prod = st.text_input("Drug Product:", placeholder="Enter generic or brand name")
494
- if st.button("Generate Regulatory Report"):
495
- with st.spinner("Compiling regulatory data..."):
496
- fda_info = _get_fda_approval(drug_prod)
497
- fda_status = "Not Approved"
498
- if fda_info and fda_info.get("openfda", {}).get("brand_name"):
499
- fda_status = ", ".join(fda_info["openfda"]["brand_name"])
500
- pubchem_details = _get_pubchem_drug_details(drug_prod)
501
- if pubchem_details:
502
- formula = pubchem_details.get("Molecular Formula", "N/A")
503
- iupac = pubchem_details.get("IUPAC Name", "N/A")
504
- canon_smiles = pubchem_details.get("Canonical SMILES", "N/A")
505
- else:
506
- formula = iupac = canon_smiles = "Not Available"
507
- col1, col2 = st.columns(2)
508
- with col1:
509
- st.markdown("**FDA Status**")
510
- st.write(fda_status)
511
- with col2:
512
- st.markdown("**Drug Details (PubChem)**")
513
- st.write(f"**Molecular Formula:** {formula}")
514
- st.write(f"**IUPAC Name:** {iupac}")
515
- st.write(f"**Canonical SMILES:** {canon_smiles}")
516
- report_text = (
517
- f"### Regulatory Report for {drug_prod}\n\n"
518
- f"**FDA Status:** {fda_status}\n\n"
519
- f"**Molecular Formula:** {formula}\n\n"
520
- f"**IUPAC Name:** {iupac}\n\n"
521
- f"**Canonical SMILES:** {canon_smiles}\n"
522
- )
523
- with tempfile.NamedTemporaryFile(delete=False, suffix=".pdf") as tmp:
524
- pdf_file = _save_pdf_report(report_text, tmp.name)
525
- if pdf_file:
526
- with open(pdf_file, "rb") as f:
527
- st.download_button("Download Regulatory Report (PDF)", data=f, file_name=f"{drug_prod}_report.pdf", mime="application/pdf")
528
- os.remove(pdf_file)
529
-
530
- # ----- Tab 5: Literature Search -----
531
- with tabs[4]:
532
- st.header("Literature Search")
533
- lit_query = st.text_input("Enter search query for PubMed:", placeholder="e.g., Alzheimer's disease genetics")
534
- if st.button("Search PubMed"):
535
- with st.spinner("Searching PubMed..."):
536
- pubmed_results = _get_pubmed(lit_query)
537
- if pubmed_results and pubmed_results.get("esearchresult", {}).get("idlist"):
538
- id_list = pubmed_results["esearchresult"]["idlist"]
539
- st.subheader(f"Found {len(id_list)} PubMed Results")
540
- for pmid in id_list:
541
- st.markdown(f"- [PMID: {pmid}](https://pubmed.ncbi.nlm.nih.gov/{pmid}/)")
542
- else:
543
- st.write("No PubMed results found.")
544
- st.header("Ontology Search")
545
- ont_query = st.text_input("Enter search query for Ontology:", placeholder="e.g., Alzheimer's disease")
546
- ont_select = st.selectbox("Select Ontology", ["MESH", "NCIT", "GO", "SNOMEDCT"])
547
- if st.button("Search BioPortal"):
548
- with st.spinner("Searching BioPortal..."):
549
- bioportal_results = _get_bioportal_data(ont_select, ont_query)
550
- if bioportal_results and bioportal_results.get("collection"):
551
- st.subheader(f"BioPortal Results for {ont_select}")
552
- for item in bioportal_results["collection"]:
553
- label = item.get("prefLabel", "N/A")
554
- ont_id = item.get("@id", "N/A")
555
- st.markdown(f"- **{label}** ({ont_id})")
556
- else:
557
- st.write("No ontology results found.")
558
-
559
- # ----- Tab 6: Dashboard -----
560
- with tabs[5]:
561
- st.header("Comprehensive Dashboard")
562
- # Placeholder KPIs – replace with real-time aggregated data if available
563
- kpi_fda = 5000
564
- kpi_trials = 12000
565
- kpi_pubs = 250000
566
- col1, col2, col3 = st.columns(3)
567
- col1.metric("FDA Approved Drugs", kpi_fda)
568
- col2.metric("Ongoing Trials", kpi_trials)
569
- col3.metric("Publications", kpi_pubs)
570
- st.subheader("Trend Analysis")
571
- years = list(range(2000, 2026))
572
- approvals = [kpi_fda // len(years)] * len(years) # Sample static data
573
- fig_trend, ax_trend = plt.subplots(figsize=(10, 6))
574
- sns.lineplot(x=years, y=approvals, marker="o", ax=ax_trend)
575
- ax_trend.set_title("FDA Approvals Over Time")
576
- ax_trend.set_xlabel("Year")
577
- ax_trend.set_ylabel("Number of Approvals")
578
- st.pyplot(fig_trend)
579
- st.subheader("Gene-Variant-Drug Network (Sample)")
580
- sample_gene = "CYP2C19"
581
- sample_variants = ["rs4244285", "rs12248560"]
582
- sample_annots = {"rs4244285": ["Clopidogrel", "Omeprazole"], "rs12248560": ["Sertraline"]}
583
- try:
584
- net_fig = _create_variant_network(sample_gene, sample_variants, sample_annots)
585
- st.plotly_chart(net_fig, use_container_width=True)
586
- except Exception as e:
587
- st.error(f"Network graph error: {e}")
588
-
589
- # ----- Tab 7: Drug Data Integration -----
590
- with tabs[6]:
591
- st.header("🧪 Drug Data Integration")
592
- drug_integration = st.text_input("Enter Drug Name for API Integration:", placeholder="e.g., aspirin")
593
- if st.button("Retrieve Drug Data"):
594
- with st.spinner("Fetching drug data..."):
595
- rxnorm_id = get_rxnorm_rxcui(drug_integration)
596
- if rxnorm_id:
597
- rx_props = get_rxnorm_properties(rxnorm_id)
598
- else:
599
- rx_props = None
600
- rxclass_info = get_rxclass_by_drug_name(drug_integration)
601
- st.subheader("RxNorm Data")
602
- if rxnorm_id:
603
- st.write(f"RxCUI for {drug_integration}: {rxnorm_id}")
604
- st.json(rx_props if rx_props else {"message": "No RxNorm properties found."})
605
- else:
606
- st.write("No RxCUI found.")
607
- st.subheader("RxClass Information")
608
- if rxclass_info:
609
- st.json(rxclass_info)
610
- else:
611
- st.write("No RxClass data found for the given drug.")
612
- pubchem_info = _get_pubchem_drug_details(drug_integration)
613
- st.subheader("PubChem Drug Details")
614
- if pubchem_info:
615
- st.write(f"**Molecular Formula:** {pubchem_info.get('Molecular Formula', 'N/A')}")
616
- st.write(f"**IUPAC Name:** {pubchem_info.get('IUPAC Name', 'N/A')}")
617
- st.write(f"**Canonical SMILES:** {pubchem_info.get('Canonical SMILES', 'N/A')}")
618
- else:
619
- st.write("No PubChem details found.")
620
 
621
- # ----- Tab 8: AI Insights -----
622
- with tabs[7]:
623
- st.header("🤖 AI Insights")
624
- ai_drug = st.text_input("Enter Drug Name for AI-Driven Analysis:", placeholder="e.g., tylenol")
625
- if st.button("Generate AI Insights"):
626
- with st.spinner("Generating insights..."):
627
- insights_text = generate_drug_insights(ai_drug)
628
- st.subheader("AI-Driven Drug Analysis")
629
- st.markdown(insights_text)
 
1
+ """
2
+ Pharma Research Intelligence Suite (PRIS)
3
+ A Next-Generation Platform for AI-Driven Drug Discovery and Development
4
+ """
5
+
6
+ # -----------------------------
7
+ # IMPORTS & CONFIGURATION
8
+ # -----------------------------
9
  import streamlit as st
10
  import requests
11
  from rdkit import Chem
 
19
  import os
20
  import plotly.graph_objects as go
21
  import networkx as nx
22
+ from typing import Optional, Dict, List, Any, Tuple
23
+ from openai import OpenAI
24
 
25
+ # Configure professional logging
26
+ logging.basicConfig(
27
+ level=logging.INFO,
28
+ format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
29
+ handlers=[logging.FileHandler("pris_debug.log")]
30
+ )
31
+ logger = logging.getLogger("PRIS")
32
 
33
  # -----------------------------
34
+ # GLOBAL CONSTANTS
35
  # -----------------------------
36
  API_ENDPOINTS = {
37
+ # Clinical Data Services
38
+ "clinical_trials": "https://clinicaltrials.gov/api/v2/studies",
 
39
  "fda_drug_approval": "https://api.fda.gov/drug/label.json",
40
  "faers_adverse_events": "https://api.fda.gov/drug/event.json",
41
+
42
+ # Chemical & Biological Data
43
+ "pubchem": "https://pubchem.ncbi.nlm.nih.gov/rest/pug/compound/name/{}/JSON",
44
+ "pubmed": "https://eutils.ncbi.nlm.nih.gov/entrez/eutils/esearch.fcgi",
45
+
46
+ # Pharmacogenomics Resources
47
  "pharmgkb_variant_clinical_annotations": "https://api.pharmgkb.org/v1/data/variant/{}/clinicalAnnotations",
48
  "pharmgkb_gene": "https://api.pharmgkb.org/v1/data/gene/{}",
49
  "pharmgkb_gene_variants": "https://api.pharmgkb.org/v1/data/gene/{}/variants",
50
+
51
+ # Semantic Medical Resources
52
  "bioportal_search": "https://data.bioontology.org/search",
53
+
54
+ # Drug Classification Systems
55
  "rxnorm_rxcui": "https://rxnav.nlm.nih.gov/REST/rxcui.json",
56
  "rxnorm_properties": "https://rxnav.nlm.nih.gov/REST/rxcui/{}/properties.json",
 
57
  "rxclass_by_drug": "https://rxnav.nlm.nih.gov/REST/class/byDrugName.json"
58
  }
59
 
60
+ DEFAULT_HEADERS = {
61
+ "User-Agent": "PharmaResearchIntelligenceSuite/1.0 (Professional Use)",
62
+ "Accept": "application/json"
63
+ }
 
 
 
 
 
 
 
 
 
 
 
 
64
 
65
  # -----------------------------
66
+ # SECRETS MANAGEMENT
67
  # -----------------------------
68
+ class APIConfigurationError(Exception):
69
+ """Custom exception for missing API configurations"""
70
+ pass
71
+
72
+ try:
73
+ OPENAI_API_KEY = st.secrets["OPENAI_API_KEY"]
74
+ BIOPORTAL_API_KEY = st.secrets["BIOPORTAL_API_KEY"]
75
+ PUB_EMAIL = st.secrets["PUB_EMAIL"]
76
+ OPENFDA_KEY = st.secrets["OPENFDA_KEY"]
77
+
78
+ # Validate essential configurations
79
+ if not all([OPENAI_API_KEY, BIOPORTAL_API_KEY, PUB_EMAIL, OPENFDA_KEY]):
80
+ raise APIConfigurationError("Missing one or more required API credentials")
81
+
82
+ except (KeyError, APIConfigurationError) as e:
83
+ st.error(f"Critical configuration error: {str(e)}")
84
+ st.stop()
85
 
86
  # -----------------------------
87
+ # CORE INFRASTRUCTURE
88
  # -----------------------------
89
class PharmaResearchEngine:
    """Core engine for pharmaceutical data integration and analysis.

    Bundles the OpenAI client, a shared HTTP GET helper and the extraction
    of compound properties from PubChem ``PC_Compounds`` records.
    """

    def __init__(self):
        """Create the OpenAI client from the module-level ``OPENAI_API_KEY``."""
        self.openai_client = OpenAI(api_key=OPENAI_API_KEY)

    @staticmethod
    def api_request(endpoint: str,
                    params: Optional[Dict] = None,
                    headers: Optional[Dict] = None) -> Optional[Dict]:
        """Send a GET request and return the decoded JSON body.

        Args:
            endpoint: Fully formatted URL to query.
            params: Optional query-string parameters.
            headers: Optional extra headers; they override ``DEFAULT_HEADERS``
                on key collisions.

        Returns:
            The parsed JSON payload, or ``None`` when the request fails, the
            server answers with an error status, or the body is not valid
            JSON. Failures are logged and surfaced through ``st.error``.
        """
        try:
            response = requests.get(
                endpoint,
                params=params,
                headers={**DEFAULT_HEADERS, **(headers or {})},
                # (connect timeout, read timeout) in seconds.
                timeout=(3.05, 15)
            )
            response.raise_for_status()
            return response.json()
        except requests.exceptions.HTTPError as e:
            logger.error(f"HTTP Error {e.response.status_code} for {endpoint}")
            st.error(f"API Error: {e.response.status_code} - {e.response.reason}")
        except Exception as e:
            # Best-effort boundary: any other failure (connection, timeout,
            # JSON decoding) is reported to the user instead of crashing the app.
            logger.error(f"Network error for {endpoint}: {str(e)}")
            st.error(f"Network error: {str(e)}")
        return None

    def get_compound_profile(self, identifier: str) -> Optional[Dict]:
        """Retrieve a chemical profile for a compound name from PubChem.

        Args:
            identifier: Compound name to look up.

        Returns:
            A dict with the keys ``molecular_formula``, ``iupac_name``,
            ``canonical_smiles``, ``molecular_weight`` and ``logp`` (each a
            string, ``"N/A"`` when the property is absent), or ``None`` when
            PubChem returns no compound.
        """
        from urllib.parse import quote

        # The identifier becomes a URL path segment: characters such as
        # "/", "#" and "?" must be percent-encoded or they change the URL.
        pubchem_data = self.api_request(
            API_ENDPOINTS["pubchem"].format(quote(identifier, safe=""))
        )

        if not pubchem_data or not pubchem_data.get("PC_Compounds"):
            return None

        compound = pubchem_data["PC_Compounds"][0]
        return {
            'molecular_formula': self._extract_property(compound, 'Molecular Formula'),
            'iupac_name': self._extract_property(compound, 'IUPAC Name'),
            'canonical_smiles': self._extract_property(compound, 'Canonical SMILES'),
            'molecular_weight': self._extract_property(compound, 'Molecular Weight'),
            'logp': self._extract_property(compound, 'LogP')
        }

    def _extract_property(self, compound: Dict, prop_name: str) -> str:
        """Return one property of a PubChem compound record as a string.

        A property matches when ``prop_name`` equals either the URN label
        (e.g. ``"Molecular Formula"``) or the URN name followed by the label
        (e.g. ``"Canonical"`` + ``"SMILES"``). The comparison ignores case
        and whitespace, so ``"LogP"`` matches the label ``"Log P"``.

        Args:
            compound: One entry of the ``PC_Compounds`` list.
            prop_name: Human-readable property name to look up.

        Returns:
            The property value converted to ``str``. When several entries
            match, the one whose URN name is ``"Preferred"`` wins, otherwise
            the first match. ``"N/A"`` when nothing matches.
        """
        def normalise(text: str) -> str:
            return "".join(text.split()).casefold()

        wanted = normalise(prop_name)
        first_match = None
        for prop in compound.get("props", []):
            urn = prop.get("urn") or {}
            label = urn.get("label") or ""
            name = urn.get("name") or ""
            if wanted not in (normalise(label), normalise(f"{name} {label}")):
                continue

            # Values are stored under a type-specific key; numeric properties
            # use "fval"/"ival" rather than "sval".
            value = prop.get("value") or {}
            text = next(
                (str(value[key]) for key in ("sval", "fval", "ival") if key in value),
                None,
            )
            if text is None:
                continue
            if name == "Preferred":
                return text
            if first_match is None:
                first_match = text
        return first_match if first_match is not None else "N/A"
 
 
 
 
 
 
 
 
141
 
142
  # -----------------------------
143
+ # INTELLIGENCE MODULES
144
  # -----------------------------
145
class ClinicalIntelligence:
    """Handles clinical trial and regulatory data analysis."""

    def __init__(self):
        self.engine = PharmaResearchEngine()

    def get_trial_landscape(self, query: str) -> List[Dict]:
        """Fetch up to five studies matching a search term or an NCT id.

        Args:
            query: Free-text search term, or a study identifier starting
                with "NCT".

        Returns:
            At most five entries from the response's "studies" list; an empty
            list when the query is blank, the request fails, or nothing
            matches.
        """
        query = (query or "").strip()
        if not query:
            return []

        # Uses the ClinicalTrials.gov v2 parameter names: `pageSize` limits
        # the page and `filter.ids` selects studies by NCT id.
        if query.startswith("NCT"):
            params = {"filter.ids": query}
        else:
            params = {"query.term": query, "pageSize": 10}

        trials = self.engine.api_request(API_ENDPOINTS["clinical_trials"], params=params)
        # api_request returns None on any failure; treat that as "no results".
        if not trials:
            return []
        return trials.get("studies", [])[:5]
156
+
157
class AIDrugInnovator:
    """Drug development strategist backed by the "gpt-4" chat model."""

    def __init__(self):
        self.engine = PharmaResearchEngine()

    def generate_strategy(self, target: str, strategy: str) -> str:
        """Ask the chat model for a development strategy write-up.

        Args:
            target: Disease or target description interpolated into the prompt.
            strategy: Development paradigm interpolated into the prompt.

        Returns:
            The model's reply text, or a fixed failure message when the
            request raises (the error is logged, not propagated).
        """
        brief = "\n".join([
            f"As Chief Scientific Officer of a top pharmaceutical company, develop a {strategy} strategy for {target}.",
            "Include:",
            "- Target validation approach",
            "- Lead optimization tactics",
            "- Clinical trial design",
            "- Regulatory pathway analysis",
            "- Commercial potential assessment",
            "Format in Markdown with clear sections.",
        ])
        request_kwargs = {
            "model": "gpt-4",
            "messages": [{"role": "user", "content": brief}],
            "temperature": 0.7,
            "max_tokens": 1500,
        }

        try:
            completion = self.engine.openai_client.chat.completions.create(**request_kwargs)
            return completion.choices[0].message.content
        except Exception as err:
            logger.error(f"AI Strategy Error: {str(err)}")
            return "Strategy generation failed. Please check API configuration."
185
 
186
  # -----------------------------
187
+ # STREAMLIT INTERFACE
188
  # -----------------------------
189
class PharmaResearchInterface:
    """Streamlit UI for the pharmaceutical research platform.

    Builds the page chrome and a tabbed layout, delegating each tab to a
    private handler method on this class.
    """

    def __init__(self):
        self.clinical_intel = ClinicalIntelligence()
        self.ai_innovator = AIDrugInnovator()
        self._configure_page()

    def _configure_page(self):
        """Set the Streamlit page configuration and inject custom CSS."""
        st.set_page_config(
            page_title="PRIS - Pharma Research Intelligence Suite",
            layout="wide",
            initial_sidebar_state="expanded"
        )
        # The CSS is assembled without leading indentation so the Markdown
        # renderer does not treat it as an indented code block.
        page_css = "\n".join([
            "",
            "<style>",
            ".main {background-color: #f9f9f9;}",
            ".stAlert {padding: 20px;}",
            ".reportview-container .markdown-text-container {font-family: 'Arial'}",
            "</style>",
            "",
        ])
        st.markdown(page_css, unsafe_allow_html=True)

    def render(self):
        """Render the application title and the tabbed navigation."""
        st.title("Pharma Research Intelligence Suite")
        self._render_navigation()

    def _render_navigation(self):
        """Render one tab per section and dispatch to its handler.

        Handlers are looked up by name; a section whose handler is not
        defined on this class shows a placeholder instead of raising
        AttributeError and breaking the whole page.
        """
        sections = [
            ("🚀 Drug Innovation", "_drug_innovation"),
            ("📈 Trial Analytics", "_trial_analytics"),
            ("🧪 Compound Profiler", "_compound_profiler"),
            ("📜 Regulatory Hub", "_regulatory_hub"),
            ("🤖 AI Strategist", "_ai_strategist"),
        ]
        tabs = st.tabs([label for label, _ in sections])

        for tab, (label, handler_name) in zip(tabs, sections):
            with tab:
                handler = getattr(self, handler_name, None)
                if callable(handler):
                    handler()
                else:
                    st.info(f"{label} is not available yet.")

    def _drug_innovation(self):
        """Collect a target and paradigm, then show the generated blueprint."""
        st.header("AI-Powered Drug Innovation Engine")
        col1, col2 = st.columns([1, 3])

        with col1:
            target = st.text_input("Target Pathobiology:", placeholder="e.g., EGFR mutant NSCLC")
            strategy = st.selectbox("Development Paradigm:",
                                    ["First-in-class", "Fast-follower", "Biologic", "ADC", "Gene Therapy"])
            requested = st.button("Generate Development Blueprint")

        if not requested:
            return

        # Results go in the wide column that the original layout left unused.
        with col2:
            if not target.strip():
                st.warning("Enter a target before generating a blueprint.")
                return
            with st.spinner("Formulating strategic plan..."):
                blueprint = self.ai_innovator.generate_strategy(target, strategy)
            # Model output is rendered as plain Markdown: it is derived from
            # user input, so raw HTML is deliberately not enabled.
            st.markdown(blueprint)

    def _compound_profiler(self):
        """Look up a compound and display its structure and properties."""
        st.header("Multi-Omics Compound Profiler")
        compound = st.text_input("Analyze Compound:", placeholder="Enter drug name or SMILES")

        if not compound:
            return

        with st.spinner("Decoding molecular profile..."):
            profile = PharmaResearchEngine().get_compound_profile(compound)

        if not profile:
            st.warning("No compound profile found for that input.")
            return

        smiles = profile['canonical_smiles']
        col1, col2 = st.columns(2)
        with col1:
            st.subheader("Structural Insights")
            # "N/A" is the placeholder for a missing SMILES; do not parse it.
            mol = Chem.MolFromSmiles(smiles) if smiles and smiles != "N/A" else None
            if mol:
                img = Draw.MolToImage(mol, size=(400, 300))
                st.image(img, caption="2D Molecular Structure")
            else:
                st.info("No structure available to draw.")

        with col2:
            st.subheader("Physicochemical Profile")
            st.metric("Molecular Weight", profile['molecular_weight'])
            st.metric("LogP", profile['logp'])
            st.metric("IUPAC Name", profile['iupac_name'])
            st.code(f"SMILES: {smiles}")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
271
 
272
+ # -----------------------------
273
+ # MAIN EXECUTION
274
+ # -----------------------------
275
if __name__ == "__main__":
    # Build the UI (which also configures the page) and draw it.
    PharmaResearchInterface().render()