mgbam committed on
Commit
7cabb5b
·
verified ·
1 Parent(s): 5d7eb2b

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +566 -201
app.py CHANGED
@@ -6,7 +6,7 @@ from fpdf import FPDF
6
  import tempfile
7
  import time
8
  import requests
9
- import xml.etree.ElementTree as ET
10
  import json
11
  import pandas as pd
12
  import matplotlib.pyplot as plt
@@ -14,108 +14,106 @@ import seaborn as sns
14
  from typing import Optional, Dict, List, Any
15
  import os
16
  import logging
 
17
 
18
  # Setup logging
19
- logging.basicConfig(level=logging.ERROR) #Log only errors
20
 
21
  # API Endpoints (Centralized Configuration)
22
  API_ENDPOINTS = {
23
  "clinical_trials": "https://clinicaltrials.gov/api/v2/studies",
24
  "pubchem": "https://pubchem.ncbi.nlm.nih.gov/rest/pug/compound/name/{}/JSON",
25
- "pubmed": "https://eutils.ncbi.nlm.nih.gov/entrez/eutils/esearch.fcgi",
26
  "who_drugs": "https://health-products.canada.ca/api/drug/product",
27
- #"ema_reports": "https://www.ema.europa.eu/api/search/medicines", #Removed due to 403
28
- "fda_drug_approval": "https://api.fda.gov/drug/label.json", # Updated this to use base API
29
- "faers_adverse_events": "https://api.fda.gov/drug/event.json", # Updated this to use base API
30
- "pharmgkb": "https://api.pharmgkb.org/v1/data/variant/{}/clinicalAnnotations",
31
- "bioportal": "https://data.bioontology.org/ontologies"
 
 
32
  }
33
 
34
- #Email addresses
35
- #Email addresses
36
- if "PUB_EMAIL" in st.secrets:
37
- PUBMED_EMAIL = st.secrets["PUB_EMAIL"]
38
- else:
39
- PUBMED_EMAIL = None
40
- st.error("PubMed email not found in secrets. Please add the PUB_EMAIL to secrets.")
41
  CLINICALTRIALS_EMAIL = PUBMED_EMAIL
42
 
43
- # Retrieve the BioPortal API Key from secrets
44
- if "BIOPORTAL_API_KEY" in st.secrets:
45
- BIOPORTAL_API_KEY = st.secrets["BIOPORTAL_API_KEY"]
46
- else:
47
- BIOPORTAL_API_KEY = None
48
- st.error("BioPortal API key not found in secrets. Please add the BIOPORTAL_API_KEY to secrets.")
49
-
50
- # Retrieve the OpenFDA API Key from secrets
51
- if "OPENFDA_KEY" in st.secrets:
52
- OPENFDA_KEY = st.secrets["OPENFDA_KEY"]
53
- else:
54
- OPENFDA_KEY = None
55
- st.error("OpenFDA API key not found in secrets. Please add the OPENFDA_KEY to secrets.")
56
-
57
  # Initialize AI Agent (Context-aware)
58
  content_agent = CodeAgent(tools=[DuckDuckGoSearchTool()], model=HfApiModel())
59
 
60
  # --- Utility Functions ---
61
- def _query_api(endpoint: str, params: Optional[Dict] = None) -> Optional[Dict]:
62
  """Handles API requests with robust error handling."""
63
  try:
64
- response = requests.get(endpoint, params=params, timeout=15)
65
  response.raise_for_status() # Raise HTTPError for bad responses (4xx or 5xx)
66
  return response.json()
67
- except requests.exceptions.RequestException as e:
68
- st.error(f"API request failed: {e} for endpoint {endpoint}. Please check connectivity and the endpoint.")
69
- logging.error(f"API request failed: {e} for endpoint {endpoint}.")
70
- return None
 
 
 
 
 
 
71
 
72
- def _query_pubmed(query: str, email: Optional[str] = PUBMED_EMAIL) -> Optional[Dict]:
73
- """Queries PubMed with robust error handling."""
74
- if not email:
75
- st.error("PubMed email not configured.")
76
- return None
77
-
78
- params = {
79
- "db": "pubmed",
80
- "term": query,
81
- "retmax": 10,
82
- "retmode": "json",
83
- "email": email
84
- }
85
- data = _query_api(API_ENDPOINTS["pubmed"], params)
86
- if data and 'esearchresult' in data:
87
- return data
88
  else:
 
89
  return None
90
 
91
- def _safe_get(endpoint: str, params: Optional[Dict] = None):
92
- """Safely fetches data from an API."""
93
- try:
94
- response = requests.get(endpoint, params=params)
95
- response.raise_for_status() # Raises HTTPError for bad responses
96
- return response.json()
97
- except requests.exceptions.HTTPError as http_err:
98
- st.error(f"HTTP error occurred: {http_err}")
99
- logging.error(f"HTTP error occurred: {http_err}")
100
- return None
101
- except Exception as err:
102
- st.error(f"Other error occurred: {err}")
103
- logging.error(f"Other error occurred: {err}")
104
- return None
 
 
 
 
105
 
106
  def _get_pubchem_smiles(drug_name: str) -> Optional[str]:
107
  """Retrieves SMILES from PubChem, returns None on failure."""
108
  url = API_ENDPOINTS["pubchem"].format(drug_name)
109
  data = _query_api(url)
110
- if data and 'PC_Compounds' in data and data['PC_Compounds'][0]['props']:
111
- #Check if props exists and find SMILES value
112
- for prop in data['PC_Compounds'][0]['props']:
113
  if 'name' in prop and prop['name'] == 'Canonical SMILES':
114
  return prop['value']['sval']
115
  return None
116
 
117
-
118
- def _draw_molecule(smiles: str) -> Optional[any]:
119
  """Generates a 2D molecule image from SMILES."""
120
  try:
121
  mol = Chem.MolFromSmiles(smiles)
@@ -130,8 +128,7 @@ def _draw_molecule(smiles: str) -> Optional[any]:
130
  logging.error(f"Error generating molecule image: {str(e)}")
131
  return None
132
 
133
-
134
- def _get_clinical_trials(query: str, email:Optional[str] = CLINICALTRIALS_EMAIL) -> Optional[Dict]:
135
  """Queries clinicaltrials.gov with search term."""
136
  if not email:
137
  st.error("Clinical Trials email not configured.")
@@ -143,97 +140,298 @@ def _get_clinical_trials(query: str, email:Optional[str] = CLINICALTRIALS_EMAIL)
143
  }
144
  else:
145
  params = {
146
- "query.term": query,
147
- "fmt": "json",
148
- "email": email
 
149
  }
150
  return _query_api(API_ENDPOINTS["clinical_trials"], params)
151
 
152
-
153
- def _get_fda_approval(drug_name: str, api_key:Optional[str] = OPENFDA_KEY) -> Optional[Dict]:
154
  """Retrieves FDA approval info."""
155
  if not api_key:
156
- st.error("OpenFDA key not configured.")
157
- return None
158
- url = f"{API_ENDPOINTS['fda_drug_approval']}?api_key={api_key}&search=openfda.brand_name:\"{drug_name}\""
159
- data = _query_api(url)
160
- if data and 'results' in data and data['results']:
 
 
 
 
 
161
  return data['results'][0]
162
  else:
163
- return None
164
 
165
- def _analyze_adverse_events(drug_name: str, api_key:Optional[str] = OPENFDA_KEY, limit: int = 5) -> Optional[Dict]:
166
  """Fetches and analyzes adverse event reports from FAERS."""
167
  if not api_key:
168
  st.error("OpenFDA key not configured.")
169
  return None
170
- url = f"{API_ENDPOINTS['faers_adverse_events']}?api_key={api_key}&search=patient.drug.medicinalproduct:\"{drug_name}\"&limit={limit}"
171
- data = _query_api(url)
 
 
 
 
 
172
  if data and 'results' in data:
173
  return data
174
  else:
175
  return None
176
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
177
 
178
- def _get_pharmgkb_data(gene:str) -> Optional[Dict]:
179
- """Fetches pharmacogenomic data from PharmGKB."""
180
- url = f"https://api.pharmgkb.org/v1/data/variant/{gene}/clinicalAnnotations"
181
- data = _query_api(url)
182
- if data and 'clinicalAnnotations' in data:
183
- return data
184
- else:
185
- return None
 
 
 
 
 
 
 
 
 
 
 
 
186
 
187
  def _get_bioportal_data(ontology: str, term: str) -> Optional[Dict]:
188
  """Fetches data from BioPortal."""
189
  if not BIOPORTAL_API_KEY:
190
- st.error("BioPortal API key not found. Please add the BIOPORTAL_API_KEY to secrets.")
191
- return None
192
  if not term:
193
- st.error("Please provide a search term.")
194
- return None
195
 
196
  headers = {
197
  "Authorization": f"apikey token={BIOPORTAL_API_KEY}"
198
- }
199
  params = {
200
  "q": term,
201
  "ontologies": ontology
202
  }
203
 
204
- url = f"{API_ENDPOINTS['bioportal']}/search"
205
  try:
206
- response = requests.get(url, headers=headers, params=params, timeout=15)
207
- response.raise_for_status()
208
- data = response.json()
209
  if data and 'collection' in data:
210
  return data
211
  else:
212
  st.warning("No results found for the BioPortal query.")
213
  return None
214
- except requests.exceptions.RequestException as e:
215
  st.error(f"BioPortal API request failed: {e} Please check connectivity and ensure you have the correct API Key.")
216
  logging.error(f"BioPortal API request failed: {e}")
217
  return None
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
218
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
219
  def _save_pdf_report(report_content: str, filename: str):
220
  """Saves content to a PDF file."""
221
- pdf = FPDF()
222
- pdf.add_page()
223
- pdf.set_font("Arial", size=12)
224
- pdf.multi_cell(0, 10, report_content)
225
- pdf.output(filename)
226
- return filename
227
-
228
- def _display_dataframe(data: list, columns: list):
 
 
 
 
 
229
  """Displays data in a dataframe format."""
230
  if data:
231
  df = pd.DataFrame(data, columns=columns)
232
  st.dataframe(df)
233
  return df
234
  else:
235
- st.warning("No data found for dataframe creation.")
236
- return None
237
 
238
  # --- Streamlit App Configuration ---
239
  st.set_page_config(page_title="Pharma Research Expert Platform", layout="wide")
@@ -241,7 +439,7 @@ st.title("🔬 Pharma Research Expert Platform")
241
  st.markdown("An integrated platform for drug discovery, clinical research, and regulatory affairs.")
242
 
243
  # --- Tabs ---
244
- tabs = st.tabs(["💊 Drug Development", "📊 Trial Analytics", "🧬 Molecular Profiling", "📜 Regulatory Intelligence", "📚 Literature Search"])
245
 
246
  # --- Tab 1: Drug Development ---
247
  with tabs[0]:
@@ -254,7 +452,7 @@ with tabs[0]:
254
  with st.spinner("Analyzing target and competitive landscape..."):
255
  # AI-generated content with regulatory checks
256
  plan_prompt = f"""Develop a comprehensive drug development plan for the treatment of {target} using a {strategy} strategy.
257
- Include sections on target validation, lead optimization, preclinical testing, clinical trial design, regulatory submission strategy, market analysis, and competitive landscape. Highlight key milestones and potential challenges. """
258
  plan = content_agent.run(plan_prompt)
259
 
260
  st.subheader("Comprehensive Development Plan")
@@ -270,16 +468,48 @@ with tabs[0]:
270
  else:
271
  st.write("No relevant FDA data found.")
272
  else:
273
- st.write("Please enter a target to get relevant FDA data")
274
 
275
  # Pharmacogenomic integration
276
  st.subheader("Pharmacogenomic Considerations")
277
- pgx_data = _get_pharmgkb_data(target_gene)
278
- if pgx_data:
279
- st.write(pgx_data)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
280
  else:
281
- st.write("No relevant pharmacogenomic data found.")
282
-
283
 
284
  # --- Tab 2: Clinical Trial Analytics ---
285
  with tabs[1]:
@@ -289,17 +519,16 @@ with tabs[1]:
289
  if st.button("Analyze Trial Landscape"):
290
  with st.spinner("Aggregating global trial data..."):
291
  trials = _get_clinical_trials(trial_query)
292
- if trials and trials['studies']:
293
  st.subheader("Recent Clinical Trials")
294
  trial_data = []
295
  for study in trials['studies'][:5]:
296
-
297
  trial_data.append({
298
- "Title": study['briefTitle'],
299
- "Status": study['overallStatus'],
300
- "Phase": study['phase'] if 'phase' in study else 'Not Available',
301
- "Enrollment": study['enrollmentCount'] if 'enrollmentCount' in study else 'Not Available'
302
- })
303
 
304
  trial_df = _display_dataframe(trial_data, list(trial_data[0].keys())) if trial_data else None
305
 
@@ -307,51 +536,38 @@ with tabs[1]:
307
  st.markdown("### Clinical Trial Summary (First 5 trials)")
308
  st.dataframe(trial_df)
309
 
310
-
311
  # Adverse events analysis
312
  ae_data = _analyze_adverse_events(trial_query)
313
- if ae_data and ae_data['results']:
314
  st.subheader("Adverse Event Profile (Top 5 Reports)")
315
 
316
  ae_results = ae_data['results'][:5]
317
- ae_df = pd.DataFrame(ae_results)
318
  st.dataframe(ae_df)
319
 
320
- #Visualization of adverse events
321
- if 'patient' in ae_df and not ae_df.empty:
322
  try:
323
- drug_events = []
324
- for patient in ae_df['patient']:
325
- if isinstance(patient,dict) and 'drug' in patient:
326
- for drug in patient['drug']:
327
- if isinstance(drug,dict) and 'medicinalproduct' in drug and 'reaction' in patient:
328
- reactions = [reaction.get('reactionmeddrapt','') for reaction in patient['reaction']]
329
- for r in reactions:
330
- drug_events.append((drug.get('medicinalproduct', 'N/A'), r))
331
-
332
- df_drug_events = pd.DataFrame(drug_events,columns=['Drug', 'Reaction'])
333
- # Aggregate and Visualize top reactions
334
- if not df_drug_events.empty:
335
- top_reactions = df_drug_events['Reaction'].value_counts().nlargest(10)
336
-
337
- fig, ax = plt.subplots(figsize=(10,6))
338
- sns.barplot(x=top_reactions.index, y=top_reactions.values, ax=ax)
339
- ax.set_xticklabels(ax.get_xticklabels(), rotation=45, ha="right")
340
- plt.title('Top Adverse Reactions')
341
- plt.xlabel('Adverse Reaction')
342
- plt.ylabel('Frequency')
343
- st.pyplot(fig)
344
-
345
- #Display as dataframe
346
- st.markdown("### Top 10 Adverse Reaction Summary")
347
- st.dataframe(pd.DataFrame({'Reaction': top_reactions.index, 'Frequency': top_reactions.values}))
348
-
349
  except Exception as e:
350
  st.error(f"Error processing adverse events data: {e}")
 
 
351
  else:
352
  st.warning("No clinical trials found for the given search term.")
353
 
354
-
355
  # --- Tab 3: Molecular Profiling ---
356
  with tabs[2]:
357
  st.header("Advanced Molecular Profiling")
@@ -360,8 +576,12 @@ with tabs[2]:
360
 
361
  if st.button("Analyze Compound"):
362
  with st.spinner("Querying global databases..."):
363
- # SMILES resolution
364
- smiles = compound_input if Chem.MolFromSmiles(compound_input) else _get_pubchem_smiles(compound_input)
 
 
 
 
365
 
366
  if smiles:
367
  img = _draw_molecule(smiles)
@@ -369,22 +589,24 @@ with tabs[2]:
369
  st.image(img, caption="2D Structure")
370
  else:
371
  st.error("Compound structure not found in databases.")
372
-
373
  # PubChem properties
374
- pubchem_data = _query_api(API_ENDPOINTS["pubchem"].format(compound_input))
375
- if pubchem_data and 'PC_Compounds' in pubchem_data and pubchem_data['PC_Compounds']:
376
- st.subheader("Physicochemical Properties")
377
- props = pubchem_data['PC_Compounds'][0]['props']
378
- mw = next((prop['value']['sval'] for prop in props if 'name' in prop and prop['name'] == 'Molecular Weight'), 'N/A')
379
- logp = next((prop['value']['sval'] for prop in props if 'name' in prop and prop['name'] == 'LogP'), 'N/A')
380
-
381
- st.write(f"""
382
- Molecular Weight: {mw}
383
- LogP: {logp}
384
- """)
 
 
 
385
  else:
386
- st.error("Physicochemical properties not found.")
387
-
388
 
389
  # --- Tab 4: Regulatory Intelligence ---
390
  with tabs[3]:
@@ -393,34 +615,70 @@ with tabs[3]:
393
 
394
  if st.button("Generate Regulatory Report"):
395
  with st.spinner("Compiling global regulatory status..."):
396
- # Multi-regional checks
397
- fda = _get_fda_approval(drug_name)
398
- # ema = _query_api(API_ENDPOINTS["ema_reports"], {"search": drug_name}) #Removed EMA due to 403 error
399
- who = _query_api(API_ENDPOINTS["who_drugs"], {"name": drug_name})
 
 
 
 
 
 
 
 
 
 
 
 
 
400
 
401
  st.subheader("Regulatory Status")
402
- col1, col2, col3 = st.columns(3)
403
  with col1:
404
  st.markdown("**FDA Status**")
405
- st.write(fda['openfda']['brand_name'][0] if fda and 'openfda' in fda and 'brand_name' in fda['openfda'] else "Not approved")
406
  with col2:
407
  st.markdown("**EMA Status**")
408
- #st.write(ema['results'][0]['currentStatus'] if ema and 'results' in ema and ema['results'] else "Not approved") #Removed EMA due to 403 error
409
- st.write("Not Available")
410
  with col3:
411
  st.markdown("**WHO Essential Medicine**")
412
- st.write("Yes" if who else "No")
413
-
 
 
 
 
 
 
 
 
 
414
  # Save the information to a PDF report
415
- regulatory_content = f"### Regulatory Report\n\nFDA Status: {fda['openfda']['brand_name'][0] if fda and 'openfda' in fda and 'brand_name' in fda['openfda'] else 'Not Approved'}\n\nEMA Status: {'Not Available'}\n\nWHO Essential Medicine: {'Yes' if who else 'No'}"
416
- report_file = _save_pdf_report(regulatory_content, f"{drug_name}_regulatory_report.pdf")
417
- if report_file:
418
- with open(report_file, "rb") as file:
419
- st.download_button(
 
 
 
 
 
 
 
 
 
 
 
 
 
420
  label="Download Regulatory Report (PDF)",
421
  data=file,
422
  file_name=f"{drug_name}_regulatory_report.pdf",
423
- mime="application/pdf")
 
 
424
 
425
  # --- Tab 5: Literature Search ---
426
  with tabs[4]:
@@ -428,24 +686,131 @@ with tabs[4]:
428
  search_term = st.text_input("Enter search query for PubMed:", placeholder="e.g., Alzheimer's disease genetics")
429
  if st.button("Search PubMed"):
430
  with st.spinner("Searching PubMed..."):
431
- pubmed_data = _query_pubmed(search_term)
432
- if pubmed_data and 'esearchresult' in pubmed_data and 'idlist' in pubmed_data['esearchresult'] and pubmed_data['esearchresult']['idlist']:
433
  st.subheader("PubMed Search Results")
434
  st.write(f"Found {len(pubmed_data['esearchresult']['idlist'])} results for '{search_term}':")
435
  for article_id in pubmed_data['esearchresult']['idlist']:
436
- st.write(f"- PMID: {article_id}")
437
  else:
438
  st.write("No results found for that term.")
439
-
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
440
  st.header("Ontology Search")
441
  ontology_search_term = st.text_input("Enter Search query for Ontology:", placeholder="Enter disease or ontology")
442
- ontology_select = st.selectbox("Select Ontology", ["MESH","NCIT","GO", "SNOMEDCT"])
443
  if st.button("Search BioPortal"):
444
  with st.spinner("Searching Ontology..."):
445
  bioportal_data = _get_bioportal_data(ontology_select, ontology_search_term)
446
- if bioportal_data and 'collection' in bioportal_data:
447
  st.subheader(f"BioPortal Search Results for {ontology_select}")
448
  for result in bioportal_data['collection']:
449
- st.write(f"- {result['prefLabel']} ({result['@id']})")
 
 
450
  else:
451
- st.write("No results found")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
6
  import tempfile
7
  import time
8
  import requests
9
+ from bs4 import BeautifulSoup
10
  import json
11
  import pandas as pd
12
  import matplotlib.pyplot as plt
 
14
  from typing import Optional, Dict, List, Any
15
  import os
16
  import logging
17
+ from transformers import pipeline
18
 
19
  # Setup logging
20
# Only ERROR-level records (and above) are emitted by the root logger.
logging.basicConfig(level=logging.ERROR)

# Central registry of remote endpoints; entries containing "{}" are templates
# that callers fill in with str.format().
API_ENDPOINTS = {
    "clinical_trials": "https://clinicaltrials.gov/api/v2/studies",
    "pubchem": "https://pubchem.ncbi.nlm.nih.gov/rest/pug/compound/name/{}/JSON",
    "pubmed": "https://eutils.ncbi.nlm.nih.gov/entrez/eutils/esearch.fcgi",
    "who_drugs": "https://health-products.canada.ca/api/drug/product",
    "fda_drug_approval": "https://api.fda.gov/drug/label.json",
    "faers_adverse_events": "https://api.fda.gov/drug/event.json",
    "pharmgkb_variant_clinical_annotations": "https://api.pharmgkb.org/v1/data/variant/{}/clinicalAnnotations",
    "pharmgkb_gene_variants": "https://api.pharmgkb.org/v1/data/gene/{}/variants",
    "bioportal_search": "https://data.bioontology.org/search",
    "dailymed": "https://dailymed.nlm.nih.gov/dailymed/services/v2/spls.json",
    "drugbank": "https://go.drugbank.com/unearth/q?utf8=%E2%9C%93&searcher=drugs&query={}",
}

# Contact e-mail and API keys come from Streamlit secrets.
PUBMED_EMAIL = st.secrets.get("PUB_EMAIL")
# The clinical-trials helper reuses the PubMed contact address.
CLINICALTRIALS_EMAIL = PUBMED_EMAIL

BIOPORTAL_API_KEY = st.secrets.get("BIOPORTAL_API_KEY")
SEMANTIC_SCHOLAR_API_KEY = st.secrets.get("SEMANTIC_SCHOLAR_API_KEY")
OPENFDA_KEY = st.secrets.get("OPENFDA_KEY")

# Surface missing configuration on the page: three secrets are reported as
# errors, the Semantic Scholar key only as a warning.
if not PUBMED_EMAIL:
    st.error("PubMed email not found in secrets. Please add PUB_EMAIL to secrets.")
if not BIOPORTAL_API_KEY:
    st.error("BioPortal API key not found in secrets. Please add BIOPORTAL_API_KEY to secrets.")
if not OPENFDA_KEY:
    st.error("OpenFDA API key not found in secrets. Please add OPENFDA_KEY to secrets.")
if not SEMANTIC_SCHOLAR_API_KEY:
    st.warning("Semantic Scholar API key not found. Some features may be unavailable.")

# Initialize AI Agent (Context-aware)
content_agent = CodeAgent(tools=[DuckDuckGoSearchTool()], model=HfApiModel())
57
 
58
  # --- Utility Functions ---
59
def _query_api(endpoint: str, params: Optional[Dict] = None, headers: Optional[Dict] = None) -> Optional[Dict]:
    """Issue a GET request and return the decoded JSON body.

    Args:
        endpoint: URL to request.
        params: Optional query-string parameters.
        headers: Optional HTTP headers.

    Returns:
        The parsed JSON payload, or None when the request or the decoding
        fails. Every failure is shown with ``st.error`` and logged.
    """
    try:
        reply = requests.get(endpoint, params=params, headers=headers, timeout=15)
        # Turn 4xx / 5xx responses into HTTPError so they are reported below.
        reply.raise_for_status()
        payload = reply.json()
    except requests.exceptions.HTTPError as http_err:
        message = f"HTTP error occurred: {http_err} for endpoint {endpoint}."
        st.error(message)
        logging.error(message)
        return None
    except requests.exceptions.RequestException as req_err:
        message = f"Request exception: {req_err} for endpoint {endpoint}."
        st.error(message)
        logging.error(message)
        return None
    except Exception as e:
        # The on-page and logged wordings intentionally differ here.
        st.error(f"An unexpected error occurred: {e} for endpoint {endpoint}.")
        logging.error(f"Unexpected error: {e} for endpoint {endpoint}.")
        return None
    return payload
75
 
76
def _get_pharmgkb_clinical_annotations(variant_id: str) -> Optional[Dict]:
    """Look up PharmGKB clinical annotations for a single variant.

    Returns the decoded response when it is non-empty and contains a ``data``
    key; otherwise writes a "no data" note to the page and returns None.
    """
    url_template = API_ENDPOINTS["pharmgkb_variant_clinical_annotations"]
    payload = _query_api(url_template.format(variant_id))
    if not payload or 'data' not in payload:
        st.write(f"No relevant pharmacogenomic data found for variant {variant_id}.")
        return None
    return payload
85
 
86
def _get_pharmgkb_variants_for_gene(gene_symbol: str) -> Optional[List[str]]:
    """Resolve a gene symbol to the IDs of its PharmGKB variants.

    Two requests are made: a gene search by name (the first hit supplies the
    gene ID) and a variant listing for that ID. When either step yields
    nothing usable, a warning is shown and None is returned.
    """
    gene_lookup = _query_api("https://api.pharmgkb.org/v1/data/gene", {"name": gene_symbol})
    gene_found = bool(gene_lookup) and 'data' in gene_lookup and len(gene_lookup['data']) > 0
    if gene_found:
        pharmgkb_id = gene_lookup['data'][0]['id']
        listing = _query_api(API_ENDPOINTS["pharmgkb_gene_variants"].format(pharmgkb_id))
        if listing and 'data' in listing:
            return [entry['id'] for entry in listing['data']]
    st.warning(f"No variants found for gene: {gene_symbol}")
    return None
104
 
105
  def _get_pubchem_smiles(drug_name: str) -> Optional[str]:
106
  """Retrieves SMILES from PubChem, returns None on failure."""
107
  url = API_ENDPOINTS["pubchem"].format(drug_name)
108
  data = _query_api(url)
109
+ if data and 'PC_Compounds' in data and len(data['PC_Compounds']) > 0:
110
+ # Check if 'props' exists and find SMILES value
111
+ for prop in data['PC_Compounds'][0].get('props', []):
112
  if 'name' in prop and prop['name'] == 'Canonical SMILES':
113
  return prop['value']['sval']
114
  return None
115
 
116
+ def _draw_molecule(smiles: str) -> Optional[Any]:
 
117
  """Generates a 2D molecule image from SMILES."""
118
  try:
119
  mol = Chem.MolFromSmiles(smiles)
 
128
  logging.error(f"Error generating molecule image: {str(e)}")
129
  return None
130
 
131
+ def _get_clinical_trials(query: str, email: Optional[str] = CLINICALTRIALS_EMAIL) -> Optional[Dict]:
 
132
  """Queries clinicaltrials.gov with search term."""
133
  if not email:
134
  st.error("Clinical Trials email not configured.")
 
140
  }
141
  else:
142
  params = {
143
+ "term": query,
144
+ "retmax": 10,
145
+ "retmode": "json",
146
+ "email": email
147
  }
148
  return _query_api(API_ENDPOINTS["clinical_trials"], params)
149
 
150
def _get_fda_approval(drug_name: str, api_key: Optional[str] = OPENFDA_KEY) -> Optional[Dict]:
    """Return the first openFDA drug-label record matching a brand name.

    Shows an error and returns None when no API key is supplied; returns None
    when the request fails or the response has no results.
    """
    if not api_key:
        st.error("OpenFDA key not configured.")
        return None

    request_params = {
        "api_key": api_key,
        "search": f'openfda.brand_name:"{drug_name}"',
        "limit": 1,
    }
    payload = _query_api(API_ENDPOINTS["fda_drug_approval"], request_params)
    if not payload or 'results' not in payload or len(payload['results']) == 0:
        return None
    return payload['results'][0]
166
 
167
def _analyze_adverse_events(drug_name: str, api_key: Optional[str] = OPENFDA_KEY, limit: int = 5) -> Optional[Dict]:
    """Fetch adverse-event reports for a drug from the openFDA event endpoint.

    Returns the whole decoded response when it contains a ``results`` key,
    otherwise None. A missing API key is reported on the page.
    """
    if not api_key:
        st.error("OpenFDA key not configured.")
        return None

    request_params = {
        "api_key": api_key,
        "search": f'patient.drug.medicinalproduct:"{drug_name}"',
        "limit": limit,
    }
    payload = _query_api(API_ENDPOINTS["faers_adverse_events"], request_params)
    if not payload or 'results' not in payload:
        return None
    return payload
183
 
184
def scrape_ema_drug_info(drug_name: str) -> Optional[Dict]:
    """Scrape the EMA website for basic information about a drug.

    Runs a product search, follows the first result link and extracts the
    approval status and indications from that page.

    Args:
        drug_name: Name to search for.

    Returns:
        A dict with the keys "Drug Name", "EMA Approval Status", "Indications"
        and "EMA Drug Page", or None when nothing is found or a request or
        parsing step fails (failures are shown on the page and logged).
    """
    from urllib.parse import urljoin

    base_url = "https://www.ema.europa.eu"

    def _text_or_default(tag) -> str:
        # A missing element is reported as "Not Available".
        return tag.get_text(strip=True) if tag else "Not Available"

    try:
        # Let requests build the query string so characters such as "&" or
        # "#" in the name are encoded instead of corrupting the URL.
        response = requests.get(
            f"{base_url}/en/search",
            params={"text": drug_name, "type": "Product"},
            timeout=10,
        )
        response.raise_for_status()

        soup = BeautifulSoup(response.text, 'html.parser')

        # First search hit; treat a link without an href like "no result".
        result = soup.find('a', class_='search-result__title')
        href = result.get('href') if result else None
        if not href:
            st.warning(f"No EMA data found for '{drug_name}'.")
            return None

        # urljoin handles site-relative and absolute hrefs alike.
        drug_page_url = urljoin(base_url, href)

        drug_response = requests.get(drug_page_url, timeout=10)
        drug_response.raise_for_status()
        drug_soup = BeautifulSoup(drug_response.text, 'html.parser')

        approval_status = _text_or_default(drug_soup.find('span', class_='product-status'))
        indications = _text_or_default(drug_soup.find('div', class_='indications'))

        return {
            "Drug Name": drug_name,
            "EMA Approval Status": approval_status,
            "Indications": indications,
            "EMA Drug Page": drug_page_url
        }

    except requests.exceptions.HTTPError as http_err:
        st.error(f"HTTP error occurred while scraping EMA: {http_err}")
        logging.error(f"HTTP error: {http_err}")
    except Exception as e:
        st.error(f"An error occurred while scraping EMA: {e}")
        logging.error(f"Scraping error: {e}")

    return None
233
 
234
def _get_dailymed_label(drug_name: str) -> Optional[Dict]:
    """Query DailyMed for a drug label and return its URL.

    Returns ``{"Label URL": ...}`` built from the first entry of the response's
    ``data`` list, or None (with a warning or error on the page) when there is
    no entry or anything goes wrong.
    """
    try:
        query = {"drug_name": drug_name, "page": 1, "pagesize": 1}
        payload = _query_api(API_ENDPOINTS["dailymed"], query)
        has_hit = payload and 'data' in payload and len(payload['data']) > 0
        if not has_hit:
            st.warning(f"No DailyMed label found for '{drug_name}'.")
            return None
        return {"Label URL": payload['data'][0]['url']}
    except Exception as e:
        st.error(f"Error fetching DailyMed data: {e}")
        logging.error(f"DailyMed error: {e}")
        return None
254
 
255
  def _get_bioportal_data(ontology: str, term: str) -> Optional[Dict]:
256
  """Fetches data from BioPortal."""
257
  if not BIOPORTAL_API_KEY:
258
+ st.error("BioPortal API key not found. Please add the BIOPORTAL_API_KEY to secrets.")
259
+ return None
260
  if not term:
261
+ st.error("Please provide a search term.")
262
+ return None
263
 
264
  headers = {
265
  "Authorization": f"apikey token={BIOPORTAL_API_KEY}"
266
+ }
267
  params = {
268
  "q": term,
269
  "ontologies": ontology
270
  }
271
 
272
+ endpoint = API_ENDPOINTS["bioportal_search"]
273
  try:
274
+ data = _query_api(endpoint, params, headers)
 
 
275
  if data and 'collection' in data:
276
  return data
277
  else:
278
  st.warning("No results found for the BioPortal query.")
279
  return None
280
+ except Exception as e:
281
  st.error(f"BioPortal API request failed: {e} Please check connectivity and ensure you have the correct API Key.")
282
  logging.error(f"BioPortal API request failed: {e}")
283
  return None
284
+
285
def _get_semantic_scholar_papers(query: str, limit: int = 5) -> Optional[List[Dict]]:
    """Search Semantic Scholar and return the matching paper records.

    Requests the title, authors, year, abstract and url fields. Returns the
    response's ``data`` list, or None (after a warning or error on the page)
    when the API key is missing, nothing is found, or the lookup raises.
    """
    try:
        api_key = SEMANTIC_SCHOLAR_API_KEY
        if not api_key:
            st.warning("Semantic Scholar API key not found. Please add it to secrets.")
            return None

        payload = _query_api(
            "https://api.semanticscholar.org/graph/v1/paper/search",
            {
                "query": query,
                "limit": limit,
                "fields": "title,authors,year,abstract,url",
            },
            {"x-api-key": api_key},
        )
        if payload and 'data' in payload:
            return payload['data']

        st.warning("No papers found for the given query.")
        return None
    except Exception as e:
        st.error(f"Error fetching Semantic Scholar data: {e}")
        logging.error(f"Semantic Scholar error: {e}")
        return None
310
+
311
def _get_pharmvar_haplotypes(gene_symbol: str) -> Optional[List[Dict]]:
    """Request haplotype records for a gene from the PharmVar API.

    Returns the response's ``data`` value, or None (after a warning or error
    on the page) when there is none or the lookup raises.
    """
    try:
        payload = _query_api(f"https://api.pharmvar.org/v1/gene/{gene_symbol}/haplotypes")
        if not payload or 'data' not in payload:
            st.warning(f"No haplotype data found for gene '{gene_symbol}'.")
            return None
        return payload['data']
    except Exception as e:
        st.error(f"Error fetching PharmVar data: {e}")
        logging.error(f"PharmVar error: {e}")
        return None
325
+
326
def _create_variant_network(gene: str, variants: List[str], annotations: Dict):
    """Build a Plotly figure of gene-variant-drug relationships.

    Args:
        gene: Label of the central gene node.
        variants: Variant labels; each is linked to the gene.
        annotations: Maps a variant label to an iterable of drug labels.
            Entries equal to 'N/A' are skipped; a missing or None value is
            treated as "no drugs".

    Returns:
        A ``plotly.graph_objects.Figure`` with one edge trace and one node
        trace, laid out with networkx's spring layout.
    """
    import networkx as nx
    import plotly.graph_objects as go

    graph = nx.Graph()
    graph.add_node(gene, color='lightblue')
    for variant in variants:
        graph.add_node(variant, color='lightgreen')
        graph.add_edge(gene, variant)
        # "or []" keeps a None annotation value from breaking the loop.
        for drug in annotations.get(variant) or []:
            if drug != 'N/A':
                graph.add_node(drug, color='lightcoral')
                graph.add_edge(variant, drug)

    pos = nx.spring_layout(graph)

    # Each edge becomes a segment; None breaks the line between segments.
    edge_x = []
    edge_y = []
    for start, end in graph.edges():
        x0, y0 = pos[start]
        x1, y1 = pos[end]
        edge_x.extend([x0, x1, None])
        edge_y.extend([y0, y1, None])

    edge_trace = go.Scatter(
        x=edge_x, y=edge_y,
        line=dict(width=0.5, color='#888'),
        hoverinfo='none',
        mode='lines'
    )

    node_x = []
    node_y = []
    node_text = []
    node_color = []
    for node in graph.nodes():
        x, y = pos[node]
        node_x.append(x)
        node_y.append(y)
        node_text.append(node)
        node_color.append(graph.nodes[node]['color'])

    node_trace = go.Scatter(
        x=node_x, y=node_y,
        mode='markers+text',
        hoverinfo='text',
        text=node_text,
        textposition="bottom center",
        marker=dict(
            showscale=False,
            colorscale='YlGnBu',
            color=node_color,
            size=10,
            line_width=2
        )
    )

    fig = go.Figure(
        data=[edge_trace, node_trace],
        layout=go.Layout(
            # title.font.size replaces the deprecated "titlefont_size"
            # shortcut (NOTE(review): reportedly rejected by newer Plotly
            # releases - confirm against the pinned version).
            title=dict(text='<br>Gene-Variant-Drug Network', font=dict(size=16)),
            showlegend=False,
            hovermode='closest',
            margin=dict(b=20, l=5, r=5, t=40),
            annotations=[dict(
                text="Network visualization of gene-variant-drug relationships",
                showarrow=False,
                xref="paper", yref="paper",
                x=0.005, y=-0.002)],
            xaxis=dict(showgrid=False, zeroline=False, showticklabels=False),
            yaxis=dict(showgrid=False, zeroline=False, showticklabels=False)
        )
    )
    return fig
400
+
401
@st.cache_resource
def _load_summarizer():
    """Create the summarization pipeline once and share it across reruns."""
    return pipeline("summarization")


def _summarize_abstract(abstract: str) -> str:
    """Summarize an abstract with the default summarization pipeline.

    Args:
        abstract: Text to summarize. ``None``, non-string and blank values
            are accepted and yield the fallback message.

    Returns:
        The generated summary text, or ``"Summary not available."`` when
        there is nothing to summarize or the model call fails (the failure
        is shown in the UI and logged).
    """
    if not isinstance(abstract, str) or not abstract.strip():
        # Callers may hand over a missing abstract; skip the model entirely.
        return "Summary not available."
    try:
        # Loading the model is the expensive step, so reuse the cached
        # pipeline instead of rebuilding it for every abstract.
        summarizer = _load_summarizer()
        # truncation=True clips inputs longer than the model's maximum
        # sequence length instead of letting the call fail.
        summary = summarizer(
            abstract,
            max_length=130,
            min_length=30,
            do_sample=False,
            truncation=True,
        )
        return summary[0]['summary_text']
    except Exception as e:
        st.error(f"Error summarizing abstract: {e}")
        logging.error(f"NLP Summarization Error: {e}")
        return "Summary not available."
411
+
412
def _save_pdf_report(report_content: str, filename: str) -> Optional[str]:
    """Write ``report_content`` to a one-font PDF file.

    Args:
        report_content: Plain text to place in the document. Characters
            outside Latin-1 are replaced with ``?`` because the built-in
            Arial font cannot encode them.
        filename: Destination path of the PDF.

    Returns:
        ``filename`` on success, or ``None`` if the PDF could not be written
        (the error is shown in the UI and logged).
    """
    try:
        pdf = FPDF()
        pdf.add_page()
        pdf.set_font("Arial", size=12)
        # Core fonts are Latin-1 only; without this step one unsupported
        # character (e.g. a curly quote) aborts the whole report.
        printable = report_content.encode("latin-1", "replace").decode("latin-1")
        pdf.multi_cell(0, 10, printable)
        pdf.output(filename)
        return filename
    except Exception as e:
        st.error(f"Error saving PDF report: {e}")
        logging.error(f"Error saving PDF report: {e}")
        return None
425
+
426
def _display_dataframe(data: List[Dict[str, Any]], columns: List[str]):
    """Render ``data`` as a Streamlit table and hand back the DataFrame.

    Args:
        data: Row dictionaries to tabulate.
        columns: Column names passed to the ``DataFrame`` constructor.

    Returns:
        The displayed ``pandas.DataFrame``, or ``None`` (after a warning in
        the UI) when ``data`` is empty.
    """
    if not data:
        st.warning("No data found for dataframe creation.")
        return None

    frame = pd.DataFrame(data, columns=columns)
    st.dataframe(frame)
    return frame
435
 
436
  # --- Streamlit App Configuration ---
437
  st.set_page_config(page_title="Pharma Research Expert Platform", layout="wide")
 
439
  st.markdown("An integrated platform for drug discovery, clinical research, and regulatory affairs.")
440
 
441
  # --- Tabs ---
442
+ tabs = st.tabs(["💊 Drug Development", "📊 Trial Analytics", "🧬 Molecular Profiling", "📜 Regulatory Intelligence", "📚 Literature Search", "📈 Dashboard"])
443
 
444
  # --- Tab 1: Drug Development ---
445
  with tabs[0]:
 
452
  with st.spinner("Analyzing target and competitive landscape..."):
453
  # AI-generated content with regulatory checks
454
  plan_prompt = f"""Develop a comprehensive drug development plan for the treatment of {target} using a {strategy} strategy.
455
+ Include sections on target validation, lead optimization, preclinical testing, clinical trial design, regulatory submission strategy, market analysis, and competitive landscape. Highlight key milestones and potential challenges."""
456
  plan = content_agent.run(plan_prompt)
457
 
458
  st.subheader("Comprehensive Development Plan")
 
468
  else:
469
  st.write("No relevant FDA data found.")
470
  else:
471
+ st.write("Please enter a target to get relevant FDA data.")
472
 
473
  # Pharmacogenomic integration
474
  st.subheader("Pharmacogenomic Considerations")
475
+ if target_gene:
476
+ variant_ids = _get_pharmgkb_variants_for_gene(target_gene)
477
+ if variant_ids:
478
+ annotations = {}
479
+ for variant_id in variant_ids[:5]: # Limit to first 5 variants for brevity
480
+ pgx_data = _get_pharmgkb_clinical_annotations(variant_id)
481
+ if pgx_data and 'data' in pgx_data:
482
+ annotations[variant_id] = [annotation.get('obj2Name', 'N/A') for annotation in pgx_data['data']]
483
+ else:
484
+ annotations[variant_id] = []
485
+
486
+ # Display annotations
487
+ for variant_id in variant_ids[:5]:
488
+ st.write(f"### Clinical Annotations for Variant: {variant_id}")
489
+ pgx_data = _get_pharmgkb_clinical_annotations(variant_id)
490
+ if pgx_data and 'data' in pgx_data:
491
+ for annotation in pgx_data['data']:
492
+ drug = annotation.get('obj2Name', 'N/A')
493
+ if drug != 'N/A':
494
+ st.write(f"- **Drug:** {drug}")
495
+ else:
496
+ st.write(f"No clinical annotations found for variant {variant_id}.")
497
+
498
+ # Fetch PharmVar haplotypes
499
+ haplotypes = _get_pharmvar_haplotypes(target_gene)
500
+ if haplotypes:
501
+ st.subheader("PharmVar Haplotypes")
502
+ for hap in haplotypes:
503
+ st.write(f"- **Haplotype:** {hap.get('name', 'N/A')}")
504
+ st.write(f" - **Variant IDs:** {', '.join(hap.get('variantIds', []))}")
505
+
506
+ # Display variant network
507
+ fig = _create_variant_network(target_gene, variant_ids[:5], annotations)
508
+ st.plotly_chart(fig, use_container_width=True)
509
+ else:
510
+ st.write("No variants found for the specified gene.")
511
  else:
512
+ st.write("Please enter a target gene to retrieve pharmacogenomic data.")
 
513
 
514
  # --- Tab 2: Clinical Trial Analytics ---
515
  with tabs[1]:
 
519
  if st.button("Analyze Trial Landscape"):
520
  with st.spinner("Aggregating global trial data..."):
521
  trials = _get_clinical_trials(trial_query)
522
+ if trials and 'studies' in trials and len(trials['studies']) > 0:
523
  st.subheader("Recent Clinical Trials")
524
  trial_data = []
525
  for study in trials['studies'][:5]:
 
526
  trial_data.append({
527
+ "Title": study.get('briefTitle', 'N/A'),
528
+ "Status": study.get('overallStatus', 'N/A'),
529
+ "Phase": study.get('phase', 'Not Available'),
530
+ "Enrollment": study.get('enrollmentCount', 'Not Available')
531
+ })
532
 
533
  trial_df = _display_dataframe(trial_data, list(trial_data[0].keys())) if trial_data else None
534
 
 
536
  st.markdown("### Clinical Trial Summary (First 5 trials)")
537
  st.dataframe(trial_df)
538
 
 
539
  # Adverse events analysis
540
  ae_data = _analyze_adverse_events(trial_query)
541
+ if ae_data and 'results' in ae_data and len(ae_data['results']) > 0:
542
  st.subheader("Adverse Event Profile (Top 5 Reports)")
543
 
544
  ae_results = ae_data['results'][:5]
545
+ ae_df = pd.json_normalize(ae_results)
546
  st.dataframe(ae_df)
547
 
548
+ # Visualization of adverse events
549
+ if 'patient.reaction.reactionmeddrapt' in ae_df.columns and not ae_df.empty:
550
  try:
551
+ reactions = ae_df['patient.reaction.reactionmeddrapt'].explode().dropna()
552
+ top_reactions = reactions.value_counts().nlargest(10)
553
+
554
+ fig, ax = plt.subplots(figsize=(10,6))
555
+ sns.barplot(x=top_reactions.values, y=top_reactions.index, ax=ax)
556
+ plt.title('Top Adverse Reactions')
557
+ plt.xlabel('Frequency')
558
+ plt.ylabel('Adverse Reaction')
559
+ st.pyplot(fig)
560
+
561
+ # Display as dataframe
562
+ st.markdown("### Top 10 Adverse Reaction Summary")
563
+ st.dataframe(pd.DataFrame({'Reaction': top_reactions.index, 'Frequency': top_reactions.values}))
 
 
 
 
 
 
 
 
 
 
 
 
 
564
  except Exception as e:
565
  st.error(f"Error processing adverse events data: {e}")
566
+ else:
567
+ st.write("No adverse event data found.")
568
  else:
569
  st.warning("No clinical trials found for the given search term.")
570
 
 
571
  # --- Tab 3: Molecular Profiling ---
572
  with tabs[2]:
573
  st.header("Advanced Molecular Profiling")
 
576
 
577
  if st.button("Analyze Compound"):
578
  with st.spinner("Querying global databases..."):
579
+ # Determine if input is a SMILES string
580
+ smiles = None
581
+ if Chem.MolFromSmiles(compound_input):
582
+ smiles = compound_input
583
+ else:
584
+ smiles = _get_pubchem_smiles(compound_input)
585
 
586
  if smiles:
587
  img = _draw_molecule(smiles)
 
589
  st.image(img, caption="2D Structure")
590
  else:
591
  st.error("Compound structure not found in databases.")
592
+
593
  # PubChem properties
594
+ if compound_input:
595
+ pubchem_data = _query_api(API_ENDPOINTS["pubchem"].format(compound_input))
596
+ if pubchem_data and 'PC_Compounds' in pubchem_data and len(pubchem_data['PC_Compounds']) > 0:
597
+ st.subheader("Physicochemical Properties")
598
+ props = pubchem_data['PC_Compounds'][0].get('props', [])
599
+ mw = next((prop['value']['sval'] for prop in props if 'name' in prop and prop['name'] == 'Molecular Weight'), 'N/A')
600
+ logp = next((prop['value']['sval'] for prop in props if 'name' in prop and prop['name'] == 'LogP'), 'N/A')
601
+
602
+ st.write(f"""
603
+ **Molecular Weight:** {mw}
604
+ **LogP:** {logp}
605
+ """)
606
+ else:
607
+ st.error("Physicochemical properties not found.")
608
  else:
609
+ st.warning("Please enter a compound identifier.")
 
610
 
611
  # --- Tab 4: Regulatory Intelligence ---
612
  with tabs[3]:
 
615
 
616
  if st.button("Generate Regulatory Report"):
617
  with st.spinner("Compiling global regulatory status..."):
618
+ # FDA Status
619
+ fda_info = _get_fda_approval(drug_name)
620
+ fda_status = "Not Approved"
621
+ if fda_info and 'openfda' in fda_info and 'brand_name' in fda_info['openfda']:
622
+ fda_status = ', '.join(fda_info['openfda']['brand_name'])
623
+
624
+ # EMA Status via Scraping
625
+ ema_info = scrape_ema_drug_info(drug_name)
626
+ ema_status = ema_info['EMA Approval Status'] if ema_info else "Not Available"
627
+
628
+ # WHO Essential Medicine
629
+ who = _query_api(API_ENDPOINTS["who_drugs"], params={"name": drug_name})
630
+ who_status = "Yes" if who else "No"
631
+
632
+ # DailyMed Label
633
+ dailymed_info = _get_dailymed_label(drug_name)
634
+ dailymed_status = dailymed_info['Label URL'] if dailymed_info else "Not Available"
635
 
636
  st.subheader("Regulatory Status")
637
+ col1, col2, col3, col4 = st.columns(4)
638
  with col1:
639
  st.markdown("**FDA Status**")
640
+ st.write(fda_status)
641
  with col2:
642
  st.markdown("**EMA Status**")
643
+ st.write(ema_status)
 
644
  with col3:
645
  st.markdown("**WHO Essential Medicine**")
646
+ st.write(who_status)
647
+ with col4:
648
+ st.markdown("**DailyMed Label**")
649
+ st.write(dailymed_status)
650
+ if dailymed_info and 'Label URL' in dailymed_info:
651
+ st.markdown(f"[View DailyMed Label]({dailymed_info['Label URL']})")
652
+
653
+ # Display EMA Drug Page Link if available
654
+ if ema_info and 'EMA Drug Page' in ema_info:
655
+ st.markdown(f"[View EMA Drug Page]({ema_info['EMA Drug Page']})")
656
+
657
  # Save the information to a PDF report
658
+ regulatory_content = f"""### Regulatory Report
659
+
660
+ **FDA Status:** {fda_status}
661
+
662
+ **EMA Status:** {ema_status}
663
+
664
+ **WHO Essential Medicine:** {who_status}
665
+
666
+ **DailyMed Label:** {dailymed_status}
667
+ """
668
+ if ema_info and 'EMA Drug Page' in ema_info:
669
+ regulatory_content += f"\n**EMA Drug Page:** {ema_info['EMA Drug Page']}\n"
670
+
671
+ with tempfile.NamedTemporaryFile(delete=False, suffix=".pdf") as tmp_file:
672
+ report_file = _save_pdf_report(regulatory_content, tmp_file.name)
673
+ if report_file:
674
+ with open(report_file, "rb") as file:
675
+ st.download_button(
676
  label="Download Regulatory Report (PDF)",
677
  data=file,
678
  file_name=f"{drug_name}_regulatory_report.pdf",
679
+ mime="application/pdf"
680
+ )
681
+ os.remove(report_file) # Clean up the temporary file
682
 
683
  # --- Tab 5: Literature Search ---
684
  with tabs[4]:
 
686
  search_term = st.text_input("Enter search query for PubMed:", placeholder="e.g., Alzheimer's disease genetics")
687
  if st.button("Search PubMed"):
688
  with st.spinner("Searching PubMed..."):
689
+ pubmed_data = _get_pubmed(search_term)
690
+ if pubmed_data and 'esearchresult' in pubmed_data and 'idlist' in pubmed_data['esearchresult'] and len(pubmed_data['esearchresult']['idlist']) > 0:
691
  st.subheader("PubMed Search Results")
692
  st.write(f"Found {len(pubmed_data['esearchresult']['idlist'])} results for '{search_term}':")
693
  for article_id in pubmed_data['esearchresult']['idlist']:
694
+ st.markdown(f"- [PMID: {article_id}](https://pubmed.ncbi.nlm.nih.gov/{article_id}/)")
695
  else:
696
  st.write("No results found for that term.")
697
+
698
+ # Enhanced Literature with Semantic Scholar
699
+ st.header("Semantic Scholar Literature Search")
700
+ semantic_query = st.text_input("Enter search query for Semantic Scholar:", placeholder="e.g., Alzheimer's disease genetics")
701
+ if st.button("Search Semantic Scholar"):
702
+ with st.spinner("Searching Semantic Scholar..."):
703
+ semantic_papers = _get_semantic_scholar_papers(semantic_query)
704
+ if semantic_papers:
705
+ st.subheader("Semantic Scholar Search Results")
706
+ for paper in semantic_papers:
707
+ title = paper.get('title', 'N/A')
708
+ authors = ', '.join([author['name'] for author in paper.get('authors', [])])
709
+ year = paper.get('year', 'N/A')
710
+ url = paper.get('url', '#')
711
+ abstract = paper.get('abstract', 'No abstract available.')
712
+ summary = _summarize_abstract(abstract) if abstract != 'No abstract available.' else "No summary available."
713
+
714
+ st.markdown(f"### [{title}]({url})")
715
+ st.write(f"**Authors:** {authors}")
716
+ st.write(f"**Year:** {year}")
717
+ st.write(f"**Abstract Summary:** {summary}")
718
+ st.write("---")
719
+ else:
720
+ st.write("No results found for that term.")
721
+
722
+ # Ontology Search
723
  st.header("Ontology Search")
724
  ontology_search_term = st.text_input("Enter Search query for Ontology:", placeholder="Enter disease or ontology")
725
+ ontology_select = st.selectbox("Select Ontology", ["MESH", "NCIT", "GO", "SNOMEDCT"])
726
  if st.button("Search BioPortal"):
727
  with st.spinner("Searching Ontology..."):
728
  bioportal_data = _get_bioportal_data(ontology_select, ontology_search_term)
729
+ if bioportal_data and 'collection' in bioportal_data and len(bioportal_data['collection']) > 0:
730
  st.subheader(f"BioPortal Search Results for {ontology_select}")
731
  for result in bioportal_data['collection']:
732
+ label = result.get('prefLabel', 'N/A')
733
+ ontology_id = result.get('@id', 'N/A')
734
+ st.markdown(f"- **{label}** ({ontology_id})")
735
  else:
736
+ st.write("No results found.")
737
+
738
# --- Tab 6: Dashboard ---
with tabs[5]:
    st.header("Comprehensive Dashboard")

    # Sample KPI Counts (Replace with actual data)
    fda_count = 5000  # Placeholder
    ema_count = 3000
    who_count = 1500
    clinical_trials_count = 12000
    publications_count = 250000

    def _create_kpi_dashboard(fda_count: int, ema_count: int, who_count: int, trials: int, publications: int):
        """Render one metric card per KPI in a five-column row."""
        col1, col2, col3, col4, col5 = st.columns(5)
        col1.metric("FDA Approved Drugs", fda_count)
        col2.metric("EMA Approved Drugs", ema_count)
        col3.metric("WHO Essential Medicines", who_count)
        col4.metric("Ongoing Clinical Trials", trials)
        col5.metric("Publications Found", publications)

    _create_kpi_dashboard(fda_count, ema_count, who_count, clinical_trials_count, publications_count)

    # Trend Graphs
    st.subheader("Trends Over Time")
    # Example: Number of FDA approvals per year
    years = list(range(2000, 2026))
    # Placeholder data: spread the total evenly over the plotted years. The
    # divisor follows the year range instead of repeating its length by hand.
    fda_approvals = [fda_count // len(years)] * len(years)

    trend_fig, ax = plt.subplots(figsize=(10, 6))
    sns.lineplot(x=years, y=fda_approvals, marker='o', ax=ax)
    ax.set_title('FDA Drug Approvals Over Years')
    ax.set_xlabel('Year')
    ax.set_ylabel('Number of Approvals')
    st.pyplot(trend_fig)
    # pyplot keeps every figure alive until it is closed; the script reruns
    # on each interaction, so release the figure once it has been rendered.
    plt.close(trend_fig)

    # Geographical Maps
    st.subheader("Geographical Distribution of Clinical Trials")
    # Placeholder: Actual implementation would require location data from ClinicalTrials.gov
    st.write("Feature under development. Geographical data from clinical trials will be visualized here.")

    # Interactive Network Graph
    st.subheader("Gene-Variant-Drug Network")
    # Placeholder: sample gene, variants, and drugs
    sample_gene = "CYP2C19"
    sample_variants = ["rs4244285", "rs12248560"]
    sample_annotations = {
        "rs4244285": ["Clopidogrel", "Omeprazole"],
        "rs12248560": ["Sertraline"],
    }

    network_fig = _create_variant_network(sample_gene, sample_variants, sample_annotations)
    st.plotly_chart(network_fig, use_container_width=True)
791
+
792
+ # --- Additional Enhancements: User Authentication (Optional) ---
793
+ # Implement user authentication if needed using streamlit-authenticator or Auth0
794
+
795
+ # --- Additional Enhancements: Caching ---
796
@st.cache_data(ttl=3600)
def cached_get_pharmgkb_clinical_annotations(variant_id: str) -> Optional[Dict]:
    """Return PharmGKB clinical annotations for ``variant_id`` via the cache.

    Delegates to ``_get_pharmgkb_clinical_annotations``; Streamlit keeps the
    result for 3600 seconds per distinct ``variant_id``.
    """
    annotations = _get_pharmgkb_clinical_annotations(variant_id)
    return annotations
800
+
801
@st.cache_data(ttl=3600)
def cached_get_pubchem_smiles(drug_name: str) -> Optional[str]:
    """Return the PubChem SMILES string for ``drug_name`` via the cache.

    Delegates to ``_get_pubchem_smiles``; Streamlit keeps the result for
    3600 seconds per distinct ``drug_name``.
    """
    smiles = _get_pubchem_smiles(drug_name)
    return smiles
805
+
806
# --- Final Touches ---
# Sidebar "About" panel: a heading followed by a static info box.
about_panel = st.sidebar
about_panel.header("About")
about_panel.info("""
**Pharma Research Expert Platform**

An integrated platform for drug discovery, clinical research, and regulatory affairs.

**Developed by:** Your Name

**Contact:** [[email protected]](mailto:[email protected])
""")