mgbam committed on
Commit
d35b5b6
·
verified ·
1 Parent(s): b5bcdfb

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +44 -168
app.py CHANGED
@@ -1,23 +1,24 @@
1
- import streamlit as st
2
  import os
 
 
 
 
3
  import tempfile
4
  import requests
5
- import json
6
  from openai import OpenAI
7
  from rdkit import Chem
8
- from rdkit.Chem import Draw
9
  import faiss
10
- import numpy as np
11
  from PyPDF2 import PdfReader
12
- from typing import List, Dict, Optional, Tuple
13
- from dotenv import load_dotenv
14
- from fpdf import FPDF
15
 
16
- # --------------------------
17
- # Configuration & Environment
18
- # --------------------------
19
  load_dotenv()
20
 
 
 
 
21
  class AppConfig:
22
  OPENAI_MODEL = "gpt-4-turbo-preview"
23
  EMBEDDING_MODEL = "text-embedding-3-large"
@@ -26,13 +27,11 @@ class AppConfig:
26
  RAG_THRESHOLD = 0.78
27
  MAX_CONTEXT_CHUNKS = 5
28
 
29
- # --------------------------
30
- # OpenAI Client Initialization
31
- # --------------------------
32
  client = OpenAI(api_key=os.getenv("OPENAI_API_KEY"))
33
 
34
  # --------------------------
35
- # RAG Vector Index Implementation
36
  # --------------------------
37
  class VectorIndex:
38
  def __init__(self):
@@ -41,109 +40,64 @@ class VectorIndex:
41
  self.metadata = []
42
 
43
  def add_document(self, text: str, source: str):
44
- """Add document to vector index with metadata"""
45
  embedding = self._get_embedding(text)
46
  self.index.add(np.array([embedding], dtype=np.float32))
47
  self.documents.append(text)
48
- self.metadata.append({
49
- "source": source,
50
- "timestamp": time.time()
51
- })
52
 
53
  def search(self, query: str) -> List[Tuple[str, Dict]]:
54
- """Search index with relevance scoring"""
55
  query_embed = self._get_embedding(query)
56
  distances, indices = self.index.search(np.array([query_embed], dtype=np.float32), AppConfig.MAX_CONTEXT_CHUNKS)
57
 
58
- results = []
59
- for i, idx in enumerate(indices[0]):
60
- if idx >= 0 and distances[0][i] < AppConfig.RAG_THRESHOLD:
61
- results.append((
62
- self.documents[idx],
63
- {**self.metadata[idx], "score": float(distances[0][i])}
64
- ))
65
- return results
66
 
67
  def _get_embedding(self, text: str) -> List[float]:
68
- """Generate embeddings using OpenAI"""
69
- response = client.embeddings.create(
70
- input=text,
71
- model=AppConfig.EMBEDDING_MODEL
72
- )
73
  return response.data[0].embedding
74
 
75
- # Initialize vector index
76
  knowledge_base = VectorIndex()
77
 
78
  # --------------------------
79
- # Pharmaceutical Tools with OpenAI Function Calling
80
  # --------------------------
81
  class PharmaTools:
82
  @staticmethod
83
  def clinical_trial_search(query: str) -> Dict:
84
- """Search clinical trials with safety checks"""
85
  try:
86
- response = client.chat.completions.create(
87
- model=AppConfig.OPENAI_MODEL,
88
- messages=[{
89
- "role": "user",
90
- "content": f"Generate clinicaltrials.gov API parameters for: {query}"
91
- }],
92
- tools=[{
93
- "type": "function",
94
- "function": {
95
- "name": "clinical_trial_search",
96
- "description": "Search clinical trials database",
97
- "parameters": {
98
- "type": "object",
99
- "properties": {
100
- "query": {"type": "string"},
101
- "max_results": {"type": "integer"}
102
- }
103
- }
104
- }
105
- }]
106
- )
107
-
108
- # Execute actual API call here
109
- return {"results": "Sample clinical trial data"}
110
-
111
  except Exception as e:
112
  return {"error": str(e)}
113
 
114
  @staticmethod
115
  def molecular_analysis(smiles: str) -> Dict:
116
- """Analyze molecular structure with RDKit"""
117
  try:
118
  mol = Chem.MolFromSmiles(smiles)
119
  if not mol:
120
  return {"error": "Invalid SMILES"}
121
 
122
  properties = {
123
- "molecular_weight": Chem.Descriptors.ExactMolWt(mol),
124
- "logp": Chem.Crippen.MolLogP(mol),
125
- "h_bond_donors": Chem.Lipinski.NumHDonors(mol)
126
  }
127
 
128
- # Add AI-powered predictions
129
  ai_analysis = client.chat.completions.create(
130
  model=AppConfig.OPENAI_MODEL,
131
- messages=[{
132
- "role": "user",
133
- "content": f"Predict pharmaceutical properties for SMILES {smiles}:\n{properties}"
134
- }]
135
  )
136
 
137
  return {
138
  "calculated": properties,
139
  "ai_predictions": json.loads(ai_analysis.choices[0].message.content)
140
  }
141
-
142
  except Exception as e:
143
  return {"error": str(e)}
144
 
145
  # --------------------------
146
- # AI Agent with RAG Integration
147
  # --------------------------
148
  class PharmaAgent:
149
  def __init__(self):
@@ -151,112 +105,50 @@ class PharmaAgent:
151
  self.system_prompt = self._build_system_prompt()
152
 
153
  def query(self, prompt: str) -> str:
154
- """Execute query with RAG context"""
155
- # Retrieve relevant knowledge
156
  rag_context = knowledge_base.search(prompt)
157
-
158
- # Build context-aware prompt
159
- messages = [{
160
- "role": "system",
161
- "content": self.system_prompt
162
- }]
163
 
164
  if rag_context:
165
  messages.append({
166
  "role": "assistant",
167
- "content": "Relevant knowledge:\n" + "\n".join([f"[Source: {meta['source']}]\n{text}"
168
- for text, meta in rag_context])
169
  })
170
-
171
- messages.append({
172
- "role": "user",
173
- "content": prompt
174
- })
175
 
176
- # Execute OpenAI query
 
177
  try:
178
  response = client.chat.completions.create(
179
  model=AppConfig.OPENAI_MODEL,
180
  messages=messages,
181
- tools=self._tools_schema(),
182
  timeout=AppConfig.API_TIMEOUT
183
  )
184
-
185
- return self._process_response(response)
186
-
187
  except Exception as e:
188
  return f"Error: {str(e)}"
189
 
190
  def _build_system_prompt(self) -> str:
191
- """Construct dynamic system prompt"""
192
- return f"""You are a pharmaceutical research AI with access to:
193
- - Molecular analysis tools
194
- - Clinical trial databases
195
- - Latest research via RAG (Updated: {time.ctime()})
196
-
197
- Follow these rules:
198
- 1. Always validate chemical structures
199
- 2. Cite sources from RAG context
200
- 3. Check for recent regulatory updates
201
- 4. Maintain safety protocols"""
202
-
203
- def _tools_schema(self) -> List[Dict]:
204
- """Generate OpenAI-compatible tool schema"""
205
- return [{
206
- "type": "function",
207
- "function": {
208
- "name": tool.__name__,
209
- "description": tool.__doc__,
210
- "parameters": {
211
- "type": "object",
212
- "properties": {
213
- # Add parameter definitions here
214
- }
215
- }
216
- }
217
- } for tool in self.tools]
218
-
219
- def _process_response(self, response) -> str:
220
- """Handle tool calls and response parsing"""
221
- # Add tool execution logic here
222
- return response.choices[0].message.content
223
 
224
  # --------------------------
225
- # Streamlit UI Components
226
  # --------------------------
227
  def main():
228
- st.set_page_config(
229
- page_title="PharmaAI Research Suite",
230
- page_icon="🧬",
231
- layout="wide"
232
- )
233
-
234
  st.title("PharmaAI Research Suite")
235
  st.markdown("Integrated AI Platform for Pharmaceutical Research")
236
 
237
- # Initialize components
238
  agent = PharmaAgent()
239
 
240
- # Knowledge Base Management
241
  with st.sidebar:
242
  st.header("🧠 Knowledge Base")
243
- uploaded_files = st.file_uploader("Upload Research Documents",
244
- type=["pdf", "txt"],
245
- accept_multiple_files=True)
246
 
247
  if uploaded_files:
248
  for file in uploaded_files:
249
- text = ""
250
- if file.type == "application/pdf":
251
- reader = PdfReader(file)
252
- text = "\n".join([page.extract_text() for page in reader.pages])
253
- else:
254
- text = file.getvalue().decode()
255
-
256
  knowledge_base.add_document(text, file.name)
257
  st.success(f"Added {len(uploaded_files)} documents to knowledge base")
258
 
259
- # Main Interface
260
  tab1, tab2, tab3 = st.tabs(["Drug Development", "Molecular Analysis", "Literature Review"])
261
 
262
  with tab1:
@@ -265,13 +157,7 @@ def main():
265
  strategy = st.selectbox("Development Strategy", ["First-in-class", "Me-too", "Biologic"])
266
 
267
  if st.button("Generate Development Plan"):
268
- prompt = f"""Develop {strategy} drug targeting {target} considering:
269
- - Target validation
270
- - Safety profile
271
- - Competitive landscape
272
- - Regulatory pathway"""
273
-
274
- response = agent.query(prompt)
275
  st.markdown(response)
276
 
277
  with tab2:
@@ -284,18 +170,11 @@ def main():
284
  st.error(analysis["error"])
285
  else:
286
  col1, col2 = st.columns(2)
287
- with col1:
288
- st.subheader("Calculated Properties")
289
- st.json(analysis["calculated"])
290
- with col2:
291
- st.subheader("AI Predictions")
292
- st.json(analysis["ai_predictions"])
293
-
294
- # Visualization
295
  mol = Chem.MolFromSmiles(smiles)
296
  if mol:
297
- img = Draw.MolToImage(mol, size=(400, 400))
298
- st.image(img, caption="Molecular Structure")
299
 
300
  with tab3:
301
  st.header("Literature Review")
@@ -303,12 +182,9 @@ def main():
303
 
304
  if research_query:
305
  results = knowledge_base.search(research_query)
306
- if not results:
307
- st.info("No relevant documents found")
308
- else:
309
- for text, meta in results:
310
- with st.expander(f"Source: {meta['source']} (Score: {meta['score']:.2f})"):
311
- st.markdown(f"```\n{text[:1000]}...\n```")
312
 
313
  if __name__ == "__main__":
314
- main()
 
 
1
  import os
2
+ import time
3
+ import json
4
+ import numpy as np
5
+ import streamlit as st
6
  import tempfile
7
  import requests
8
+ from dotenv import load_dotenv
9
  from openai import OpenAI
10
  from rdkit import Chem
11
+ from rdkit.Chem import Draw, Descriptors, Crippen, Lipinski
12
  import faiss
 
13
  from PyPDF2 import PdfReader
14
+ from typing import List, Dict, Tuple
 
 
15
 
16
+ # Load environment variables
 
 
17
  load_dotenv()
18
 
19
+ # --------------------------
20
+ # Configuration Settings
21
+ # --------------------------
22
  class AppConfig:
23
  OPENAI_MODEL = "gpt-4-turbo-preview"
24
  EMBEDDING_MODEL = "text-embedding-3-large"
 
27
  RAG_THRESHOLD = 0.78
28
  MAX_CONTEXT_CHUNKS = 5
29
 
30
+ # Initialize OpenAI Client
 
 
31
  client = OpenAI(api_key=os.getenv("OPENAI_API_KEY"))
32
 
33
  # --------------------------
34
+ # Vector Index for RAG
35
  # --------------------------
36
  class VectorIndex:
37
  def __init__(self):
 
40
  self.metadata = []
41
 
42
  def add_document(self, text: str, source: str):
 
43
  embedding = self._get_embedding(text)
44
  self.index.add(np.array([embedding], dtype=np.float32))
45
  self.documents.append(text)
46
+ self.metadata.append({"source": source, "timestamp": time.time()})
 
 
 
47
 
48
  def search(self, query: str) -> List[Tuple[str, Dict]]:
 
49
  query_embed = self._get_embedding(query)
50
  distances, indices = self.index.search(np.array([query_embed], dtype=np.float32), AppConfig.MAX_CONTEXT_CHUNKS)
51
 
52
+ return [
53
+ (self.documents[idx], {**self.metadata[idx], "score": float(distances[0][i])})
54
+ for i, idx in enumerate(indices[0]) if idx >= 0 and distances[0][i] < AppConfig.RAG_THRESHOLD
55
+ ]
 
 
 
 
56
 
57
  def _get_embedding(self, text: str) -> List[float]:
58
+ response = client.embeddings.create(input=text, model=AppConfig.EMBEDDING_MODEL)
 
 
 
 
59
  return response.data[0].embedding
60
 
 
61
  knowledge_base = VectorIndex()
62
 
63
  # --------------------------
64
+ # Pharmaceutical Tools
65
  # --------------------------
66
  class PharmaTools:
67
  @staticmethod
68
  def clinical_trial_search(query: str) -> Dict:
 
69
  try:
70
+ return {"results": "Sample clinical trial data"} # Placeholder for API integration
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
71
  except Exception as e:
72
  return {"error": str(e)}
73
 
74
  @staticmethod
75
  def molecular_analysis(smiles: str) -> Dict:
 
76
  try:
77
  mol = Chem.MolFromSmiles(smiles)
78
  if not mol:
79
  return {"error": "Invalid SMILES"}
80
 
81
  properties = {
82
+ "molecular_weight": Descriptors.ExactMolWt(mol),
83
+ "logp": Crippen.MolLogP(mol),
84
+ "h_bond_donors": Lipinski.NumHDonors(mol)
85
  }
86
 
 
87
  ai_analysis = client.chat.completions.create(
88
  model=AppConfig.OPENAI_MODEL,
89
+ messages=[{"role": "user", "content": f"Predict properties for SMILES {smiles}:\n{properties}"}]
 
 
 
90
  )
91
 
92
  return {
93
  "calculated": properties,
94
  "ai_predictions": json.loads(ai_analysis.choices[0].message.content)
95
  }
 
96
  except Exception as e:
97
  return {"error": str(e)}
98
 
99
  # --------------------------
100
+ # Pharma AI Agent
101
  # --------------------------
102
  class PharmaAgent:
103
  def __init__(self):
 
105
  self.system_prompt = self._build_system_prompt()
106
 
107
  def query(self, prompt: str) -> str:
 
 
108
  rag_context = knowledge_base.search(prompt)
109
+ messages = [{"role": "system", "content": self.system_prompt}]
 
 
 
 
 
110
 
111
  if rag_context:
112
  messages.append({
113
  "role": "assistant",
114
+ "content": "Relevant knowledge:\n" + "\n".join([f"[Source: {meta['source']}]\n{text}" for text, meta in rag_context])
 
115
  })
 
 
 
 
 
116
 
117
+ messages.append({"role": "user", "content": prompt})
118
+
119
  try:
120
  response = client.chat.completions.create(
121
  model=AppConfig.OPENAI_MODEL,
122
  messages=messages,
 
123
  timeout=AppConfig.API_TIMEOUT
124
  )
125
+ return response.choices[0].message.content
 
 
126
  except Exception as e:
127
  return f"Error: {str(e)}"
128
 
129
  def _build_system_prompt(self) -> str:
130
+ return f"""You are a pharmaceutical AI with access to molecular analysis, clinical trial data, and research via RAG (Updated: {time.ctime()}). Follow safety protocols."""
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
131
 
132
  # --------------------------
133
+ # Streamlit UI
134
  # --------------------------
135
  def main():
136
+ st.set_page_config(page_title="PharmaAI Research Suite", page_icon="🧬", layout="wide")
 
 
 
 
 
137
  st.title("PharmaAI Research Suite")
138
  st.markdown("Integrated AI Platform for Pharmaceutical Research")
139
 
 
140
  agent = PharmaAgent()
141
 
 
142
  with st.sidebar:
143
  st.header("🧠 Knowledge Base")
144
+ uploaded_files = st.file_uploader("Upload Research Documents", type=["pdf", "txt"], accept_multiple_files=True)
 
 
145
 
146
  if uploaded_files:
147
  for file in uploaded_files:
148
+ text = "\n".join([page.extract_text() for page in PdfReader(file).pages]) if file.type == "application/pdf" else file.getvalue().decode()
 
 
 
 
 
 
149
  knowledge_base.add_document(text, file.name)
150
  st.success(f"Added {len(uploaded_files)} documents to knowledge base")
151
 
 
152
  tab1, tab2, tab3 = st.tabs(["Drug Development", "Molecular Analysis", "Literature Review"])
153
 
154
  with tab1:
 
157
  strategy = st.selectbox("Development Strategy", ["First-in-class", "Me-too", "Biologic"])
158
 
159
  if st.button("Generate Development Plan"):
160
+ response = agent.query(f"Develop {strategy} drug targeting {target}.")
 
 
 
 
 
 
161
  st.markdown(response)
162
 
163
  with tab2:
 
170
  st.error(analysis["error"])
171
  else:
172
  col1, col2 = st.columns(2)
173
+ col1.json(analysis["calculated"])
174
+ col2.json(analysis["ai_predictions"])
 
 
 
 
 
 
175
  mol = Chem.MolFromSmiles(smiles)
176
  if mol:
177
+ st.image(Draw.MolToImage(mol, size=(400, 400)), caption="Molecular Structure")
 
178
 
179
  with tab3:
180
  st.header("Literature Review")
 
182
 
183
  if research_query:
184
  results = knowledge_base.search(research_query)
185
+ for text, meta in results:
186
+ with st.expander(f"Source: {meta['source']} (Score: {meta['score']:.2f})"):
187
+ st.markdown(f"```\n{text[:1000]}...\n```")
 
 
 
188
 
189
  if __name__ == "__main__":
190
+ main()