Update app.py
app.py (CHANGED)
@@ -1,23 +1,24 @@
-import streamlit as st
 import os
+import time
+import json
+import numpy as np
+import streamlit as st
 import tempfile
 import requests
-import
+from dotenv import load_dotenv
 from openai import OpenAI
 from rdkit import Chem
-from rdkit.Chem import Draw
+from rdkit.Chem import Draw, Descriptors, Crippen, Lipinski
 import faiss
-import numpy as np
 from PyPDF2 import PdfReader
-from typing import List, Dict,
-from dotenv import load_dotenv
-from fpdf import FPDF

-#
-# Configuration & Environment
-# --------------------------
+from typing import List, Dict, Tuple

+# Load environment variables
 load_dotenv()

+# --------------------------
+# Configuration Settings
+# --------------------------
 class AppConfig:
     OPENAI_MODEL = "gpt-4-turbo-preview"
     EMBEDDING_MODEL = "text-embedding-3-large"
@@ -26,13 +27,11 @@ class AppConfig:
     RAG_THRESHOLD = 0.78
     MAX_CONTEXT_CHUNKS = 5

-#
-# OpenAI Client Initialization
-# --------------------------
+# Initialize OpenAI Client
 client = OpenAI(api_key=os.getenv("OPENAI_API_KEY"))

 # --------------------------
-#
+# Vector Index for RAG
 # --------------------------
 class VectorIndex:
     def __init__(self):
@@ -41,109 +40,64 @@ class VectorIndex:
         self.metadata = []

     def add_document(self, text: str, source: str):
-        """Add document to vector index with metadata"""
         embedding = self._get_embedding(text)
         self.index.add(np.array([embedding], dtype=np.float32))
         self.documents.append(text)
-        self.metadata.append({
-            "source": source,
-            "timestamp": time.time()
-        })
+        self.metadata.append({"source": source, "timestamp": time.time()})

     def search(self, query: str) -> List[Tuple[str, Dict]]:
-        """Search index with relevance scoring"""
         query_embed = self._get_embedding(query)
         distances, indices = self.index.search(np.array([query_embed], dtype=np.float32), AppConfig.MAX_CONTEXT_CHUNKS)

-
-
-            if idx >= 0 and distances[0][i] < AppConfig.RAG_THRESHOLD
-
-                self.documents[idx],
-                {**self.metadata[idx], "score": float(distances[0][i])}
-            ))
-        return results
+        return [
+            (self.documents[idx], {**self.metadata[idx], "score": float(distances[0][i])})
+            for i, idx in enumerate(indices[0]) if idx >= 0 and distances[0][i] < AppConfig.RAG_THRESHOLD
+        ]

     def _get_embedding(self, text: str) -> List[float]:
-
-        response = client.embeddings.create(
-            input=text,
-            model=AppConfig.EMBEDDING_MODEL
-        )
+        response = client.embeddings.create(input=text, model=AppConfig.EMBEDDING_MODEL)
         return response.data[0].embedding

-# Initialize vector index
 knowledge_base = VectorIndex()

 # --------------------------
-# Pharmaceutical Tools
+# Pharmaceutical Tools
 # --------------------------
 class PharmaTools:
     @staticmethod
     def clinical_trial_search(query: str) -> Dict:
-        """Search clinical trials with safety checks"""
         try:
-
-                model=AppConfig.OPENAI_MODEL,
-                messages=[{
-                    "role": "user",
-                    "content": f"Generate clinicaltrials.gov API parameters for: {query}"
-                }],
-                tools=[{
-                    "type": "function",
-                    "function": {
-                        "name": "clinical_trial_search",
-                        "description": "Search clinical trials database",
-                        "parameters": {
-                            "type": "object",
-                            "properties": {
-                                "query": {"type": "string"},
-                                "max_results": {"type": "integer"}
-                            }
-                        }
-                    }
-                }]
-            )
-
-            # Execute actual API call here
-            return {"results": "Sample clinical trial data"}
-
+            return {"results": "Sample clinical trial data"}  # Placeholder for API integration
         except Exception as e:
             return {"error": str(e)}

     @staticmethod
     def molecular_analysis(smiles: str) -> Dict:
-        """Analyze molecular structure with RDKit"""
         try:
             mol = Chem.MolFromSmiles(smiles)
             if not mol:
                 return {"error": "Invalid SMILES"}

             properties = {
-                "molecular_weight":
-                "logp":
-                "h_bond_donors":
+                "molecular_weight": Descriptors.ExactMolWt(mol),
+                "logp": Crippen.MolLogP(mol),
+                "h_bond_donors": Lipinski.NumHDonors(mol)
             }

-            # Add AI-powered predictions
             ai_analysis = client.chat.completions.create(
                 model=AppConfig.OPENAI_MODEL,
-                messages=[{
-                    "role": "user",
-                    "content": f"Predict pharmaceutical properties for SMILES {smiles}:\n{properties}"
-                }]
+                messages=[{"role": "user", "content": f"Predict properties for SMILES {smiles}:\n{properties}"}]
             )

             return {
                 "calculated": properties,
                 "ai_predictions": json.loads(ai_analysis.choices[0].message.content)
             }
-
         except Exception as e:
             return {"error": str(e)}

 # --------------------------
-# AI Agent
+# Pharma AI Agent
 # --------------------------
 class PharmaAgent:
     def __init__(self):
@@ -151,112 +105,50 @@ class PharmaAgent:
         self.system_prompt = self._build_system_prompt()

     def query(self, prompt: str) -> str:
-        """Execute query with RAG context"""
-        # Retrieve relevant knowledge
         rag_context = knowledge_base.search(prompt)
-
-        # Build context-aware prompt
-        messages = [{
-            "role": "system",
-            "content": self.system_prompt
-        }]
+        messages = [{"role": "system", "content": self.system_prompt}]

         if rag_context:
             messages.append({
                 "role": "assistant",
-                "content": "Relevant knowledge:\n" + "\n".join([f"[Source: {meta['source']}]\n{text}"
-                    for text, meta in rag_context])
+                "content": "Relevant knowledge:\n" + "\n".join([f"[Source: {meta['source']}]\n{text}" for text, meta in rag_context])
             })
-
-        messages.append({
-            "role": "user",
-            "content": prompt
-        })

-
+        messages.append({"role": "user", "content": prompt})
+
         try:
             response = client.chat.completions.create(
                 model=AppConfig.OPENAI_MODEL,
                 messages=messages,
-                tools=self._tools_schema(),
                 timeout=AppConfig.API_TIMEOUT
             )
-
-            return self._process_response(response)
-
+            return response.choices[0].message.content
         except Exception as e:
             return f"Error: {str(e)}"

     def _build_system_prompt(self) -> str:
-        """
-        return f"""You are a pharmaceutical research AI with access to:
-        - Molecular analysis tools
-        - Clinical trial databases
-        - Latest research via RAG (Updated: {time.ctime()})
-
-        Follow these rules:
-        1. Always validate chemical structures
-        2. Cite sources from RAG context
-        3. Check for recent regulatory updates
-        4. Maintain safety protocols"""
-
-    def _tools_schema(self) -> List[Dict]:
-        """Generate OpenAI-compatible tool schema"""
-        return [{
-            "type": "function",
-            "function": {
-                "name": tool.__name__,
-                "description": tool.__doc__,
-                "parameters": {
-                    "type": "object",
-                    "properties": {
-                        # Add parameter definitions here
-                    }
-                }
-            }
-        } for tool in self.tools]
-
-    def _process_response(self, response) -> str:
-        """Handle tool calls and response parsing"""
-        # Add tool execution logic here
-        return response.choices[0].message.content
+        return f"""You are a pharmaceutical AI with access to molecular analysis, clinical trial data, and research via RAG (Updated: {time.ctime()}). Follow safety protocols."""

 # --------------------------
-# Streamlit UI
+# Streamlit UI
 # --------------------------
 def main():
-    st.set_page_config(
-        page_title="PharmaAI Research Suite",
-        page_icon="🧬",
-        layout="wide"
-    )
-
+    st.set_page_config(page_title="PharmaAI Research Suite", page_icon="🧬", layout="wide")
     st.title("PharmaAI Research Suite")
     st.markdown("Integrated AI Platform for Pharmaceutical Research")

-    # Initialize components
     agent = PharmaAgent()

-    # Knowledge Base Management
     with st.sidebar:
         st.header("🧠 Knowledge Base")
-        uploaded_files = st.file_uploader("Upload Research Documents",
-            type=["pdf", "txt"],
-            accept_multiple_files=True)
+        uploaded_files = st.file_uploader("Upload Research Documents", type=["pdf", "txt"], accept_multiple_files=True)

         if uploaded_files:
             for file in uploaded_files:
-                text = ""
-                if file.type == "application/pdf":
-                    reader = PdfReader(file)
-                    text = "\n".join([page.extract_text() for page in reader.pages])
-                else:
-                    text = file.getvalue().decode()
-
+                text = "\n".join([page.extract_text() for page in PdfReader(file).pages]) if file.type == "application/pdf" else file.getvalue().decode()
                 knowledge_base.add_document(text, file.name)
             st.success(f"Added {len(uploaded_files)} documents to knowledge base")

-    # Main Interface
     tab1, tab2, tab3 = st.tabs(["Drug Development", "Molecular Analysis", "Literature Review"])

     with tab1:
@@ -265,13 +157,7 @@ def main():
         strategy = st.selectbox("Development Strategy", ["First-in-class", "Me-too", "Biologic"])

         if st.button("Generate Development Plan"):
-
-            - Target validation
-            - Safety profile
-            - Competitive landscape
-            - Regulatory pathway"""
-
-            response = agent.query(prompt)
+            response = agent.query(f"Develop {strategy} drug targeting {target}.")
             st.markdown(response)

     with tab2:
@@ -284,18 +170,11 @@
                 st.error(analysis["error"])
             else:
                 col1, col2 = st.columns(2)
-
-
-                    st.json(analysis["calculated"])
-                with col2:
-                    st.subheader("AI Predictions")
-                    st.json(analysis["ai_predictions"])
-
-                # Visualization
+                col1.json(analysis["calculated"])
+                col2.json(analysis["ai_predictions"])
                 mol = Chem.MolFromSmiles(smiles)
                 if mol:
-
-                    st.image(img, caption="Molecular Structure")
+                    st.image(Draw.MolToImage(mol, size=(400, 400)), caption="Molecular Structure")

     with tab3:
         st.header("Literature Review")
@@ -303,12 +182,9 @@

         if research_query:
             results = knowledge_base.search(research_query)
-
-            st.
-
-            for text, meta in results:
-                with st.expander(f"Source: {meta['source']} (Score: {meta['score']:.2f})"):
-                    st.markdown(f"```\n{text[:1000]}...\n```")
+            for text, meta in results:
+                with st.expander(f"Source: {meta['source']} (Score: {meta['score']:.2f})"):
+                    st.markdown(f"```\n{text[:1000]}...\n```")

 if __name__ == "__main__":
-    main()
+    main()