rajsecrets0 committed on
Commit
4e37398
·
verified ·
1 Parent(s): e4e609d

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +14 -337
app.py CHANGED
@@ -1,358 +1,35 @@
1
  import os
2
- import chainlit as cl
3
- import aiohttp
4
- from PIL import Image
5
- from pathlib import Path
6
- import tempfile
7
- import shutil
8
  from dotenv import load_dotenv
9
- import PyPDF2
10
- import pytesseract
11
- import requests
12
- from bs4 import BeautifulSoup
13
- import asyncio
14
- import logging
15
- from typing import Optional, List, Dict, Any
16
- import sqlite3
17
- from datetime import datetime
18
- from langchain.memory import ConversationBufferMemory
19
- from langchain.chains import ConversationChain
20
- from langchain_community.llms import OpenAI
21
 
22
- # Load environment variables
23
  load_dotenv()
24
 
25
- # Configure logging
26
- logging.basicConfig(level=logging.INFO)
27
- logger = logging.getLogger(__name__)
 
28
 
29
- # Configuration
30
  GEMINI_API_KEY = os.getenv("GEMINI_API_KEY")
 
 
31
  if not GEMINI_API_KEY:
32
  raise ValueError("GEMINI_API_KEY environment variable is not set")
33
-
34
- SERP_API_KEY = os.getenv("SERP_API_KEY")
35
  if not SERP_API_KEY:
36
  raise ValueError("SERP_API_KEY environment variable is not set")
37
 
38
- GEMINI_URL = "https://generativelanguage.googleapis.com/v1beta/models/gemini-pro:generateContent"
39
- ALLOWED_EXTENSIONS = {'.pdf', '.jpg', '.jpeg', '.png', '.txt'}
40
- MAX_FILE_SIZE = 10 * 1024 * 1024 # 10MB
41
-
42
- # Database setup
43
- DB_NAME = "conversation_history.db"
44
-
45
- def init_db():
46
- """Initialize the SQLite database for conversation history."""
47
- conn = sqlite3.connect(DB_NAME)
48
- cursor = conn.cursor()
49
- cursor.execute("""
50
- CREATE TABLE IF NOT EXISTS conversations (
51
- session_id TEXT PRIMARY KEY,
52
- user_id TEXT,
53
- timestamp DATETIME,
54
- history TEXT
55
- )
56
- """)
57
- conn.commit()
58
- conn.close()
59
-
60
- init_db()
61
-
62
- # System Prompts
63
- DEFAULT_SYSTEM_PROMPT = "You are Bella, a highly intelligent and personalized Biosecurity Expert designed to assist students, researchers, and professionals in the field of biosecurity. you need to give relevant arxiv papers links, Articles links and relevant Links at the end "
64
-
65
- RESEARCHER_MODE_PROMPT = """
66
- You are Bella, a highly intelligent and personalized Biosecurity Expert designed to assist students, researchers, and professionals in the field of biosecurity. Your mission is to provide clear, concise, and actionable information while supporting users in their research and problem-solving tasks.
67
-
68
- You are equipped with advanced capabilities and u can generate, including:
69
-
70
- - Access to research papers and articles from ArXiv, Google Scholar, and other credible sources.
71
- - Real-time web search to provide the latest updates, news, and policy documents in biosecurity.
72
- - The ability to analyze and summarize PDFs, images, and textual data, extracting key insights and presenting them effectively.
73
- - Tailored recommendations for actions, best practices, and resources based on user input.
74
- - Scenario simulations and risk assessment tools to aid in training and decision-making.
75
- - Collaboration tools for shared research and exporting insights.
76
- - Notifications and alerts for significant developments in biosecurity.
77
-
78
- Your responses should always be:
79
-
80
- - Well-Organized: Use structured sections, clear headings, bullet points, and concise summaries.
81
- - Visually Engaging: Present outputs with tables, formatted text, or charts wherever possible.
82
- - Accurate and Credible: Base your responses on reliable sources and provide direct links for further reading.
83
- - User-Focused: Adapt to the user's needs, offering personalized guidance and proactive assistance.
84
- - you need to give relevant arxiv papers links, Articles links and relevant info at the end.
85
-
86
- Always maintain a professional yet approachable tone, making it easy for users to understand and act on the information provided.
87
- """
88
-
89
-
90
- class BiosecurityAnalyzer:
91
- def __init__(self):
92
- self.api_key = GEMINI_API_KEY
93
- self.session: Optional[aiohttp.ClientSession] = None
94
- self.temp_dir: Optional[Path] = None
95
-
96
- async def initialize(self):
97
- """Initialize async session and temporary directory."""
98
- self.session = aiohttp.ClientSession()
99
- self.temp_dir = Path(tempfile.mkdtemp())
100
- logger.info(f"Initialized analyzer with temp directory: {self.temp_dir}")
101
-
102
- async def cleanup(self):
103
- """Cleanup resources."""
104
- if self.session:
105
- await self.session.close()
106
- if self.temp_dir and self.temp_dir.exists():
107
- shutil.rmtree(self.temp_dir)
108
- logger.info("Cleaned up temporary directory")
109
-
110
- async def analyze_text(self, text: str, prompt: str = "") -> str:
111
- """Analyze text content using Gemini API."""
112
- if not self.session:
113
- await self.initialize()
114
-
115
- try:
116
- headers = {"Content-Type": "application/json"}
117
- payload = {
118
- "contents": [{
119
- "parts": [{"text": f"{prompt}\n\n{text}"}]
120
- }]
121
- }
122
-
123
- async with self.session.post(
124
- f"{GEMINI_URL}?key={self.api_key}",
125
- headers=headers,
126
- json=payload,
127
- timeout=30
128
- ) as response:
129
- if response.status != 200:
130
- error_text = await response.text()
131
- raise Exception(f"API Error: {response.status} - {error_text}")
132
-
133
- result = await response.json()
134
- return result["candidates"][0]["content"]["parts"][0]["text"]
135
-
136
- except Exception as e:
137
- logger.error(f"Analysis failed: {str(e)}")
138
- raise
139
-
140
- async def extract_text_from_pdf(self, file_path: Path) -> str:
141
- """Extract text from a PDF file."""
142
- try:
143
- with open(file_path, "rb") as file:
144
- reader = PyPDF2.PdfReader(file)
145
- text = ""
146
- for page in reader.pages:
147
- text += page.extract_text()
148
- return text
149
- except Exception as e:
150
- logger.error(f"Failed to extract text from PDF: {str(e)}")
151
- raise
152
-
153
- async def extract_text_from_image(self, file_path: Path) -> str:
154
- """Extract text from an image using OCR."""
155
- try:
156
- image = Image.open(file_path)
157
- text = pytesseract.image_to_string(image)
158
- return text
159
- except Exception as e:
160
- logger.error(f"Failed to extract text from image: {str(e)}")
161
- raise
162
-
163
- async def scrape_website_content(self, url: str) -> str:
164
- """Scrape text content from a website."""
165
- try:
166
- response = requests.get(url)
167
- response.raise_for_status()
168
- soup = BeautifulSoup(response.text, "html.parser")
169
- text = soup.get_text(separator="\n")
170
- return text
171
- except Exception as e:
172
- logger.error(f"Failed to scrape website: {str(e)}")
173
- raise
174
-
175
- async def fetch_google_results(self, query: str, start: int = 0) -> List[Dict[str, str]]:
176
- """Fetch Google search results using SERP API."""
177
- try:
178
- params = {
179
- "q": query,
180
- "api_key": SERP_API_KEY,
181
- "start": start,
182
- "num": 5 # Fetch 5 results at a time
183
- }
184
- response = requests.get("https://serpapi.com/search", params=params)
185
- response.raise_for_status()
186
- results = response.json().get("organic_results", [])
187
- return results
188
- except Exception as e:
189
- logger.error(f"Failed to fetch Google results: {str(e)}")
190
- raise
191
-
192
-
193
- @cl.action_callback("google_search")
194
- async def on_google_search(action: cl.Action):
195
- """Handle Google Search button toggle."""
196
- google_search_enabled = not cl.user_session.get("google_search_enabled", False)
197
- cl.user_session.set("google_search_enabled", google_search_enabled)
198
-
199
- action.label = "Google Search: " + ("On" if google_search_enabled else "Off")
200
-
201
- await cl.Message(
202
- content="",
203
- actions=[action]
204
- ).send()
205
-
206
- await cl.Message(content=f"Google Search is now {'enabled' if google_search_enabled else 'disabled'}.").send()
207
-
208
-
209
- @cl.action_callback("researcher_mode")
210
- async def on_researcher_mode(action: cl.Action):
211
- """Handle Researcher Mode button toggle."""
212
- researcher_mode_enabled = not cl.user_session.get("researcher_mode_enabled", False)
213
- cl.user_session.set("researcher_mode_enabled", researcher_mode_enabled)
214
-
215
- action.label = "Researcher Mode: " + ("On" if researcher_mode_enabled else "Off")
216
-
217
- await cl.Message(
218
- content="",
219
- actions=[action]
220
- ).send()
221
-
222
- await cl.Message(content=f"Researcher Mode is now {'enabled' if researcher_mode_enabled else 'disabled'}.").send()
223
-
224
 
 
225
  @cl.on_chat_start
226
  async def start():
227
- """Initialize chat session."""
228
- analyzer = BiosecurityAnalyzer()
229
- await analyzer.initialize()
230
- cl.user_session.set("analyzer", analyzer)
231
-
232
- # Initialize LangChain memory for short-term context
233
- memory = ConversationBufferMemory()
234
- cl.user_session.set("memory", memory)
235
-
236
- # Add "Google Search" and "Researcher Mode" buttons to the UI
237
- await cl.Message(
238
- content="",
239
- actions=[
240
- cl.Action(name="google_search", value="toggle", label="Google Search",
241
- description="Toggle on this button to get realtime results from Google and websites.",
242
- payload={"enabled": False}),
243
- cl.Action(name="researcher_mode", value="toggle", label="Researcher Mode",
244
- description="Toggle on this button to enable advanced biosecurity expert capabilities.",
245
- payload={"enabled": False})
246
- ]
247
- ).send()
248
-
249
- welcome_message = """
250
- # **BELLA** 🧪
251
-
252
- Welcome to the **Biosecurity Engine for Learning, Logging, and Analysis** 🤖
253
-
254
- **Made with 💚 by BlueDot Impact Biosecurity Course**
255
- *For Community, by Community*
256
-
257
- Let’s work together to create a safer, more secure world! 🌐✨
258
-
259
- **How can I assist you today?**
260
- """
261
- await cl.Message(content=welcome_message).send()
262
-
263
 
264
  @cl.on_message
265
  async def main(message: cl.Message):
266
- """Handle incoming messages and file uploads."""
267
- analyzer = cl.user_session.get("analyzer")
268
- memory = cl.user_session.get("memory")
269
- if not analyzer:
270
- analyzer = BiosecurityAnalyzer()
271
- await analyzer.initialize()
272
- cl.user_session.set("analyzer", analyzer)
273
-
274
- user_input = message.content
275
- combined_content = ""
276
- gemini_response = "" # Initialize gemini_response to avoid reference errors
277
-
278
- try:
279
- # Check if the user provided a URL
280
- if "http://" in user_input or "https://" in user_input:
281
- url = next((s for s in user_input.split() if s.startswith("http")), None)
282
- if url:
283
- try:
284
- scraped_text = await analyzer.scrape_website_content(url)
285
- combined_content += f"{scraped_text}\n\n"
286
- user_input = user_input.replace(url, "") # Remove URL from the input
287
- except Exception as e:
288
- logger.error(f"Failed to scrape URL: {str(e)}")
289
- await cl.Message(content=f"❌ Failed to process the URL: {str(e)}").send()
290
-
291
- # Check if the message contains a file
292
- if message.elements:
293
- for element in message.elements:
294
- file_path = Path(element.path)
295
- file_extension = file_path.suffix.lower()
296
-
297
- if file_extension not in ALLOWED_EXTENSIONS:
298
- await cl.Message(content=f"Unsupported file type: {file_extension}").send()
299
- continue
300
-
301
- # Process the file based on its type
302
- try:
303
- if file_extension == ".pdf":
304
- extracted_text = await analyzer.extract_text_from_pdf(file_path)
305
- elif file_extension in {".jpg", ".jpeg", ".png"}:
306
- extracted_text = await analyzer.extract_text_from_image(file_path)
307
- else:
308
- await cl.Message(content="Unsupported file type.").send()
309
- continue
310
-
311
- combined_content += f"{extracted_text}\n\n"
312
- except Exception as e:
313
- logger.error(f"Failed to process file: {str(e)}")
314
- await cl.Message(content=f"❌ Failed to process the file: {str(e)}").send()
315
-
316
- # Add user-provided text (if any)
317
- if user_input.strip():
318
- combined_content += f"{user_input}\n\n"
319
-
320
- # Show "Analyzing..." message
321
- analyzing_msg = await cl.Message(content="Analyzing...").send()
322
-
323
- # Analyze the combined content using Gemini
324
- try:
325
- gemini_response = await analyzer.analyze_text(combined_content, prompt=DEFAULT_SYSTEM_PROMPT)
326
- # Display Gemini output
327
- await cl.Message(content=gemini_response).send()
328
-
329
- except Exception as e:
330
- logger.error(f"Analysis failed: {str(e)}")
331
- await cl.Message(content=f"❌ Analysis failed: {str(e)}").send()
332
-
333
- # Update memory with the latest interaction (only if gemini_response is defined)
334
- if gemini_response:
335
- memory.save_context({"input": user_input}, {"output": gemini_response})
336
-
337
- except Exception as e:
338
- error_msg = f"❌ Error: {str(e)}"
339
- logger.error(error_msg)
340
- await cl.Message(content=error_msg).send()
341
- finally:
342
- # Remove the "Analyzing..." message
343
- await analyzing_msg.remove()
344
-
345
-
346
- @cl.on_chat_end
347
- async def end():
348
- """Cleanup resources when chat ends."""
349
- try:
350
- analyzer = cl.user_session.get("analyzer")
351
- if analyzer:
352
- await analyzer.cleanup()
353
- except Exception as e:
354
- logger.error(f"Cleanup error: {str(e)}")
355
-
356
 
357
  if __name__ == "__main__":
358
  cl.run(app, port=7860)
 
1
  import os
 
 
 
 
 
 
2
  from dotenv import load_dotenv
3
+ import chainlit as cl
 
 
 
 
 
 
 
 
 
 
 
4
 
5
# Load environment variables from .env file
load_dotenv()


def _mask(secret):
    """Return a redacted preview of a secret suitable for logging.

    Shows at most the first 4 characters so an operator can confirm the
    right key loaded without the full value ever reaching stdout/logs.
    """
    if not secret:
        return "<missing>"
    return secret[:4] + "..." if len(secret) > 4 else "***"


# Check that the required API keys are present before the app starts.
GEMINI_API_KEY = os.getenv("GEMINI_API_KEY")
SERP_API_KEY = os.getenv("SERP_API_KEY")

if not GEMINI_API_KEY:
    raise ValueError("GEMINI_API_KEY environment variable is not set")
if not SERP_API_KEY:
    raise ValueError("SERP_API_KEY environment variable is not set")

# SECURITY: the previous debug code dumped *every* environment variable and
# both raw API keys to stdout, leaking secrets into container/build logs.
# Print only masked previews to confirm the keys loaded.
print(f"GEMINI_API_KEY: {_mask(GEMINI_API_KEY)}")
print(f"SERP_API_KEY: {_mask(SERP_API_KEY)}")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
24
 
25
# Chainlit app
@cl.on_chat_start
async def start():
    """Send a greeting when a new chat session opens."""
    greeting = cl.Message(content="Hello! How can I assist you today?")
    await greeting.send()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
29
 
30
@cl.on_message
async def main(message: cl.Message):
    """Echo the incoming user message back to the chat."""
    reply = cl.Message(content=f"You said: {message.content}")
    await reply.send()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
33
 
34
if __name__ == "__main__":
    # BUG FIX: the previous code called cl.run(app, port=7860), but `app` is
    # never defined in this module (immediate NameError) and Chainlit does not
    # expose a cl.run() entry point -- Chainlit apps are launched from the CLI.
    # Fail fast with an actionable message instead of crashing.
    import sys

    sys.exit("Run this app with: chainlit run app.py --port 7860")