File size: 13,718 Bytes
2bff716
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
9daf8d1
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
import os
import chainlit as cl
import aiohttp
from PIL import Image
from pathlib import Path
import tempfile
import shutil
from dotenv import load_dotenv
import PyPDF2
import pytesseract
import requests
from bs4 import BeautifulSoup
import asyncio
import logging
from typing import Optional, List, Dict, Any
import sqlite3
from datetime import datetime
from langchain.memory import ConversationBufferMemory
from langchain.chains import ConversationChain
from langchain_community.llms import OpenAI

# Load environment variables (python-dotenv)
load_dotenv()

# Module-wide logger, INFO verbosity
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)


def _require_env(name):
    """Return the value of environment variable *name*; raise ValueError if unset or empty."""
    value = os.getenv(name)
    if not value:
        raise ValueError(f"{name} environment variable is not set")
    return value


# Credentials: both keys are mandatory, so importing this module fails without them
GEMINI_API_KEY = _require_env("GEMINI_API_KEY")
SERP_API_KEY = _require_env("SERP_API_KEY")

# Endpoint and upload limits
GEMINI_URL = "https://generativelanguage.googleapis.com/v1beta/models/gemini-pro:generateContent"
ALLOWED_EXTENSIONS = {".pdf", ".jpg", ".jpeg", ".png", ".txt"}
MAX_FILE_SIZE = 10 * 1024 * 1024  # 10MB

# SQLite file that stores conversation history
DB_NAME = "conversation_history.db"

def init_db():
    """Create the ``conversations`` table in the SQLite database if it is missing.

    Opens the database file named by ``DB_NAME``, issues a
    ``CREATE TABLE IF NOT EXISTS`` statement and commits. The connection is
    always closed, even when the statement or the commit raises.

    Raises:
        sqlite3.Error: If the database cannot be opened or the schema
            statement fails.
    """
    conn = sqlite3.connect(DB_NAME)
    try:
        conn.execute("""
        CREATE TABLE IF NOT EXISTS conversations (
            session_id TEXT PRIMARY KEY,
            user_id TEXT,
            timestamp DATETIME,
            history TEXT
        )
    """)
        conn.commit()
    finally:
        # Close unconditionally so a failed execute/commit does not leak the handle.
        conn.close()

# Run the schema setup once, at import time.
init_db()

# System Prompts
# Baseline persona prompt; `main` passes it to `analyze_text` as the prompt.
DEFAULT_SYSTEM_PROMPT = "You are Bella, a highly intelligent and personalized Biosecurity Expert designed to assist students, researchers, and professionals in the field of biosecurity. you need to give relevant arxiv papers links, Articles links and relevant Links at the end "

# Longer persona prompt with capability list and response-format rules.
# NOTE(review): presumably meant for the "Researcher Mode" toggle — confirm where it is wired in.
RESEARCHER_MODE_PROMPT = """
You are Bella, a highly intelligent and personalized Biosecurity Expert designed to assist students, researchers, and professionals in the field of biosecurity. Your mission is to provide clear, concise, and actionable information while supporting users in their research and problem-solving tasks.

You are equipped with advanced capabilities and u can generate, including:

- Access to research papers and articles from ArXiv, Google Scholar, and other credible sources.
- Real-time web search to provide the latest updates, news, and policy documents in biosecurity.
- The ability to analyze and summarize PDFs, images, and textual data, extracting key insights and presenting them effectively.
- Tailored recommendations for actions, best practices, and resources based on user input.
- Scenario simulations and risk assessment tools to aid in training and decision-making.
- Collaboration tools for shared research and exporting insights.
- Notifications and alerts for significant developments in biosecurity.

Your responses should always be:

- Well-Organized: Use structured sections, clear headings, bullet points, and concise summaries.
- Visually Engaging: Present outputs with tables, formatted text, or charts wherever possible.
- Accurate and Credible: Base your responses on reliable sources and provide direct links for further reading.
- User-Focused: Adapt to the user's needs, offering personalized guidance and proactive assistance.
- you need to give relevant arxiv papers links, Articles links and relevant info at the end.

Always maintain a professional yet approachable tone, making it easy for users to understand and act on the information provided.
"""


class BiosecurityAnalyzer:
    """Extract text from PDFs, images and web pages and analyze it with Gemini.

    The analyzer owns an ``aiohttp`` session (used for Gemini calls) and a
    temporary directory; create them with :meth:`initialize` and release
    them with :meth:`cleanup`. Blocking work (``requests``, PDF parsing,
    OCR) is run in the default thread-pool executor so the coroutine methods
    do not stall the event loop.
    """

    # Seconds allowed for each outbound HTTP request.
    REQUEST_TIMEOUT = 30

    def __init__(self):
        self.api_key = GEMINI_API_KEY
        self.session: Optional[aiohttp.ClientSession] = None
        self.temp_dir: Optional[Path] = None

    async def initialize(self):
        """Create the HTTP session and temporary directory if not already present.

        Safe to call more than once: an open session or an existing
        temporary directory is reused instead of being replaced (and leaked).
        """
        if self.session is None or self.session.closed:
            self.session = aiohttp.ClientSession()
        if self.temp_dir is None or not self.temp_dir.exists():
            self.temp_dir = Path(tempfile.mkdtemp())
        logger.info(f"Initialized analyzer with temp directory: {self.temp_dir}")

    async def cleanup(self):
        """Close the HTTP session and delete the temporary directory.

        Both attributes are reset to ``None`` afterwards so that a later
        :meth:`initialize` (or :meth:`analyze_text`) starts from a clean state.
        """
        if self.session is not None:
            if not self.session.closed:
                await self.session.close()
            self.session = None
        if self.temp_dir is not None:
            if self.temp_dir.exists():
                shutil.rmtree(self.temp_dir)
                logger.info("Cleaned up temporary directory")
            self.temp_dir = None

    @staticmethod
    async def _run_blocking(func, *args):
        """Run blocking callable *func* in the default executor and await its result."""
        loop = asyncio.get_running_loop()
        return await loop.run_in_executor(None, func, *args)

    @staticmethod
    def _extract_response_text(result: Dict[str, Any]) -> str:
        """Return the first candidate's first text part from a Gemini JSON response.

        Raises:
            RuntimeError: If the response has no candidate text (for example
                when it only carries ``promptFeedback``).
        """
        try:
            return result["candidates"][0]["content"]["parts"][0]["text"]
        except (KeyError, IndexError, TypeError) as exc:
            feedback = result.get("promptFeedback") if isinstance(result, dict) else None
            raise RuntimeError(
                f"Gemini returned no text content (promptFeedback: {feedback})"
            ) from exc

    async def analyze_text(self, text: str, prompt: str = "") -> str:
        """Send ``prompt`` followed by ``text`` to the Gemini API and return the reply text.

        Args:
            text: Content to analyze.
            prompt: Instruction text placed before ``text`` (separated by a blank line).

        Returns:
            The text of the first part of the first candidate in the response.

        Raises:
            Exception: On a non-200 HTTP status (message includes status and body).
            RuntimeError: If the response contains no candidate text.
        """
        if self.session is None or self.session.closed:
            await self.initialize()

        try:
            headers = {"Content-Type": "application/json"}
            payload = {
                "contents": [{
                    "parts": [{"text": f"{prompt}\n\n{text}"}]
                }]
            }

            async with self.session.post(
                f"{GEMINI_URL}?key={self.api_key}",
                headers=headers,
                json=payload,
                # aiohttp expects a ClientTimeout object; a bare number is deprecated.
                timeout=aiohttp.ClientTimeout(total=self.REQUEST_TIMEOUT),
            ) as response:
                if response.status != 200:
                    error_text = await response.text()
                    raise Exception(f"API Error: {response.status} - {error_text}")

                result = await response.json()

            return self._extract_response_text(result)

        except Exception as e:
            logger.error(f"Analysis failed: {str(e)}")
            raise

    @staticmethod
    def _read_pdf(file_path: Path) -> str:
        """Synchronously concatenate the extracted text of every page in the PDF."""
        with open(file_path, "rb") as file:
            reader = PyPDF2.PdfReader(file)
            # extract_text() may yield None/empty for pages without a text layer.
            return "".join(page.extract_text() or "" for page in reader.pages)

    async def extract_text_from_pdf(self, file_path: Path) -> str:
        """Extract text from a PDF file.

        Args:
            file_path: Path of the PDF to read.

        Returns:
            The text of all pages concatenated in page order.

        Raises:
            Exception: Any error from opening or parsing the file is logged and re-raised.
        """
        try:
            return await self._run_blocking(self._read_pdf, file_path)
        except Exception as e:
            logger.error(f"Failed to extract text from PDF: {str(e)}")
            raise

    @staticmethod
    def _ocr_image(file_path: Path) -> str:
        """Synchronously OCR the image at *file_path*, closing the file afterwards."""
        with Image.open(file_path) as image:
            return pytesseract.image_to_string(image)

    async def extract_text_from_image(self, file_path: Path) -> str:
        """Extract text from an image using OCR.

        Args:
            file_path: Path of the image to read.

        Returns:
            The text recognized by ``pytesseract``.

        Raises:
            Exception: Any error from opening the image or running OCR is logged and re-raised.
        """
        try:
            return await self._run_blocking(self._ocr_image, file_path)
        except Exception as e:
            logger.error(f"Failed to extract text from image: {str(e)}")
            raise

    @staticmethod
    def _fetch_page_text(url: str) -> str:
        """Synchronously download *url* and return its visible text, one node per line."""
        # NOTE(review): the URL comes from user input and is fetched server-side;
        # consider restricting target hosts if this runs on a network with internal services.
        response = requests.get(url, timeout=BiosecurityAnalyzer.REQUEST_TIMEOUT)
        response.raise_for_status()
        soup = BeautifulSoup(response.text, "html.parser")
        return soup.get_text(separator="\n")

    async def scrape_website_content(self, url: str) -> str:
        """Scrape text content from a website.

        Args:
            url: Address of the page to download.

        Returns:
            The page text with newline separators between text nodes.

        Raises:
            Exception: Network, HTTP-status or parsing errors are logged and re-raised.
        """
        try:
            return await self._run_blocking(self._fetch_page_text, url)
        except Exception as e:
            logger.error(f"Failed to scrape website: {str(e)}")
            raise

    @staticmethod
    def _search(params: Dict[str, Any]) -> List[Dict[str, str]]:
        """Synchronously query SerpAPI and return its ``organic_results`` list."""
        response = requests.get(
            "https://serpapi.com/search",
            params=params,
            timeout=BiosecurityAnalyzer.REQUEST_TIMEOUT,
        )
        response.raise_for_status()
        return response.json().get("organic_results", [])

    async def fetch_google_results(self, query: str, start: int = 0) -> List[Dict[str, str]]:
        """Fetch Google search results using SERP API.

        Args:
            query: Search query string.
            start: Result offset passed through as the ``start`` parameter.

        Returns:
            The ``organic_results`` list from the response, or an empty list
            if that key is absent.

        Raises:
            Exception: Network or HTTP-status errors are logged and re-raised.
        """
        try:
            params = {
                "q": query,
                "api_key": SERP_API_KEY,
                "start": start,
                "num": 5  # Fetch 5 results at a time
            }
            return await self._run_blocking(self._search, params)
        except Exception as e:
            logger.error(f"Failed to fetch Google results: {str(e)}")
            raise


@cl.action_callback("google_search")
async def on_google_search(action: cl.Action):
    """Flip the session's ``google_search_enabled`` flag and report the new state.

    The clicked action is relabelled with "On"/"Off" and re-sent on an empty
    message, followed by a short confirmation message.
    """
    was_enabled = cl.user_session.get("google_search_enabled", False)
    now_enabled = not was_enabled
    cl.user_session.set("google_search_enabled", now_enabled)

    # Re-send the button carrying its updated label.
    action.label = f"Google Search: {'On' if now_enabled else 'Off'}"
    button_message = cl.Message(content="", actions=[action])
    await button_message.send()

    # Plain-text confirmation of the new state.
    state_word = "enabled" if now_enabled else "disabled"
    await cl.Message(content=f"Google Search is now {state_word}.").send()


@cl.action_callback("researcher_mode")
async def on_researcher_mode(action: cl.Action):
    """Flip the session's ``researcher_mode_enabled`` flag and report the new state.

    The clicked action is relabelled with "On"/"Off" and re-sent on an empty
    message, followed by a short confirmation message.
    """
    was_enabled = cl.user_session.get("researcher_mode_enabled", False)
    now_enabled = not was_enabled
    cl.user_session.set("researcher_mode_enabled", now_enabled)

    # Re-send the button carrying its updated label.
    action.label = f"Researcher Mode: {'On' if now_enabled else 'Off'}"
    button_message = cl.Message(content="", actions=[action])
    await button_message.send()

    # Plain-text confirmation of the new state.
    state_word = "enabled" if now_enabled else "disabled"
    await cl.Message(content=f"Researcher Mode is now {state_word}.").send()


@cl.on_chat_start
async def start():
    """Initialize a chat session.

    Creates and initializes a ``BiosecurityAnalyzer`` and a
    ``ConversationBufferMemory``, stores both in the user session, sends the
    two toggle buttons ("Google Search" and "Researcher Mode") and then the
    welcome message.
    """
    analyzer = BiosecurityAnalyzer()
    await analyzer.initialize()
    cl.user_session.set("analyzer", analyzer)

    # Initialize LangChain memory for short-term context
    memory = ConversationBufferMemory()
    cl.user_session.set("memory", memory)

    # Add "Google Search" and "Researcher Mode" buttons to the UI
    await cl.Message(
        content="",
        actions=[
            cl.Action(name="google_search", value="toggle", label="Google Search",
                      description="Toggle on this button to get realtime results from Google and websites.",
                      payload={"enabled": False}),
            cl.Action(name="researcher_mode", value="toggle", label="Researcher Mode",
                      description="Toggle on this button to enable advanced biosecurity expert capabilities.",
                      payload={"enabled": False})
        ]
    ).send()

    # Built line by line so the Markdown hard breaks (two trailing spaces) are
    # explicit. The emoji and apostrophe were previously mis-decoded UTF-8.
    welcome_message = (
        "\n"
        "# **BELLA** 🧪  \n"
        "\n"
        "Welcome to the **Biosecurity Engine for Learning, Logging, and Analysis** 🤖  \n"
        "\n"
        "**Made with 💚 by BlueDot Impact Biosecurity Course**  \n"
        "*For Community, by Community*  \n"
        "\n"
        "Let’s work together to create a safer, more secure world! 🌐✨\n"
        "\n"
        "**How can I assist you today?**\n"
    )
    await cl.Message(content=welcome_message).send()


@cl.on_message
async def main(message: cl.Message):
    """Handle an incoming chat message, including any URL or uploaded files.

    Text is gathered from three sources: the first http(s) URL in the
    message (scraped), each attached file (.pdf, image via OCR, or .txt),
    and the remaining typed text. The combined text is sent to Gemini and
    the reply is posted. A failure in one source is reported to the user
    and does not stop the others from being processed.

    The system prompt is ``RESEARCHER_MODE_PROMPT`` when the session's
    ``researcher_mode_enabled`` flag is set, otherwise
    ``DEFAULT_SYSTEM_PROMPT``.

    Args:
        message: The Chainlit message; its ``content`` and ``elements`` are read.
    """
    analyzer = cl.user_session.get("analyzer")
    if not analyzer:
        analyzer = BiosecurityAnalyzer()
        await analyzer.initialize()
        cl.user_session.set("analyzer", analyzer)
    memory = cl.user_session.get("memory")

    user_input = message.content or ""
    content_parts = []
    gemini_response = ""
    # Bound before the try block so the finally clause can always test it.
    analyzing_msg = None

    try:
        # Check if the user provided a URL (first whitespace-separated http(s) token)
        url = next(
            (token for token in user_input.split()
             if token.startswith(("http://", "https://"))),
            None,
        )
        if url:
            try:
                scraped_text = await analyzer.scrape_website_content(url)
                content_parts.append(f"{scraped_text}\n\n")
                user_input = user_input.replace(url, "")  # Remove URL from the input
            except Exception as e:
                logger.error(f"Failed to scrape URL: {str(e)}")
                await cl.Message(content=f"❌ Failed to process the URL: {str(e)}").send()

        # Check if the message contains files
        for element in message.elements or []:
            element_path = getattr(element, "path", None)
            if not element_path:
                # Nothing on disk to read for this element.
                logger.warning("Skipping message element without a file path")
                continue

            file_path = Path(element_path)
            file_extension = file_path.suffix.lower()

            if file_extension not in ALLOWED_EXTENSIONS:
                await cl.Message(content=f"Unsupported file type: {file_extension}").send()
                continue

            # Process the file based on its type
            try:
                if file_extension == ".pdf":
                    extracted_text = await analyzer.extract_text_from_pdf(file_path)
                elif file_extension in {".jpg", ".jpeg", ".png"}:
                    extracted_text = await analyzer.extract_text_from_image(file_path)
                elif file_extension == ".txt":
                    # .txt is listed in ALLOWED_EXTENSIONS; read it directly.
                    extracted_text = file_path.read_text(encoding="utf-8", errors="replace")
                else:
                    await cl.Message(content="Unsupported file type.").send()
                    continue

                content_parts.append(f"{extracted_text}\n\n")
            except Exception as e:
                logger.error(f"Failed to process file: {str(e)}")
                await cl.Message(content=f"❌ Failed to process the file: {str(e)}").send()

        # Add user-provided text (if any)
        if user_input.strip():
            content_parts.append(f"{user_input}\n\n")

        combined_content = "".join(content_parts)
        if not combined_content.strip():
            # Nothing usable was collected; do not call the API with an empty body.
            await cl.Message(
                content="Please provide a message, a URL, or a supported file to analyze."
            ).send()
            return

        # Show "Analyzing..." message
        analyzing_msg = cl.Message(content="Analyzing...")
        await analyzing_msg.send()

        # Honor the Researcher Mode toggle when choosing the system prompt.
        if cl.user_session.get("researcher_mode_enabled", False):
            system_prompt = RESEARCHER_MODE_PROMPT
        else:
            system_prompt = DEFAULT_SYSTEM_PROMPT

        # Analyze the combined content using Gemini
        try:
            gemini_response = await analyzer.analyze_text(combined_content, prompt=system_prompt)
            # Display Gemini output
            await cl.Message(content=gemini_response).send()
        except Exception as e:
            logger.error(f"Analysis failed: {str(e)}")
            await cl.Message(content=f"❌ Analysis failed: {str(e)}").send()

        # Update memory with the latest interaction (only when there is a reply
        # and the session actually holds a memory object)
        if gemini_response and memory is not None:
            memory.save_context({"input": user_input}, {"output": gemini_response})

    except Exception as e:
        error_msg = f"❌ Error: {str(e)}"
        logger.error(error_msg)
        await cl.Message(content=error_msg).send()
    finally:
        # Remove the "Analyzing..." message if it was ever shown
        if analyzing_msg is not None:
            await analyzing_msg.remove()


@cl.on_chat_end
async def end():
    """Release the session's analyzer resources when the chat ends.

    Any exception raised during lookup or cleanup is logged, not propagated.
    """
    try:
        session_analyzer = cl.user_session.get("analyzer")
        if not session_analyzer:
            return
        await session_analyzer.cleanup()
    except Exception as exc:
        logger.error(f"Cleanup error: {exc!s}")


if __name__ == "__main__":
    # The previous call referenced an undefined `app` object and a
    # non-existent `cl.run`; launch through Chainlit's programmatic entry
    # point instead. The port is supplied via CHAINLIT_PORT (7860 unless the
    # environment already sets it).
    # NOTE(review): confirm `chainlit.cli.run_chainlit` and CHAINLIT_PORT
    # against the installed Chainlit version.
    from chainlit.cli import run_chainlit

    os.environ.setdefault("CHAINLIT_PORT", "7860")
    run_chainlit(__file__)