merasabkuch commited on
Commit
e8b2588
·
verified ·
1 Parent(s): c9715d7

Upload 10 files

Browse files
Files changed (10) hide show
  1. .env +4 -0
  2. .gitignore +4 -0
  3. Dockerfile +28 -0
  4. README.md +8 -10
  5. chats.db +0 -0
  6. db.py +54 -0
  7. main.py +373 -0
  8. old/MainPage copy.jsx +419 -0
  9. old/main_old.py +337 -0
  10. requirements.txt +18 -0
.env ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ GOOGLE_CLIENT_ID=862058885628-e6mjev28p8e112qrp9gnn4q8mlif3bbf.apps.googleusercontent.com
2
+ GOOGLE_CLIENT_SECRET=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
3
+ GOOGLE_REDIRECT_URI=http://localhost:5173/callback
4
+ GEMINI_API_KEYS=AIzaSyAbYBc-lMZhgKujtDogbVmyoP0GtwzMmIQ,AIzaSyAwi5BZeHJZw8hU6xdlO3sY3VN_IaVnjuU,AIzaSyBV7muBFoEWL6GkSrf31KH_zgea-Vj25RY,AIzaSyB8BwqbD2wv2rnqaDtUfp5BVO2Afu3UZb0,AIzaSyD8QQjWPxyW1C8W2cDNFGlKpNiSvRz6Jpw,AIzaSyArv7uT7PeGRs17czv02PV7SENWfmEbXcE
.gitignore ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ .env
2
+ venv/
3
+
4
+ __pycache__/
Dockerfile ADDED
@@ -0,0 +1,28 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
# Use Python as base image
FROM python:3.10-slim

# Set environment variables
ENV PYTHONUNBUFFERED=1 \
    PYTHONDONTWRITEBYTECODE=1 \
    CELERY_BROKER_URL=redis://redis:6379/0

# Set the working directory inside the container
WORKDIR /app

# Install system dependencies
RUN apt-get update && apt-get install -y \
    gcc \
    libpq-dev \
    && rm -rf /var/lib/apt/lists/*

# Install Python dependencies BEFORE copying the source tree, so this layer is
# reused from the build cache until requirements.txt itself changes.
COPY requirements.txt /app/requirements.txt
RUN pip install --upgrade pip && pip install --no-cache-dir -r requirements.txt

# Copy the project files into the container.
# NOTE(review): this also copies .env and chats.db if they are present in the
# build context; add a .dockerignore and inject secrets at runtime instead.
COPY . /app

# Expose the application's port
EXPOSE 8000

# Start the FastAPI application
CMD ["gunicorn", "-k", "uvicorn.workers.UvicornWorker", "main:app", "--bind", "0.0.0.0:8000", "--workers", "4"]
README.md CHANGED
@@ -1,10 +1,8 @@
1
- ---
2
- title: Eduscope
3
- emoji: 🐢
4
- colorFrom: green
5
- colorTo: gray
6
- sdk: docker
7
- pinned: false
8
- ---
9
-
10
- Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
 
1
+ ---
2
+ title: EDUCSCOPEAI
3
+ emoji: 🐳
4
+ colorFrom: purple
5
+ colorTo: gray
6
+ sdk: docker
7
+ app_port: 8000
8
+ ---
 
 
chats.db ADDED
Binary file (119 kB). View file
 
db.py ADDED
@@ -0,0 +1,54 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from sqlalchemy import create_engine, Column, String, Text, ForeignKey, DateTime
2
+ from sqlalchemy.orm import sessionmaker, declarative_base, relationship
3
+ from datetime import datetime
4
+
5
+ # Database Configuration
6
# SQLite database file, resolved relative to the process working directory.
DATABASE_URL = "sqlite:///./chats.db"

# check_same_thread=False is passed through to sqlite3 so a connection may be
# used from a thread other than the one that created it.
engine = create_engine(
    DATABASE_URL,
    connect_args={"check_same_thread": False},
)

# Session factory: no autocommit, no autoflush, bound to the engine above.
SessionLocal = sessionmaker(bind=engine, autoflush=False, autocommit=False)

# Declarative base class shared by every model in this module.
Base = declarative_base()
10
+
11
class User(Base):
    """Account row in the ``users`` table; parent of that user's chats."""

    __tablename__ = "users"

    # Identifier supplied by the caller when the row is created.
    id = Column(String, primary_key=True, unique=True, nullable=False)
    email = Column(String, unique=True, nullable=False)
    name = Column(String, nullable=False)

    # ORM-level cascade: deleting a User deletes its Chat rows as well.
    chats = relationship(
        "Chat",
        back_populates="user",
        cascade="all, delete-orphan",
    )
17
+
18
class Chat(Base):
    """Conversation row in the ``chats`` table, owned by one user.

    A chat groups the messages exchanged in it and the documents uploaded to it.
    """

    __tablename__ = "chats"

    chat_id = Column(String, primary_key=True, unique=True, nullable=False)
    user_id = Column(
        String,
        ForeignKey("users.id", ondelete="CASCADE"),
        nullable=False,
    )
    title = Column(String, nullable=False)
    # Filled with datetime.utcnow() at insert time when no value is given.
    timestamp = Column(DateTime, default=datetime.utcnow, nullable=False)

    # Children are removed together with the chat (ORM-level cascade).
    messages = relationship(
        "ChatMessage",
        back_populates="chat",
        cascade="all, delete-orphan",
    )
    documents = relationship(
        "Document",
        back_populates="chat",
        cascade="all, delete-orphan",
    )
    user = relationship("User", back_populates="chats")
27
+
28
class ChatMessage(Base):
    """Single message row in the ``chat_messages`` table."""

    __tablename__ = "chat_messages"

    id = Column(String, primary_key=True, unique=True, nullable=False)
    chat_id = Column(
        String,
        ForeignKey("chats.chat_id", ondelete="CASCADE"),
        nullable=False,
    )
    type = Column(String, nullable=False)  # 'user' or 'assistant'
    content = Column(Text, nullable=False)
    # Filled with datetime.utcnow() at insert time when no value is given.
    timestamp = Column(DateTime, default=datetime.utcnow, nullable=False)
    referenced_docs = Column(Text, nullable=True)  # JSON string of referenced documents

    chat = relationship("Chat", back_populates="messages")
37
+
38
class Document(Base):
    """Uploaded document row in the ``documents`` table, attached to one chat."""

    __tablename__ = "documents"

    id = Column(String, primary_key=True, unique=True, nullable=False)
    chat_id = Column(
        String,
        ForeignKey("chats.chat_id", ondelete="CASCADE"),
        nullable=False,
    )
    name = Column(String, nullable=False)
    # Text content of the document, stored as one Text value.
    content = Column(Text, nullable=False)
    # Filled with datetime.utcnow() at insert time when no value is given.
    timestamp = Column(DateTime, default=datetime.utcnow, nullable=False)

    chat = relationship("Chat", back_populates="documents")
46
+
47
+ Base.metadata.create_all(bind=engine)
48
+
49
def get_db():
    """Yield a fresh database session and close it when the consumer is done.

    Written as a generator so the session is closed even if the code using it
    raises: leaving the ``with`` block closes the session on every exit path.
    """
    with SessionLocal() as session:
        yield session
main.py ADDED
@@ -0,0 +1,373 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from fastapi import FastAPI, UploadFile, File, HTTPException, Depends, Header
2
+ from fastapi.middleware.cors import CORSMiddleware
3
+ from pydantic import BaseModel
4
+ import google.generativeai as genai
5
+ from typing import List
6
+ import os
7
+ from dotenv import load_dotenv
8
+ import io
9
+ from datetime import datetime, timedelta
10
+ import uuid
11
+
12
+ import json
13
+ import re
14
+
15
+ # File Format Libraries
16
+ import PyPDF2
17
+ import docx
18
+ import openpyxl
19
+ import csv
20
+ import io
21
+ import pptx
22
+ from db import get_db, Chat, ChatMessage, User, Document, SessionLocal
23
+
24
+ from fastapi.security import OAuth2PasswordBearer
25
+ import requests
26
+ from jose import jwt
27
+ import random
28
+
29
+ oauth2_scheme = OAuth2PasswordBearer(tokenUrl="token")
30
+
31
+ load_dotenv()
32
+
33
# Google OAuth settings, read from the environment (populated by load_dotenv()).
GOOGLE_CLIENT_ID = os.getenv('GOOGLE_CLIENT_ID')
GOOGLE_CLIENT_SECRET = os.getenv('GOOGLE_CLIENT_SECRET')
GOOGLE_REDIRECT_URI = os.getenv('GOOGLE_REDIRECT_URI')


def parse_api_keys(raw):
    """Split a comma-separated key list into clean, non-empty keys.

    Args:
        raw: The raw setting value, or None when the variable is unset.

    Returns:
        A list of keys with surrounding whitespace removed and blank entries
        (e.g. from a trailing comma) dropped; an empty list when *raw* is
        None or empty.
    """
    if not raw:
        return []
    return [key.strip() for key in raw.split(',') if key.strip()]


# Tolerates an unset GEMINI_API_KEYS (empty list) instead of failing at import
# with AttributeError on None.split().
api_keys = parse_api_keys(os.getenv('GEMINI_API_KEYS'))
38
+
39
+
40
+
41
def parse_json_from_gemini(json_str: str):
    """Parse the JSON payload out of a Gemini text response.

    Tries, in order:
      1. the contents of the first ```json fenced block, if there is one,
         otherwise the whole (stripped) text;
      2. the first ``{...}`` object found anywhere in the text, so a reply
         that wraps the JSON in prose or omits the fence is still usable.

    Args:
        json_str: Raw response text from the model.

    Returns:
        The decoded JSON value, or None when *json_str* is not a string or no
        valid JSON could be recovered.
    """
    if not isinstance(json_str, str):
        return None

    text = json_str.strip()
    fenced = re.search(r"```json\s*(.*?)\s*```", text, re.DOTALL)
    candidate = fenced.group(1) if fenced else text

    try:
        return json.loads(candidate)
    except json.JSONDecodeError:
        # Fall back to scanning for an embedded object. raw_decode parses one
        # JSON value starting at the given index and ignores trailing text.
        decoder = json.JSONDecoder()
        start = text.find("{")
        while start != -1:
            try:
                parsed, _end = decoder.raw_decode(text, start)
            except json.JSONDecodeError:
                start = text.find("{", start + 1)
            else:
                return parsed
        return None
52
+
53
# load_dotenv() has already run above, before the settings were read, so it is
# not repeated here.
app = FastAPI(title="EduScope AI")

# Configure CORS
# NOTE(review): a wildcard origin combined with allow_credentials=True is not
# honoured by browsers for credentialed requests. The API authenticates with an
# Authorization header, so either list the real front-end origins here or set
# allow_credentials=False - confirm against the deployed front end.
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],
    allow_credentials=True,
    allow_methods=["*"],
    allow_headers=["*"],
)
65
+
66
+
67
+ @app.get("/login/google")
68
+ async def login_google():
69
+ return {
70
+ "url": f"https://accounts.google.com/o/oauth2/auth?response_type=code&client_id={GOOGLE_CLIENT_ID}&redirect_uri={GOOGLE_REDIRECT_URI}&scope=openid%20profile%20email&access_type=offline"
71
+ }
72
+
73
@app.get("/auth/google")
async def auth_google(code: str, db: SessionLocal = Depends(get_db)):
    """Exchange a Google authorization code for an application token.

    Posts the code to Google's token endpoint, fetches the user's profile with
    the returned access token, creates the User row on first sign-in, and
    returns the profile encoded as an HS256 JWT.

    Args:
        code: Authorization code from the OAuth redirect.
        db: Database session.

    Returns:
        A dict with a single ``token`` key.

    Raises:
        HTTPException: 502 when Google cannot be reached or answers with a
            non-JSON body; 401 when no access token or usable profile comes back.
    """
    # NOTE(review): `requests` is blocking and runs inside an async handler, so
    # the event loop stalls for the duration of each call; the timeout bounds it.
    token_url = "https://accounts.google.com/o/oauth2/token"
    payload = {
        "code": code,
        "client_id": GOOGLE_CLIENT_ID,
        "client_secret": GOOGLE_CLIENT_SECRET,
        "redirect_uri": GOOGLE_REDIRECT_URI,
        "grant_type": "authorization_code",
    }
    try:
        token_response = requests.post(token_url, data=payload, timeout=10)
        access_token = token_response.json().get("access_token")
    except (requests.RequestException, ValueError) as exc:
        raise HTTPException(status_code=502, detail="Google token exchange failed") from exc
    if not access_token:
        # Typically an expired or already-used code, or mismatched client settings.
        raise HTTPException(status_code=401, detail="Google rejected the authorization code")

    try:
        user_info = requests.get(
            "https://www.googleapis.com/oauth2/v1/userinfo",
            headers={"Authorization": f"Bearer {access_token}"},
            timeout=10,
        ).json()
    except (requests.RequestException, ValueError) as exc:
        raise HTTPException(status_code=502, detail="Could not fetch Google profile") from exc
    if "id" not in user_info or "email" not in user_info:
        raise HTTPException(status_code=401, detail="Google profile is missing required fields")

    user = db.query(User).filter(User.id == user_info["id"]).first()
    if not user:
        user = User(
            id=user_info["id"],
            email=user_info["email"],
            # `name` is NOT NULL in the schema; fall back to the e-mail address
            # if the profile carries no display name.
            name=user_info.get("name") or user_info["email"],
        )
        db.add(user)
        db.commit()

    # NOTE(review): the token is signed with the OAuth client secret and carries
    # no `exp` claim, so it never expires - consider a dedicated signing key and
    # an expiry.
    return {"token": jwt.encode(user_info, GOOGLE_CLIENT_SECRET, algorithm="HS256")}
94
+
95
+
96
async def decode_token(authorization: str = Header(...)):
    """Validate the ``Authorization: Bearer <jwt>`` header and return its claims.

    Args:
        authorization: Raw value of the Authorization request header.

    Returns:
        The decoded JWT payload.

    Raises:
        HTTPException: 400 when the header does not start with ``Bearer ``;
            401 when the token is expired or otherwise invalid.
    """
    # `jwt` here is python-jose, which has no `InvalidTokenError` (that is a
    # PyJWT name); referencing it in an except clause raised AttributeError and
    # turned every invalid token into a 500. Use jose's own exception types.
    from jose.exceptions import ExpiredSignatureError, JWTError

    if not authorization.startswith("Bearer "):
        raise HTTPException(
            status_code=400,
            detail="Authorization header must start with 'Bearer '"
        )

    token = authorization[len("Bearer "):]  # Extract token part

    try:
        # Decode and verify the JWT token
        return jwt.decode(token, GOOGLE_CLIENT_SECRET, algorithms=["HS256"])
    except ExpiredSignatureError:
        # Must be caught before JWTError: it is a subclass of it.
        raise HTTPException(status_code=401, detail="Token has expired")
    except JWTError:
        raise HTTPException(status_code=401, detail="Invalid token")
113
+
114
+
115
@app.get("/token")
async def get_token(user_data: dict = Depends(decode_token)):
    """Echo back the claims of the caller's bearer token."""
    claims = user_data
    return claims
118
+
119
+
120
@app.post("/chats")
async def create_chat(title: str, user_data: dict = Depends(decode_token), db: SessionLocal = Depends(get_db)):
    """Create a new chat owned by the authenticated user.

    Returns:
        A dict with the generated ``chat_id``, the ``title`` and the stored
        ``timestamp``.
    """
    owner_id = user_data["id"]
    new_chat = Chat(
        chat_id=str(uuid.uuid4()),
        user_id=owner_id,
        title=title,
    )
    db.add(new_chat)
    db.commit()

    created = {
        "chat_id": new_chat.chat_id,
        "title": title,
        "timestamp": new_chat.timestamp,
    }
    return created
128
+
129
+
130
@app.get("/chats")
async def get_chats(user_data: dict = Depends(decode_token), db: SessionLocal = Depends(get_db)):
    """List every chat that belongs to the authenticated user."""
    owner_id = user_data["id"]
    owned = db.query(Chat).filter(Chat.user_id == owner_id).all()

    summaries = []
    for row in owned:
        summaries.append({
            "chat_id": row.chat_id,
            "title": row.title,
            "timestamp": row.timestamp,
        })
    return summaries
136
+
137
+
138
+
139
class DocumentSchema(BaseModel):
    """Metadata describing one document: its id, name and timestamp."""

    id: str
    name: str
    timestamp: str
143
+
144
class Query(BaseModel):
    """Request body for an analysis: the question and the documents to use."""

    text: str
    # Document ids; the analyze endpoint matches them against Document.id.
    selected_docs: List[str]
147
+
148
class ChatMessageSchema(BaseModel):
    """Serialized form of a single chat message."""

    id: str
    type: str  # 'user' or 'assistant'
    content: str
    timestamp: str
    referenced_docs: List[str] = []
154
+
155
class Analysis(BaseModel):
    """Response body of the analyze endpoint."""

    insight: str
    pareto_analysis: dict
158
+
159
def extract_text_from_file(file: UploadFile):
    """
    Extract text from various file types
    Supports: PDF, DOCX, XLSX, CSV, TXT, PPTX

    The format is chosen from the filename extension (case-insensitive).
    ``.ppt`` is routed to the same reader as ``.pptx``.

    Args:
        file: The uploaded file; its whole content is read into memory.

    Returns:
        The extracted text as a single string.

    Raises:
        HTTPException: 400 when the extension is unsupported or the content
            cannot be parsed or decoded.
    """
    # filename may be absent on an upload; treat that as "no extension" so it
    # is reported as an unsupported type instead of raising TypeError.
    file_extension = os.path.splitext(file.filename or "")[1].lower()
    content = file.file.read()

    try:
        if file_extension == '.pdf':
            pdf_reader = PyPDF2.PdfReader(io.BytesIO(content))
            # Guard against pages that yield no text (e.g. scanned images).
            text = "\n".join(page.extract_text() or "" for page in pdf_reader.pages)

        elif file_extension == '.docx':
            doc = docx.Document(io.BytesIO(content))
            text = "\n".join(para.text for para in doc.paragraphs)

        elif file_extension == '.xlsx':
            wb = openpyxl.load_workbook(io.BytesIO(content), read_only=True)
            try:
                # One output line per row; empty cells are skipped.
                text = "".join(
                    " ".join(str(cell) for cell in row if cell is not None) + "\n"
                    for sheet in wb
                    for row in sheet.iter_rows(values_only=True)
                )
            finally:
                wb.close()

        elif file_extension == '.csv':
            # utf-8-sig also accepts plain UTF-8 and drops a leading BOM.
            csv_reader = csv.reader(io.StringIO(content.decode('utf-8-sig'), newline=''))
            text = "\n".join(" ".join(row) for row in csv_reader)

        elif file_extension == '.txt':
            text = content.decode('utf-8-sig')

        elif file_extension in ['.ppt', '.pptx']:
            ppt = pptx.Presentation(io.BytesIO(content))
            text = "".join(
                shape.text + "\n"
                for slide in ppt.slides
                for shape in slide.shapes
                if hasattr(shape, "text")
            )

        else:
            raise ValueError(f"Unsupported file type: {file_extension}")

        return text
    except Exception as e:
        # Any parsing problem is reported to the client as a bad upload.
        raise HTTPException(status_code=400, detail=f"Error processing file: {str(e)}") from e
205
+
206
@app.post("/chats/{chat_id}/upload")
async def upload_document(chat_id: str, file: UploadFile = File(...), user_data: dict = Depends(decode_token), db: SessionLocal = Depends(get_db)):
    """Extract the text of an uploaded file and store it as a chat document.

    Returns:
        The new document's ``id``, ``name`` and ISO-formatted ``timestamp``.

    Raises:
        HTTPException: 404 when the chat does not exist or is not owned by the
            caller; errors from text extraction are passed through; anything
            else becomes a 500.
    """
    owner_id = user_data["id"]
    # Check if the chat exists and belongs to the user
    owned_chat = (
        db.query(Chat)
        .filter(Chat.chat_id == chat_id, Chat.user_id == owner_id)
        .first()
    )
    if owned_chat is None:
        raise HTTPException(status_code=404, detail="Chat not found")

    try:
        extracted = extract_text_from_file(file)
        record = Document(
            id=str(uuid.uuid4()),
            chat_id=chat_id,
            name=file.filename,
            content=extracted,
            timestamp=datetime.now(),
        )
        db.add(record)
        db.commit()
        db.refresh(record)
        summary = {
            "id": record.id,
            "name": record.name,
            "timestamp": record.timestamp.isoformat(),
        }
        return summary
    except HTTPException:
        # Already a well-formed API error; let it through untouched.
        raise
    except Exception as exc:
        raise HTTPException(status_code=500, detail=f"Unexpected error: {str(exc)}")
235
+
236
@app.get("/chats/{chat_id}/documents")
async def get_documents(chat_id: str, user_data: dict = Depends(decode_token), db: SessionLocal = Depends(get_db)):
    """List the documents attached to one of the caller's chats.

    Raises:
        HTTPException: 404 when the chat does not exist or is not owned by the
            caller.
    """
    owner_id = user_data["id"]
    owned_chat = (
        db.query(Chat)
        .filter(Chat.chat_id == chat_id, Chat.user_id == owner_id)
        .first()
    )
    if owned_chat is None:
        raise HTTPException(status_code=404, detail="Chat not found")

    listing = []
    for stored in db.query(Document).filter(Document.chat_id == chat_id).all():
        listing.append({
            "id": stored.id,
            "name": stored.name,
            "timestamp": stored.timestamp.isoformat(),
        })
    return listing
248
+
249
@app.post("/chats/{chat_id}/analyze", response_model=Analysis)
async def analyze_text(chat_id: str, query: Query, user_data: dict = Depends(decode_token), db: SessionLocal = Depends(get_db)):
    """Analyze the selected documents with Gemini and record the exchange.

    Builds one prompt from the query and the selected documents, asks the
    model for a JSON analysis plus an HTML quiz/flashcard page, stores the user
    message, the analysis and (when present) the HTML as chat messages, and
    returns the parsed analysis.

    Raises:
        HTTPException: 404 when the chat is missing or not owned by the caller;
            400 when none of the selected documents belong to the chat; 500
            when no API key is configured; 502 when the model call fails or
            its reply contains no parseable JSON.
    """
    user_id = user_data["id"]
    # Check if the chat exists and belongs to the user
    chat = db.query(Chat).filter(Chat.chat_id == chat_id, Chat.user_id == user_id).first()
    if not chat:
        raise HTTPException(status_code=404, detail="Chat not found")
    # Fetch documents
    docs = db.query(Document).filter(Document.chat_id == chat_id, Document.id.in_(query.selected_docs)).all()
    if not docs:
        raise HTTPException(status_code=400, detail="No documents found for analysis")
    # Combine content from selected documents
    combined_context = "\n\n".join([
        f"Document '{doc.name}':\n{doc.content}" for doc in docs
    ])

    prompt = f"""
    Analyze the following text in the context of this query: {query.text}

    Context from multiple documents:
    {combined_context}

    Provide:
    1. Detailed insights and analysis, comparing information across documents when relevant
    2. Apply the Pareto Principle (80/20 rule) to identify the most important aspects

    Format the response as JSON with 'insight' and 'pareto_analysis' keys.

    Example format:
    {{
        "insight": "Key findings and analysis from the documents based on query...",
        "pareto_analysis": {{
            "vital_few": "The 20% of factors that drive 80% of the impact...",
            "trivial_many": "The remaining 80% of factors that contribute 20% of the impact..."
        }}
    }}

    also give a complete html document with a intreactive quiz (minimum 5 questions) using jquery and also a flashcards to help the user understand the content better.
    """

    if not api_keys:
        raise HTTPException(status_code=500, detail="No Gemini API key configured")
    # Pick one of the configured keys at random. The key is deliberately NOT
    # printed or logged - it is a secret.
    # NOTE(review): genai.configure() sets process-wide state; concurrent
    # requests may overwrite each other's key.
    genai.configure(api_key=random.choice(api_keys))

    model = genai.GenerativeModel('gemini-1.5-flash')

    # Stamp the question before the (slow, blocking) model call so it sorts
    # ahead of the answers in the timestamp-ordered chat history.
    asked_at = datetime.now()
    try:
        response = model.generate_content(prompt)
        response_text = response.text
    except Exception as exc:
        # Covers transport errors and replies with no text part.
        raise HTTPException(status_code=502, detail=f"Gemini request failed: {str(exc)}") from exc
    answered_at = datetime.now()

    # Parse analysis
    analysis = parse_json_from_gemini(response_text)
    if analysis is None:
        # Nothing usable came back: fail before persisting a "null" message.
        raise HTTPException(status_code=502, detail="Model response did not contain valid JSON")

    referenced = json.dumps(query.selected_docs)

    # Save user message
    user_message = ChatMessage(
        id=str(uuid.uuid4()),
        chat_id=chat_id,
        type="user",
        content=query.text,
        timestamp=asked_at,
        referenced_docs=referenced
    )
    db.add(user_message)

    # Save assistant message
    assistant_message = ChatMessage(
        id=str(uuid.uuid4()),
        chat_id=chat_id,
        type="assistant",
        content=json.dumps(analysis, indent=4),
        timestamp=answered_at,
        referenced_docs=referenced
    )
    db.add(assistant_message)

    if '```html' in response_text:
        html = response_text.split('```html')[1]
        html = html.split('```')[0]
        html = html.strip()
        assistant_message_1 = ChatMessage(
            id=str(uuid.uuid4()),
            chat_id=chat_id,
            type="assistant",
            content=html,
            # Strictly after the analysis so the two answers keep their order.
            timestamp=answered_at + timedelta(milliseconds=1),
            referenced_docs=referenced
        )
        db.add(assistant_message_1)

    db.commit()
    return analysis
340
+
341
@app.get("/chats/{chat_id}/chat-history")
async def get_chat_history(chat_id: str, user_data: dict = Depends(decode_token), db: SessionLocal = Depends(get_db)):
    """Return the messages of one of the caller's chats, oldest first.

    Raises:
        HTTPException: 404 when the chat does not exist or is not owned by the
            caller.
    """
    owner_id = user_data["id"]
    # Check if the chat exists and belongs to the user
    owned_chat = (
        db.query(Chat)
        .filter(Chat.chat_id == chat_id, Chat.user_id == owner_id)
        .first()
    )
    if owned_chat is None:
        raise HTTPException(status_code=404, detail="Chat not found")

    ordered = (
        db.query(ChatMessage)
        .filter(ChatMessage.chat_id == chat_id)
        .order_by(ChatMessage.timestamp)
        .all()
    )

    history = []
    for entry in ordered:
        # referenced_docs is stored as a JSON string; absent means "none".
        if entry.referenced_docs:
            doc_ids = json.loads(entry.referenced_docs)
        else:
            doc_ids = []
        history.append({
            "id": entry.id,
            "type": entry.type,
            "content": entry.content,
            "timestamp": entry.timestamp.isoformat(),
            "referenced_docs": doc_ids,
        })
    return history
356
+
357
@app.delete("/chats/{chat_id}/clear")
async def clear_chat(chat_id: str, user_data: dict = Depends(decode_token), db: SessionLocal = Depends(get_db)):
    """Remove every document and message of one of the caller's chats.

    The chat row itself is kept.

    Raises:
        HTTPException: 404 when the chat does not exist or is not owned by the
            caller.
    """
    owner_id = user_data["id"]
    owned_chat = (
        db.query(Chat)
        .filter(Chat.chat_id == chat_id, Chat.user_id == owner_id)
        .first()
    )
    if owned_chat is None:
        raise HTTPException(status_code=404, detail="Chat not found")

    # Delete documents and messages
    for model in (Document, ChatMessage):
        db.query(model).filter(model.chat_id == chat_id).delete()
    db.commit()

    return {"message": "Chat cleared successfully"}
368
+
369
+
370
+
371
+ if __name__ == "__main__":
372
+ import uvicorn
373
+ uvicorn.run(app, host="0.0.0.0", port=8000)
old/MainPage copy.jsx ADDED
@@ -0,0 +1,419 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import React, { useState, useEffect } from 'react';
2
+ import { motion, AnimatePresence } from 'framer-motion';
3
+ import { Upload, BookOpen, Search, File, Send, Trash2, Gem, Loader2 } from 'lucide-react';
4
+ import { Card, CardContent, CardHeader, CardTitle } from '@/components/ui/card';
5
+ import { Button } from '@/components/ui/button';
6
+ import { Textarea } from '@/components/ui/textarea';
7
+ import { Alert, AlertTitle, AlertDescription } from '@/components/ui/alert';
8
+ import { Checkbox } from '@/components/ui/checkbox';
9
+ import { toast, Toaster } from 'sonner';
10
+ import GeminiResponseDisplay from './GeminiResponse';
11
+
12
+ const FileTypeIcons = {
13
+ '.pdf': File,
14
+ '.docx': File,
15
+ '.xlsx': File,
16
+ '.csv': File,
17
+ '.txt': File,
18
+ '.ppt': File,
19
+ '.pptx': File
20
+ };
21
+
22
+ const containerVariants = {
23
+ hidden: { opacity: 0 },
24
+ visible: {
25
+ opacity: 1,
26
+ transition: {
27
+ delayChildren: 0.2,
28
+ staggerChildren: 0.1
29
+ }
30
+ }
31
+ };
32
+
33
+ const itemVariants = {
34
+ hidden: { y: 20, opacity: 0 },
35
+ visible: {
36
+ y: 0,
37
+ opacity: 1,
38
+ transition: {
39
+ type: "spring",
40
+ stiffness: 300,
41
+ damping: 24
42
+ }
43
+ }
44
+ };
45
+
46
+ const chatMessageVariants = {
47
+ hidden: { opacity: 0, x: -20 },
48
+ visible: {
49
+ opacity: 1,
50
+ x: 0,
51
+ transition: {
52
+ type: "tween",
53
+ duration: 0.3
54
+ }
55
+ }
56
+ };
57
+
58
+ const MainPage = () => {
59
+ // State Management
60
+ const [documents, setDocuments] = useState([]);
61
+ const [query, setQuery] = useState('');
62
+ const [chatHistory, setChatHistory] = useState([]);
63
+ const [loading, setLoading] = useState(false);
64
+ const [selectedDocs, setSelectedDocs] = useState([]);
65
+
66
+ // Error Handling
67
+ const showError = (message, description = '') => {
68
+ toast.error(message, {
69
+ description,
70
+ duration: 4000
71
+ });
72
+ };
73
+
74
+ const showSuccess = (message, description = '') => {
75
+ toast.success(message, {
76
+ description,
77
+ duration: 3000
78
+ });
79
+ };
80
+
81
+ // Fetch Initial Data
82
+ useEffect(() => {
83
+ fetchDocuments();
84
+ fetchChatHistory();
85
+ }, []);
86
+
87
// Load the document list from the backend into component state.
const fetchDocuments = async () => {
  try {
    const response = await fetch('http://localhost:8000/documents');
    // An error response carries an error object, not a list; storing it would
    // make documents.map() throw during render.
    if (!response.ok) throw new Error(`Request failed with status ${response.status}`);
    const data = await response.json();
    setDocuments(Array.isArray(data) ? data : []);
  } catch (err) {
    showError('Failed to fetch documents', err.message);
  }
};
96
+
97
// Load the chat history from the backend into component state.
const fetchChatHistory = async () => {
  try {
    const response = await fetch('http://localhost:8000/chat-history');
    // An error response carries an error object, not a list; storing it would
    // make chatHistory.map() throw during render.
    if (!response.ok) throw new Error(`Request failed with status ${response.status}`);
    const data = await response.json();
    setChatHistory(Array.isArray(data) ? data : []);
  } catch (err) {
    showError('Failed to fetch chat history', err.message);
  }
};
106
+
107
+ const ALLOWED_TYPES = ['.pdf', '.docx', '.xlsx', '.csv', '.txt','.ppt', '.pptx'];
108
+
109
// Validate the chosen file, upload it, and append the stored document to the list.
const handleFileUpload = async (event) => {
  const file = event.target.files[0];
  if (!file) return;

  // File Validation
  const MAX_FILE_SIZE = 35 * 1024 * 1024; // 35MB
  const fileExtension = '.' + file.name.split('.').pop().toLowerCase();

  if (file.size > MAX_FILE_SIZE) {
    showError('File Too Large', 'Maximum file size is 35MB');
    return;
  }

  if (!ALLOWED_TYPES.includes(fileExtension)) {
    showError('Unsupported File Type', `Supported: ${ALLOWED_TYPES.join(', ')}`);
    return;
  }

  const formData = new FormData();
  formData.append('file', file);

  try {
    setLoading(true);
    const response = await fetch('http://localhost:8000/upload', {
      method: 'POST',
      body: formData,
    });

    if (!response.ok) throw new Error('Upload failed');

    const data = await response.json();
    // Functional update: `documents` captured by this closure may be stale by
    // the time the upload finishes, which would drop documents added meanwhile.
    setDocuments((current) => [...current, data]);
    showSuccess('Document Uploaded', `${file.name} processed successfully`);
  } catch (err) {
    showError('Failed to upload document', err.message);
  } finally {
    setLoading(false);
  }
};
148
+
149
+ const handleClearAll = async () => {
150
+ try {
151
+ setLoading(true);
152
+ const response = await fetch('http://localhost:8000/clear-all', {
153
+ method: 'GET',
154
+ });
155
+
156
+ if (!response.ok) throw new Error('Clear all failed');
157
+
158
+ setDocuments([]);
159
+ setChatHistory([]);
160
+ setSelectedDocs([]);
161
+ showSuccess('Data Cleared', 'All documents and chat history removed');
162
+ } catch (err) {
163
+ showError('Failed to clear all', err.message);
164
+ } finally {
165
+ setLoading(false);
166
+ }
167
+ };
168
+
169
+ const handleAnalyze = async () => {
170
+ if (!selectedDocs.length || !query) {
171
+ showError('Incomplete Request', 'Select documents and enter a query');
172
+ return;
173
+ }
174
+
175
+ try {
176
+ setLoading(true);
177
+ const response = await fetch('http://localhost:8000/analyze', {
178
+ method: 'POST',
179
+ headers: {
180
+ 'Content-Type': 'application/json',
181
+ },
182
+ body: JSON.stringify({
183
+ text: query,
184
+ selected_docs: selectedDocs,
185
+ }),
186
+ });
187
+
188
+ if (!response.ok) throw new Error('Analysis failed');
189
+
190
+ await fetchChatHistory();
191
+ setQuery('');
192
+ showSuccess('Analysis Complete', 'Results are available in chat history');
193
+ } catch (err) {
194
+ showError('Failed to analyze', err.message);
195
+ } finally {
196
+ setLoading(false);
197
+ }
198
+ };
199
+
200
+ const formatTimestamp = (timestamp) => {
201
+ return new Date(timestamp).toLocaleString();
202
+ };
203
+
204
+ return (
205
+ <motion.div
206
+ initial="hidden"
207
+ animate="visible"
208
+ variants={containerVariants}
209
+ className="min-h-screen bg-gray-100 p-8"
210
+ >
211
+ <Toaster position="top-right" />
212
+ <motion.div
213
+ variants={itemVariants}
214
+ className="max-w-6xl mx-auto space-y-6"
215
+ >
216
+ <Card>
217
+ <CardHeader>
218
+ <motion.div
219
+ variants={itemVariants}
220
+ className="flex items-center justify-between"
221
+ >
222
+ <CardTitle className="text-2xl font-bold flex items-center gap-2">
223
+ <BookOpen className="w-6 h-6" />
224
+ EduScope AI
225
+ </CardTitle>
226
+ <motion.div
227
+ whileHover={{ scale: 1.05 }}
228
+ whileTap={{ scale: 0.95 }}
229
+ >
230
+ <Gem className="w-6 h-6 text-purple-600" />
231
+ </motion.div>
232
+ </motion.div>
233
+ </CardHeader>
234
+ <CardContent>
235
+ <div className="grid grid-cols-12 gap-6">
236
+ {/* Document Management Sidebar */}
237
+ <motion.div
238
+ variants={itemVariants}
239
+ className="col-span-4 space-y-4"
240
+ >
241
+ <Card>
242
+ <CardHeader>
243
+ <CardTitle className="text-lg">Documents</CardTitle>
244
+ </CardHeader>
245
+ <CardContent>
246
+ <div className="space-y-4">
247
+ <motion.div
248
+ whileHover={{ scale: 1.02 }}
249
+ whileTap={{ scale: 0.98 }}
250
+ >
251
+ <Button
252
+ variant="outline"
253
+ onClick={() => document.getElementById('file-upload').click()}
254
+ className="w-full"
255
+ >
256
+ <Upload className="w-4 h-4 mr-2" />
257
+ Upload Document
258
+ </Button>
259
+ </motion.div>
260
+
261
+ <motion.div
262
+ whileHover={{ scale: 1.02 }}
263
+ whileTap={{ scale: 0.98 }}
264
+ >
265
+ <Button
266
+ onClick={handleClearAll}
267
+ disabled={loading}
268
+ className="w-full"
269
+ >
270
+ {loading ? (
271
+ <Loader2 className="w-4 h-4 mr-2 animate-spin" />
272
+ ) : (
273
+ <Trash2 className="w-4 h-4 mr-2" />
274
+ )}
275
+ Clear All
276
+ </Button>
277
+ </motion.div>
278
+
279
+ <input
280
+ id="file-upload"
281
+ type="file"
282
+ accept={ALLOWED_TYPES.join(',')}
283
+ className="hidden"
284
+ onChange={handleFileUpload}
285
+ />
286
+
287
+ <motion.div
288
+ variants={containerVariants}
289
+ className="space-y-2"
290
+ >
291
+ <AnimatePresence>
292
+ {documents.map((doc) => {
293
+ const FileIcon = FileTypeIcons[`.${doc.name.split('.').pop().toLowerCase()}`] || File;
294
+ return (
295
+ <motion.div
296
+ key={doc.id}
297
+ initial={{ opacity: 0, x: -20 }}
298
+ animate={{ opacity: 1, x: 0 }}
299
+ exit={{ opacity: 0, x: 20 }}
300
+ transition={{ type: "spring", stiffness: 300, damping: 30 }}
301
+ className="flex items-center space-x-2"
302
+ >
303
+ <Checkbox
304
+ checked={selectedDocs.includes(doc.id)}
305
+ onCheckedChange={(checked) => {
306
+ if (checked) {
307
+ setSelectedDocs([...selectedDocs, doc.id]);
308
+ } else {
309
+ setSelectedDocs(selectedDocs.filter(id => id !== doc.id));
310
+ }
311
+ }}
312
+ />
313
+ <div className="flex items-center space-x-2">
314
+ <FileIcon className="w-4 h-4" />
315
+ <span className="text-sm truncate">{doc.name}</span>
316
+ </div>
317
+ </motion.div>
318
+ );
319
+ })}
320
+ </AnimatePresence>
321
+ </motion.div>
322
+ </div>
323
+ </CardContent>
324
+ </Card>
325
+ </motion.div>
326
+
327
+ {/* Chat Interface */}
328
+ <motion.div
329
+ variants={itemVariants}
330
+ className="col-span-8 space-y-4"
331
+ >
332
+ <motion.div
333
+ className="h-[500px] overflow-y-auto bg-white rounded-lg p-4 border"
334
+ initial={{ opacity: 0 }}
335
+ animate={{ opacity: 1 }}
336
+ >
337
+ <AnimatePresence>
338
+ {chatHistory.map((message) => (
339
+ <motion.div
340
+ key={message.id}
341
+ variants={chatMessageVariants}
342
+ initial="hidden"
343
+ animate="visible"
344
+ exit={{ opacity: 0, x: 20 }}
345
+ className={`mb-4 ${message.type === 'assistant' ? 'ml-4' : 'mr-4'}`}
346
+ >
347
+ <div
348
+ className={`p-3 rounded-lg ${
349
+ message.type === 'assistant'
350
+ ? 'bg-blue-100'
351
+ : 'bg-gray-100'
352
+ }`}
353
+ >
354
+ <div className="text-sm text-gray-500 mb-1">
355
+ {message.type === 'assistant' ? 'AI Assistant' : 'You'} •{' '}
356
+ {formatTimestamp(message.timestamp)}
357
+ </div>
358
+ <div className="text-gray-800">
359
+ {message.content.includes('pareto_analysis') || message.content.includes('<html')
360
+ ? <GeminiResponseDisplay responseStr={message.content} />
361
+ : message.content}
362
+ </div>
363
+ {message.referenced_docs.length > 0 && (
364
+ <div className="text-xs text-gray-500 mt-2">
365
+ Referenced documents:{' '}
366
+ {message.referenced_docs
367
+ .map(
368
+ (docId) =>
369
+ documents.find((d) => d.id === docId)?.name
370
+ )
371
+ .join(', ')}
372
+ </div>
373
+ )}
374
+ </div>
375
+ </motion.div>
376
+ ))}
377
+ </AnimatePresence>
378
+ </motion.div>
379
+
380
+ <motion.div
381
+ variants={itemVariants}
382
+ className="flex gap-2"
383
+ >
384
+ <Textarea
385
+ value={query}
386
+ onChange={(e) => setQuery(e.target.value)}
387
+ placeholder="Ask a question about the selected documents..."
388
+ className="flex-1"
389
+ />
390
+ <motion.div
391
+ whileHover={{ scale: 1.05 }}
392
+ whileTap={{ scale: 0.95 }}
393
+ >
394
+ <Button
395
+ onClick={handleAnalyze}
396
+ disabled={loading}
397
+ className="self-end"
398
+ >
399
+ {loading ? (
400
+ <Loader2 className="w-4 h-4 mr-2 animate-spin" />
401
+ ) : (
402
+ <>
403
+ <Send className="w-4 h-4 mr-2" />
404
+ Send
405
+ </>
406
+ )}
407
+ </Button>
408
+ </motion.div>
409
+ </motion.div>
410
+ </motion.div>
411
+ </div>
412
+ </CardContent>
413
+ </Card>
414
+ </motion.div>
415
+ </motion.div>
416
+ );
417
+ };
418
+
419
+ export default MainPage;
old/main_old.py ADDED
@@ -0,0 +1,337 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # main.py
2
+ from fastapi import FastAPI, UploadFile, File, HTTPException,Depends,Header
3
+ from fastapi.middleware.cors import CORSMiddleware
4
+ from pydantic import BaseModel
5
+ import google.generativeai as genai
6
+ from typing import List, Dict
7
+ import os
8
+ from dotenv import load_dotenv
9
+ import io
10
+ from datetime import datetime
11
+ import uuid
12
+
13
+ import json
14
+ import re
15
+
16
+ # File Format Libraries
17
+ import PyPDF2
18
+ import docx
19
+ import openpyxl
20
+ import csv
21
+ import io
22
+ import pptx
23
+ from db import get_db,Chat,ChatMessage,User,SessionLocal
24
+
25
+
26
+ from fastapi.security import OAuth2PasswordBearer
27
+ import requests
28
+ from jose import jwt
29
+
30
oauth2_scheme = OAuth2PasswordBearer(tokenUrl="token")

# Load .env before reading the settings below so they are available at import
# time. python-dotenv does not override variables that are already set, so the
# later load_dotenv() call in this module remains harmless.
load_dotenv()

DOMAIN = os.getenv("DOMAIN", "http://localhost:8000")

# OAuth credentials are read from the environment instead of being committed
# to source control. The secret that used to be hard-coded here must be
# considered leaked and should be rotated in the Google Developer Console.
GOOGLE_CLIENT_ID = os.getenv("GOOGLE_CLIENT_ID", "")
GOOGLE_CLIENT_SECRET = os.getenv("GOOGLE_CLIENT_SECRET", "")
GOOGLE_REDIRECT_URI = os.getenv("GOOGLE_REDIRECT_URI", "http://localhost:5173/callback")

# The client secret is also used below as the HMAC key for application JWTs,
# so an empty value would make every token trivially forgeable: fail fast.
if not GOOGLE_CLIENT_ID or not GOOGLE_CLIENT_SECRET:
    raise RuntimeError(
        "GOOGLE_CLIENT_ID and GOOGLE_CLIENT_SECRET must be set in the environment or .env"
    )
37
+
38
+
39
def parse_json_from_gemini(json_str: str):
    """Parse the JSON payload out of a Gemini text response.

    The model may return bare JSON, JSON wrapped in a Markdown code fence
    (tagged ``json`` in any letter case, or untagged), or JSON surrounded by
    explanatory prose. Candidates are tried in that order of reliability and
    the first one that decodes is returned.

    Args:
        json_str: Raw response text from the model.

    Returns:
        The decoded JSON value, or ``None`` when the input is not a string or
        no candidate could be decoded.
    """
    if not isinstance(json_str, str):
        return None

    text = json_str.strip()
    fences = re.findall(r"```(\w*)\s*(.*?)\s*```", text, re.DOTALL)

    # 1. Fences explicitly tagged as JSON.
    candidates = [body for tag, body in fences if tag.lower() == "json"]
    # 2. The whole response (the model returned bare JSON).
    candidates.append(text)
    # 3. Untagged fences.
    candidates.extend(body for tag, body in fences if not tag)
    # 4. Last resort: the outermost {...} span inside surrounding prose.
    start, end = text.find("{"), text.rfind("}")
    if 0 <= start < end:
        candidates.append(text[start:end + 1])

    for candidate in candidates:
        try:
            return json.loads(candidate)
        except json.JSONDecodeError:
            continue
    return None
50
+
51
# Pull settings from a local .env file into the process environment.
load_dotenv()

app = FastAPI(title="EduScope AI")

# Configure CORS
# NOTE(review): a wildcard origin combined with allow_credentials=True is the
# most permissive setup possible; consider listing the frontend origin(s)
# explicitly before deploying.
_CORS_OPTIONS = {
    "allow_origins": ["*"],
    "allow_credentials": True,
    "allow_methods": ["*"],
    "allow_headers": ["*"],
}
app.add_middleware(CORSMiddleware, **_CORS_OPTIONS)
63
+
64
+
65
@app.get("/login/google")
async def login_google():
    """Return the Google OAuth 2.0 consent URL the client should redirect to.

    Returns:
        A dict with a single ``url`` key.
    """
    from urllib.parse import quote, urlencode

    params = {
        "response_type": "code",
        "client_id": GOOGLE_CLIENT_ID,
        "redirect_uri": GOOGLE_REDIRECT_URI,
        "scope": "openid profile email",
        "access_type": "offline",
    }
    # Percent-encode every value (the redirect URI contains ':' and '/').
    # quote_via=quote keeps spaces as %20, matching the previous URL format.
    query = urlencode(params, quote_via=quote)
    return {"url": f"https://accounts.google.com/o/oauth2/auth?{query}"}
70
+
71
@app.get("/auth/google")
async def auth_google(code: str, db: SessionLocal = Depends(get_db)):
    """Exchange a Google authorization code for an application token.

    Trades ``code`` for a Google access token, fetches the user's profile,
    creates a ``User`` row on first login, and returns the profile signed as
    an HS256 JWT.

    Args:
        code: Authorization code Google passed to the redirect URI.
        db: Database session provided by ``get_db``.

    Returns:
        ``{"token": <jwt>}``.

    Raises:
        HTTPException: 502 when Google cannot be reached or answers with a
            non-JSON body; 400 when Google rejects the code or the profile
            lacks the required fields.
    """
    token_url = "https://accounts.google.com/o/oauth2/token"
    data = {
        "code": code,
        "client_id": GOOGLE_CLIENT_ID,
        "client_secret": GOOGLE_CLIENT_SECRET,
        "redirect_uri": GOOGLE_REDIRECT_URI,
        "grant_type": "authorization_code",
    }

    # NOTE(review): requests is blocking; inside an async route it stalls the
    # event loop for the duration of the call. The timeout bounds that stall.
    try:
        response = requests.post(token_url, data=data, timeout=10)
        access_token = response.json().get("access_token")
    except (requests.RequestException, ValueError) as exc:
        raise HTTPException(status_code=502, detail="Could not reach Google token endpoint") from exc

    # Without this check a rejected code would be sent on as "Bearer None".
    if not access_token:
        raise HTTPException(status_code=400, detail="Google rejected the authorization code")

    try:
        user_info = requests.get(
            "https://www.googleapis.com/oauth2/v1/userinfo",
            headers={"Authorization": f"Bearer {access_token}"},
            timeout=10,
        ).json()
    except (requests.RequestException, ValueError) as exc:
        raise HTTPException(status_code=502, detail="Could not fetch Google user profile") from exc

    if not isinstance(user_info, dict) or "id" not in user_info or "email" not in user_info:
        raise HTTPException(status_code=400, detail="Google did not return a usable user profile")

    user = db.query(User).filter(User.id == user_info["id"]).first()
    if not user:
        user = User(
            id=user_info["id"],
            email=user_info["email"],
            # Fall back to the email when the profile has no display name.
            name=user_info.get("name", user_info["email"]),
        )
        db.add(user)
        db.commit()

    # NOTE(review): the token carries no "exp" claim, so it never expires, and
    # it is signed with the OAuth client secret; a dedicated signing key with
    # an expiry would be safer.
    return {"token": jwt.encode(user_info, GOOGLE_CLIENT_SECRET, algorithm="HS256")}
92
+
93
+
94
async def decode_token(authorization: str = Header(...)):
    """Validate the ``Authorization: Bearer <jwt>`` header and return its claims.

    Args:
        authorization: Raw value of the ``Authorization`` request header.

    Returns:
        The decoded JWT payload as a dict.

    Raises:
        HTTPException: 400 when the header is not a Bearer credential;
            401 when the token is expired, malformed, or has a bad signature.
    """
    # python-jose has no ``InvalidTokenError`` (that name belongs to PyJWT);
    # referencing it in an ``except`` clause raised AttributeError and turned
    # every invalid token into a 500. ``JWTError`` is jose's base JWT error.
    from jose.exceptions import ExpiredSignatureError, JWTError

    if not authorization.startswith("Bearer "):
        raise HTTPException(
            status_code=400,
            detail="Authorization header must start with 'Bearer '"
        )

    token = authorization[len("Bearer "):]  # Extract token part

    try:
        # Decode and verify the JWT token
        return jwt.decode(token, GOOGLE_CLIENT_SECRET, algorithms=["HS256"])
    except ExpiredSignatureError as exc:
        # Must come first: ExpiredSignatureError is a subclass of JWTError.
        raise HTTPException(status_code=401, detail="Token has expired") from exc
    except JWTError as exc:
        raise HTTPException(status_code=401, detail="Invalid token") from exc
111
+
112
+
113
@app.get("/token")
async def get_token(user_data: dict = Depends(decode_token)):
    """Echo back the claims of the caller's bearer token.

    ``decode_token`` performs the validation; this route only returns what it
    produced.
    """
    claims = user_data
    return claims
116
+
117
+
118
@app.post("/chats")
async def create_chat(title: str, user_data: dict = Depends(decode_token), db: SessionLocal = Depends(get_db)):
    """Create a chat owned by the authenticated user.

    Returns:
        A dict with the new chat's ``chat_id``, ``title`` and ``timestamp``.
    """
    owner_id = user_data["id"]
    new_chat_id = str(uuid.uuid4())

    new_chat = Chat(chat_id=new_chat_id, user_id=owner_id, title=title)
    db.add(new_chat)
    db.commit()

    summary = {
        "chat_id": new_chat.chat_id,
        "title": title,
        "timestamp": new_chat.timestamp,
    }
    return summary
126
+
127
+
128
@app.get("/chats")
async def get_chats(user_data: dict = Depends(decode_token), db: SessionLocal = Depends(get_db)):
    """List every chat that belongs to the authenticated user.

    Returns:
        A list of dicts, each with ``chat_id``, ``title`` and ``timestamp``.
    """
    owner_id = user_data["id"]
    rows = db.query(Chat).filter(Chat.user_id == owner_id).all()

    summaries = []
    for row in rows:
        summaries.append({
            "chat_id": row.chat_id,
            "title": row.title,
            "timestamp": row.timestamp,
        })
    return summaries
134
+
135
+
136
# The Gemini API key is read from the environment instead of being committed
# to source control; the key that used to be hard-coded here must be
# considered leaked and should be revoked. GEMINI_API_KEYS may hold several
# comma-separated keys, in which case the first non-empty one is used.
_gemini_keys = [
    key.strip()
    for key in os.getenv("GEMINI_API_KEYS", os.getenv("GEMINI_API_KEY", "")).split(",")
    if key.strip()
]
if not _gemini_keys:
    raise RuntimeError("Set GEMINI_API_KEYS (or GEMINI_API_KEY) in the environment or .env")

genai.configure(api_key=_gemini_keys[0])
model = genai.GenerativeModel('gemini-1.5-flash')

# In-memory stores. They live only in this process: contents are lost on
# restart and are not shared between worker processes.
documents = {}      # doc_id -> Document
chat_history = []   # ChatMessage entries in the order they were appended
141
+
142
class Document(BaseModel):
    """An uploaded document together with the text extracted from it."""

    id: str         # UUID string assigned at upload time
    name: str       # original filename of the upload
    content: str    # text extracted from the file
    timestamp: str  # ISO-8601 upload time
147
+
148
class Query(BaseModel):
    """Request body for /analyze: a question plus the documents it refers to."""

    text: str                 # the user's question
    selected_docs: List[str]  # ids of the documents to use as context
151
+
152
class ChatMessage(BaseModel):
    """One entry of the in-memory chat history.

    NOTE(review): this definition rebinds the name ``ChatMessage`` that was
    imported from ``db`` at the top of the module; from this point on the
    name refers to this Pydantic model.
    """

    id: str
    type: str  # 'user' or 'assistant'
    content: str
    timestamp: str
    referenced_docs: List[str] = []  # ids of documents the message refers to
158
+
159
+
160
+
161
class Analysis(BaseModel):
    """Response body of /analyze."""

    insight: str           # free-text findings
    pareto_analysis: dict  # 80/20 breakdown as requested in the prompt
164
+
165
def extract_text_from_file(file: UploadFile):
    """
    Extract plain text from an uploaded file.

    The parser is chosen from the filename extension.
    Supports: PDF, DOCX, XLSX, CSV, TXT, PPTX

    Args:
        file: The uploaded file; its whole content is read into memory.

    Returns:
        The extracted text as a single string.

    Raises:
        HTTPException: 400 when the extension is unsupported or the file
            cannot be parsed.
    """
    # filename can be None for some uploads; treat that as "no extension"
    # so the caller gets the regular 400 instead of a TypeError.
    file_extension = os.path.splitext(file.filename or "")[1].lower()
    content = file.file.read()

    try:
        if file_extension == '.pdf':
            pdf_reader = PyPDF2.PdfReader(io.BytesIO(content))
            # extract_text() can yield None/empty for image-only pages.
            text = "\n".join((page.extract_text() or "") for page in pdf_reader.pages)

        elif file_extension == '.docx':
            doc = docx.Document(io.BytesIO(content))
            text = "\n".join(para.text for para in doc.paragraphs)

        elif file_extension == '.xlsx':
            wb = openpyxl.load_workbook(io.BytesIO(content), read_only=True)
            try:
                lines = [
                    " ".join(str(cell) for cell in row if cell is not None)
                    for sheet in wb
                    for row in sheet.iter_rows(values_only=True)
                ]
            finally:
                # Read-only workbooks must be closed explicitly.
                wb.close()
            text = "".join(f"{line}\n" for line in lines)

        elif file_extension == '.csv':
            # utf-8-sig drops the BOM that spreadsheet exports often prepend;
            # newline='' lets the csv module handle embedded newlines itself.
            csv_reader = csv.reader(io.StringIO(content.decode('utf-8-sig'), newline=''))
            text = "\n".join(" ".join(row) for row in csv_reader)

        elif file_extension == '.txt':
            text = content.decode('utf-8-sig')

        elif file_extension in ['.ppt', '.pptx']:
            # NOTE(review): python-pptx targets .pptx; a legacy binary .ppt
            # presumably fails inside Presentation() and ends up as a 400.
            ppt = pptx.Presentation(io.BytesIO(content))
            text = "".join(
                f"{shape.text}\n"
                for slide in ppt.slides
                for shape in slide.shapes
                if hasattr(shape, "text")
            )

        else:
            raise ValueError(f"Unsupported file type: {file_extension}")

        return text
    except Exception as e:
        # Any parsing problem is reported to the client as a bad upload.
        raise HTTPException(status_code=400, detail=f"Error processing file: {str(e)}") from e
210
+
211
@app.post("/upload")
async def upload_document(file: UploadFile = File(...)):
    """Extract the text of an uploaded file and register it in ``documents``.

    Returns:
        The stored document as a plain dict.

    Raises:
        HTTPException: whatever ``extract_text_from_file`` raised, or 500 for
            any other failure.
    """
    try:
        extracted = extract_text_from_file(file)
        new_id = str(uuid.uuid4())
        uploaded_at = datetime.now().isoformat()

        record = Document(id=new_id, name=file.filename, content=extracted, timestamp=uploaded_at)
        documents[new_id] = record
        return record.dict()
    except HTTPException:
        # Already carries the right status code; pass it through untouched.
        raise
    except Exception as err:
        raise HTTPException(status_code=500, detail=f"Unexpected error: {str(err)}")
230
+
231
@app.get("/documents")
async def get_documents():
    """Return every document currently held in the in-memory store."""
    return [*documents.values()]
234
+
235
@app.post("/analyze", response_model=Analysis)
async def analyze_text(query: Query):
    """Answer a question about the selected documents using Gemini.

    Builds one prompt from the selected documents, asks the model for a JSON
    analysis (plus an optional HTML illustration), records the exchange in
    ``chat_history`` and returns the analysis.

    Args:
        query: The question and the ids of the documents to analyse.

    Returns:
        A dict with ``insight`` and ``pareto_analysis`` keys.

    Raises:
        HTTPException: 404 when a selected document id is unknown; 502 when
            the model call fails.
    """
    # Reject unknown ids up front instead of failing with a KeyError (500).
    missing = [doc_id for doc_id in query.selected_docs if doc_id not in documents]
    if missing:
        raise HTTPException(
            status_code=404,
            detail=f"Unknown document id(s): {', '.join(missing)}"
        )

    # Combine content from selected documents
    combined_context = "\n\n".join([
        f"Document '{documents[doc_id].name}':\n{documents[doc_id].content}"
        for doc_id in query.selected_docs
    ])

    prompt = f"""
    Analyze the following text in the context of this query: {query.text}

    Context from multiple documents:
    {combined_context}

    Provide:
    1. Detailed insights and analysis, comparing information across documents when relevant
    2. Apply the Pareto Principle (80/20 rule) to identify the most important aspects

    Format the response as JSON with 'insight' and 'pareto_analysis' keys.

    Example format:
    {{
    "insight": "Key findings and analysis from the documents...",
    "pareto_analysis": {{
    "vital_few": "The 20% of factors that drive 80% of the impact...",
    "trivial_many": "The remaining 80% of factors that contribute 20% of the impact..."
    }}
    }}

    also give a complete html document with the illustrative analysis like pie charts, bar charts,graphs etc.
    """
    try:
        response = model.generate_content(prompt)
        # Reading .text is kept inside the try in case it raises as well.
        response_text = response.text
    except Exception as exc:
        raise HTTPException(status_code=502, detail=f"Model request failed: {str(exc)}") from exc

    # Create chat message
    message = ChatMessage(
        id=str(uuid.uuid4()),
        type="user",
        content=query.text,
        timestamp=datetime.now().isoformat(),
        referenced_docs=query.selected_docs
    )
    chat_history.append(message)

    # The model does not always return parseable JSON. Normalise the result
    # so it always satisfies the Analysis response model: fall back to the
    # raw text as the insight and to an empty Pareto breakdown.
    analysis = parse_json_from_gemini(response_text)
    if not isinstance(analysis, dict):
        analysis = {}
    if not isinstance(analysis.get("insight"), str):
        analysis["insight"] = response_text
    if not isinstance(analysis.get("pareto_analysis"), dict):
        analysis["pareto_analysis"] = {}

    # Create assistant response
    assistant_message = ChatMessage(
        id=str(uuid.uuid4()),
        type="assistant",
        content=json.dumps(analysis, indent=4),
        timestamp=datetime.now().isoformat(),
        referenced_docs=query.selected_docs
    )
    chat_history.append(assistant_message)

    # When the reply also contains a fenced HTML document, store it as a
    # second assistant message.
    if '```html' in response_text:
        html = response_text.split('```html')[1]
        html = html.split('```')[0]
        html = html.strip()
        assistant_message = ChatMessage(
            id=str(uuid.uuid4()),
            type="assistant",
            content=html,
            timestamp=datetime.now().isoformat(),
            referenced_docs=query.selected_docs
        )
        chat_history.append(assistant_message)

    return analysis
321
+
322
@app.get("/chat-history")
async def get_chat_history():
    """Return the in-memory chat history in the order it was recorded."""
    history = chat_history
    return history
325
+
326
@app.get("/clear-all")
async def clear_all():
    """Empty both in-memory stores (uploaded documents and chat history).

    NOTE(review): this destructive action is exposed through GET and takes no
    authentication dependency.
    """
    for store in (chat_history, documents):
        store.clear()
    return {"message": "All Data cleared successfully"}
331
+
332
+
333
+
334
+
335
# Development entry point: serve the app directly when run as a script.
if __name__ == "__main__":
    import uvicorn

    uvicorn.run(app, port=8000, host="0.0.0.0")
requirements.txt ADDED
@@ -0,0 +1,18 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ fastapi
2
+ uvicorn
3
+ python-multipart
4
+ google-generativeai
5
+ python-dotenv
6
+ PyPDF2
7
+ pydantic
8
+ uuid
9
+ starlette
10
+ typing-extensions
11
+ python-docx
12
+ openpyxl
13
+ python-multipart
14
+ python-jose[cryptography]
15
+ requests
16
+ sqlalchemy
17
+ python-dotenv
18
+ sqlalchemy-utils