IZERE HIRWA Roger committed on
Commit
ed74fda
·
1 Parent(s): 08cd1f0
Files changed (8) hide show
  1. Dockerfile +27 -0
  2. app.py +431 -0
  3. main.py +431 -0
  4. requirements.txt +19 -0
  5. space.yaml +1 -0
  6. static/index.html +161 -0
  7. static/script.js +458 -0
  8. static/styles.css +372 -0
Dockerfile ADDED
@@ -0,0 +1,27 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
FROM python:3.11

WORKDIR /app

# System binaries the Python wrappers shell out to:
#   - pytesseract invokes the `tesseract` executable
#   - pdf2image invokes poppler's `pdftoppm` / `pdfinfo`
# Without them OCR and PDF uploads fail at request time.
RUN apt-get update \
    && apt-get install -y --no-install-recommends tesseract-ocr poppler-utils \
    && rm -rf /var/lib/apt/lists/*

COPY requirements.txt .
RUN pip install -r requirements.txt

COPY . .

# Create writable directories
RUN mkdir -p /app/instance && chmod -R 777 /app/instance
ENV HF_HOME=/app/transformers_cache
RUN mkdir -p /app/transformers_cache && chmod -R 777 /app/transformers_cache

# Create the data directories for the vector store. The application uses the
# relative path "data/", which resolves to /app/data under WORKDIR; /data is
# created as well and kept writable.
RUN mkdir -p /app/data && chmod -R 777 /app/data
RUN mkdir -p /data && chmod -R 777 /data

# Create uploads directory
RUN mkdir -p /app/uploads && chmod -R 777 /app/uploads

# Create logs directory
RUN mkdir -p /app/logs && chmod -R 777 /app/logs

# Must match the port passed to uvicorn.run() in app.py
EXPOSE 7860

CMD ["python", "app.py"]
app.py ADDED
@@ -0,0 +1,431 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from fastapi import FastAPI, File, UploadFile, Form, HTTPException, Depends, status
2
+ from fastapi.responses import HTMLResponse, JSONResponse
3
+ from fastapi.staticfiles import StaticFiles
4
+ from fastapi.security import HTTPBearer, HTTPAuthorizationCredentials
5
+ import pytesseract
6
+ from PIL import Image
7
+ import numpy as np
8
+ import faiss
9
+ import os
10
+ import pickle
11
+ from pdf2image import convert_from_bytes
12
+ import torch
13
+ import clip
14
+ import io
15
+ import json
16
+ import uuid
17
+ from datetime import datetime, timedelta
18
+ from typing import List, Dict, Any, Optional
19
+ import base64
20
+ import jwt
21
+ from passlib.context import CryptContext
22
+
23
app = FastAPI(title="Handwritten Archive Document Digitalization System")

# Security configuration
# The JWT signing key and the bootstrap admin password can be overridden via
# the SECRET_KEY / ADMIN_PASSWORD environment variables so a deployment does
# not have to run on values committed to the repository. The original literals
# remain as fallbacks for local development only.
SECRET_KEY = os.environ.get("SECRET_KEY", "your-secret-key-change-this-in-production")
ALGORITHM = "HS256"
ACCESS_TOKEN_EXPIRE_MINUTES = 30

pwd_context = CryptContext(schemes=["bcrypt"], deprecated="auto")
security = HTTPBearer()

# Default admin user (change in production)
USERS_DB = {
    "admin": {
        "username": "admin",
        "hashed_password": pwd_context.hash(os.environ.get("ADMIN_PASSWORD", "admin123")),
        "is_active": True,
    }
}
41
+
42
# --- On-disk layout ---
INDEX_PATH = "data/index.faiss"
LABELS_PATH = "data/labels.pkl"
DOCUMENTS_PATH = "data/documents.json"
UPLOADS_DIR = "data/uploads"

# Ensure directories exist. This must run before the static mount below:
# StaticFiles verifies that its directory exists when it is constructed, so
# creating "static" afterwards would be too late to help.
os.makedirs("data", exist_ok=True)
os.makedirs("static", exist_ok=True)
os.makedirs(UPLOADS_DIR, exist_ok=True)

# Mount static files
app.mount("/static", StaticFiles(directory="static"), name="static")

# --- Load or Initialize Model/Index ---
device = "cuda" if torch.cuda.is_available() else "cpu"
clip_model, preprocess = clip.load("ViT-B/32", device=device)

# Initialize index and labels with error handling
# 512 is the vector width the index is built for; it has to match the length
# of the embeddings returned by get_clip_embedding().
index = faiss.IndexFlatL2(512)
labels = []
documents = []
63
+
64
if os.path.exists(INDEX_PATH) and os.path.exists(LABELS_PATH):
    try:
        # Load into temporaries first: assigning `index` before the labels are
        # read would leave a populated index paired with an empty label list
        # if the second read fails.
        loaded_index = faiss.read_index(INDEX_PATH)
        # NOTE(security): pickle.load can execute arbitrary code; this file
        # must only ever be one written by save_index().
        with open(LABELS_PATH, "rb") as f:
            loaded_labels = pickle.load(f)
        # Row i of the index is described by labels[i]; a count mismatch means
        # search results would map to the wrong (or a missing) label.
        if loaded_index.ntotal != len(loaded_labels):
            raise RuntimeError(
                f"index holds {loaded_index.ntotal} vectors but {len(loaded_labels)} labels"
            )
        index, labels = loaded_index, loaded_labels
        print(f"✅ Loaded existing index with {len(labels)} labels")
    except (RuntimeError, EOFError, pickle.UnpicklingError) as e:
        print(f"⚠️ Failed to load existing index: {e}")
        print("🔄 Starting with fresh index")
        if os.path.exists(INDEX_PATH):
            os.remove(INDEX_PATH)
        if os.path.exists(LABELS_PATH):
            os.remove(LABELS_PATH)
77
+
78
# Load documents database
if os.path.exists(DOCUMENTS_PATH):
    try:
        with open(DOCUMENTS_PATH, 'r') as f:
            documents = json.load(f)
    except (OSError, ValueError) as e:
        # json.JSONDecodeError and UnicodeDecodeError are ValueError
        # subclasses. Anything else (KeyboardInterrupt, programming errors)
        # is no longer silently swallowed.
        print(f"⚠️ Failed to load documents database: {e}")
        documents = []
    # The rest of the module appends to and iterates over this value.
    if not isinstance(documents, list):
        documents = []
85
+
86
+ # Authentication functions
87
def verify_password(plain_password, hashed_password):
    """Return whether ``plain_password`` matches ``hashed_password`` per ``pwd_context``."""
    is_match = pwd_context.verify(plain_password, hashed_password)
    return is_match
89
+
90
def get_password_hash(password):
    """Hash ``password`` with the module's ``pwd_context`` and return the hash."""
    hashed = pwd_context.hash(password)
    return hashed
92
+
93
def authenticate_user(username: str, password: str):
    """Check a username/password pair against ``USERS_DB``.

    Returns the user record (a dict) on success, or ``False`` when the user is
    unknown, the password does not verify, or the record's ``is_active`` flag
    is falsy.
    """
    user = USERS_DB.get(username)
    if not user or not verify_password(password, user["hashed_password"]):
        return False
    # Records carry an ``is_active`` flag; honour it so a deactivated account
    # cannot obtain new tokens. Records without the flag remain usable.
    if not user.get("is_active", True):
        return False
    return user
98
+
99
def create_access_token(data: dict, expires_delta: Optional[timedelta] = None):
    """Return a signed JWT containing ``data`` plus an ``exp`` claim.

    Args:
        data: Claims to embed; copied, never mutated.
        expires_delta: Token lifetime. When ``None`` the token lives 15 minutes.

    Returns:
        The token produced by ``jwt.encode`` with ``SECRET_KEY``/``ALGORITHM``.
    """
    # Local import: the file-level import only brings in datetime and timedelta.
    from datetime import timezone

    # Compare with None rather than truthiness: timedelta(0) is falsy and used
    # to be silently replaced by the 15-minute default.
    lifetime = expires_delta if expires_delta is not None else timedelta(minutes=15)

    to_encode = data.copy()
    # Timezone-aware "now" instead of the naive datetime.utcnow().
    to_encode.update({"exp": datetime.now(timezone.utc) + lifetime})
    return jwt.encode(to_encode, SECRET_KEY, algorithm=ALGORITHM)
108
+
109
async def get_current_user(credentials: HTTPAuthorizationCredentials = Depends(security)):
    """Resolve the bearer token on the request to a ``USERS_DB`` record.

    Raises:
        HTTPException: 401 when the token cannot be decoded, has no ``sub``
            claim, names an unknown user, or names a user whose ``is_active``
            flag is falsy.
    """
    credentials_exception = HTTPException(
        status_code=status.HTTP_401_UNAUTHORIZED,
        detail="Could not validate credentials",
        headers={"WWW-Authenticate": "Bearer"},
    )
    try:
        payload = jwt.decode(credentials.credentials, SECRET_KEY, algorithms=[ALGORITHM])
    except jwt.PyJWTError:
        raise credentials_exception

    username = payload.get("sub")
    if username is None:
        raise credentials_exception

    user = USERS_DB.get(username)
    # Also reject deactivated accounts so an already-issued token stops
    # working once ``is_active`` is cleared.
    if user is None or not user.get("is_active", True):
        raise credentials_exception
    return user
127
+
128
+ # --- Utilities ---
129
def save_index():
    """Persist the FAISS index and the label list to disk (best effort).

    Both files are written to ``*.tmp`` siblings and then moved into place
    with ``os.replace`` so a failure part-way through a write cannot leave a
    truncated index or pickle behind. Errors are printed, not raised.
    """
    index_tmp = INDEX_PATH + ".tmp"
    labels_tmp = LABELS_PATH + ".tmp"
    try:
        os.makedirs("data", exist_ok=True)
        faiss.write_index(index, index_tmp)
        with open(labels_tmp, "wb") as f:
            pickle.dump(labels, f)
        # Only swap the files in once both were written completely.
        os.replace(index_tmp, INDEX_PATH)
        os.replace(labels_tmp, LABELS_PATH)
    except Exception as e:
        print(f"❌ Failed to save index: {e}")
137
+
138
+ def save_documents():
139
+ try:
140
+ with open(DOCUMENTS_PATH, 'w') as f:
141
+ json.dump(documents, f, indent=2)
142
+ except Exception as e:
143
+ print(f"❌ Failed to save documents: {e}")
144
+
145
def image_from_pdf(pdf_bytes):
    """Render the first page of a PDF and return it as an image.

    Args:
        pdf_bytes: Raw bytes of the PDF file.

    Returns:
        The first page as returned by ``convert_from_bytes``, or ``None`` when
        conversion fails or yields no pages (the error is printed).
    """
    try:
        # Only page 1 is ever used, so ask for just that page instead of
        # rasterising the whole document at 200 dpi.
        pages = convert_from_bytes(pdf_bytes, dpi=200, first_page=1, last_page=1)
        return pages[0]
    except Exception as e:
        print(f"❌ PDF conversion error: {e}")
        return None
152
+
153
def extract_text(image):
    """Run Tesseract OCR over ``image`` and return the recognised text.

    ``image`` may be raw bytes, a PIL image, or anything ``Image.fromarray``
    accepts. Failures are reported through the returned string rather than
    raised: a fixed message for ``None`` input, a fixed message when no text
    is found, and an "OCR error" message carrying the exception text.
    """
    if image is None:
        return "❌ No image provided"

    try:
        if isinstance(image, bytes):
            picture = Image.open(io.BytesIO(image))
        elif isinstance(image, Image.Image):
            picture = image
        else:
            picture = Image.fromarray(image)

        rgb = picture if picture.mode == 'RGB' else picture.convert('RGB')

        raw = pytesseract.image_to_string(rgb, config=r'--oem 3 --psm 6')
        cleaned = raw.strip()
        return cleaned or "❓ No text detected"
    except Exception as err:
        return f"❌ OCR error: {str(err)}"
171
+
172
def get_clip_embedding(image):
    """Return the L2-normalised CLIP image embedding for ``image``.

    Args:
        image: Raw bytes, a PIL image, or anything ``Image.fromarray`` accepts.

    Returns:
        A 1-D float32 NumPy array, or ``None`` when ``image`` is ``None`` or
        embedding fails (the error is printed).
    """
    try:
        if image is None:
            return None

        if isinstance(image, bytes):
            image = Image.open(io.BytesIO(image))
        elif not isinstance(image, Image.Image):
            image = Image.fromarray(image)

        if image.mode != 'RGB':
            image = image.convert('RGB')

        image_input = preprocess(image).unsqueeze(0).to(device)
        with torch.no_grad():
            image_features = clip_model.encode_image(image_input)
            image_features = image_features / image_features.norm(dim=-1, keepdim=True)
        # The model can produce half-precision features when it runs on CUDA,
        # while the FAISS index stores float32; cast explicitly so the result
        # has the same dtype on every device.
        return image_features.float().cpu().numpy()[0]
    except Exception as e:
        print(f"❌ CLIP embedding error: {e}")
        return None
193
+
194
def save_uploaded_file(file_content: bytes, filename: str) -> str:
    """Store ``file_content`` under ``UPLOADS_DIR`` with a fresh UUID name.

    Only the extension of ``filename`` is reused; the stem is replaced by a
    random UUID so client-supplied names never reach the filesystem.

    Args:
        file_content: Bytes to write.
        filename: Original client filename; may be empty or ``None``, in which
            case the stored file gets no extension.

    Returns:
        The generated file name (not the full path).
    """
    # Uploads can arrive without a filename; splitext(None) would raise.
    file_extension = os.path.splitext(filename or "")[1]
    saved_filename = f"{uuid.uuid4()}{file_extension}"

    # Recreate the directory if it was removed after start-up.
    os.makedirs(UPLOADS_DIR, exist_ok=True)
    with open(os.path.join(UPLOADS_DIR, saved_filename), 'wb') as f:
        f.write(file_content)

    return saved_filename
204
+
205
+ # --- API Endpoints ---
206
+
207
@app.get("/", response_class=HTMLResponse)
async def dashboard():
    """Serve the single-page dashboard from ``static/index.html``."""
    # The page declares <meta charset="UTF-8">; read it as UTF-8 explicitly
    # instead of relying on the platform's default encoding.
    with open("static/index.html", "r", encoding="utf-8") as f:
        return HTMLResponse(content=f.read())
211
+
212
@app.post("/api/login")
async def login(username: str = Form(...), password: str = Form(...)):
    """Exchange form credentials for a bearer token.

    Responds with the token, its type and the username; raises a 401
    ``HTTPException`` when ``authenticate_user`` rejects the credentials.
    """
    account = authenticate_user(username, password)
    if account:
        lifetime = timedelta(minutes=ACCESS_TOKEN_EXPIRE_MINUTES)
        token = create_access_token(
            data={"sub": account["username"]}, expires_delta=lifetime
        )
        return {"access_token": token, "token_type": "bearer", "username": account["username"]}

    raise HTTPException(
        status_code=status.HTTP_401_UNAUTHORIZED,
        detail="Incorrect username or password"
    )
225
+
226
@app.post("/api/upload-category")
async def upload_category(
    file: UploadFile = File(...),
    label: str = Form(...),
    current_user: dict = Depends(get_current_user)
):
    """Add one labelled example image (or first PDF page) to the index.

    The upload is embedded with CLIP, appended to the FAISS index, its label
    is appended to ``labels``, and both are persisted via ``save_index``.

    Raises:
        HTTPException: 400 for a blank label, an unreadable PDF, or a failed
            embedding; 500 for any other error.
    """
    try:
        if not label or not label.strip():
            raise HTTPException(status_code=400, detail="Please provide a label")

        label = label.strip()
        file_content = await file.read()

        if file.content_type and file.content_type.startswith('application/pdf'):
            image = image_from_pdf(file_content)
        else:
            image = Image.open(io.BytesIO(file_content))

        if image is None:
            raise HTTPException(status_code=400, detail="Failed to process image")

        embedding = get_clip_embedding(image)
        if embedding is None:
            raise HTTPException(status_code=400, detail="Failed to generate embedding")

        # float32 is the dtype the FAISS index stores.
        index.add(np.array([embedding], dtype=np.float32))
        labels.append(label)
        save_index()

        return {"message": f"✅ Added category '{label}' (Total: {len(labels)} categories)", "status": "success"}
    except HTTPException:
        # HTTPException is an Exception subclass; without this clause the
        # deliberate 400s above were rewritten into 500s by the handler below.
        raise
    except Exception as e:
        raise HTTPException(status_code=500, detail=str(e))
258
+
259
@app.post("/api/classify-document")
async def classify_document(
    file: UploadFile = File(...),
    current_user: dict = Depends(get_current_user)
):
    """Classify an uploaded image/PDF against the stored category examples.

    The upload is embedded with CLIP and the up-to-three nearest index entries
    are looked up. When the best score reaches the confidence threshold the
    file is stored, OCR'd and recorded in ``documents``; otherwise only the
    candidate matches are reported.

    Raises:
        HTTPException: 400 when no categories exist, the upload cannot be
            processed, or the search yields no usable hit; 500 otherwise.
    """
    try:
        if len(labels) == 0:
            raise HTTPException(status_code=400, detail="No categories in database. Please add some first.")

        file_content = await file.read()

        if file.content_type and file.content_type.startswith('application/pdf'):
            image = image_from_pdf(file_content)
        else:
            image = Image.open(io.BytesIO(file_content))

        if image is None:
            raise HTTPException(status_code=400, detail="Failed to process image")

        embedding = get_clip_embedding(image)
        if embedding is None:
            raise HTTPException(status_code=400, detail="Failed to generate embedding")

        # Search for top 3 matches
        k = min(3, len(labels))
        D, I = index.search(np.array([embedding], dtype=np.float32), k=k)

        # FAISS pads missing results with -1, which a bare `< len(labels)`
        # check lets through (and labels[-1] would then pick the last label).
        best_idx = int(I[0][0])
        if not 0 <= best_idx < len(labels):
            raise HTTPException(status_code=400, detail="Document not recognized")

        # NOTE(review): IndexFlatL2 reports squared L2 distances, so for the
        # unit-norm embeddings used here `1 - D` equals 2*cos - 1 rather than
        # the cosine itself. The formula is kept because the 0.35 threshold is
        # calibrated against it.
        # float(...) converts the numpy.float32 FAISS returns into a plain
        # Python float; numpy scalars are not JSON-serialisable, which broke
        # both save_documents() and the response encoding.
        similarity = float(1 - D[0][0])
        confidence_threshold = 0.35
        best_match = labels[best_idx]

        matches = []
        for distance, hit in zip(D[0], I[0]):
            hit = int(hit)
            if 0 <= hit < len(labels):
                matches.append({"category": labels[hit], "similarity": round(float(1 - distance), 3)})

        if similarity < confidence_threshold:
            return {
                "status": "low_confidence",
                "category": best_match,
                "similarity": round(similarity, 3),
                "confidence": "low",
                "matches": matches,
                "document_saved": False
            }

        # Save classified document
        saved_filename = save_uploaded_file(file_content, file.filename)
        ocr_text = extract_text(image)

        document = {
            "id": str(uuid.uuid4()),
            "filename": saved_filename,
            "original_filename": file.filename,
            "category": best_match,
            "similarity": round(similarity, 3),
            "ocr_text": ocr_text,
            "upload_date": datetime.now().isoformat(),
            "file_path": os.path.join(UPLOADS_DIR, saved_filename)
        }

        documents.append(document)
        save_documents()

        return {
            "status": "success",
            "category": best_match,
            "similarity": round(similarity, 3),
            "confidence": "high",
            "matches": matches,
            "document_saved": True,
            "document_id": document["id"]
        }
    except HTTPException:
        # Let deliberate 4xx responses through instead of turning them into 500s.
        raise
    except Exception as e:
        raise HTTPException(status_code=500, detail=str(e))
339
+
340
@app.get("/api/categories")
async def get_categories(current_user: dict = Depends(get_current_user)):
    """Return the distinct category labels and how many examples each has."""
    tally = {}
    for name in labels:
        tally.setdefault(name, 0)
        tally[name] += 1

    unique_names = list(set(labels))  # Remove duplicates
    return {"categories": unique_names, "counts": tally}
348
+
349
@app.get("/api/documents/{category}")
async def get_documents_by_category(
    category: str,
    current_user: dict = Depends(get_current_user)
):
    """Return the stored documents whose ``category`` equals the path value."""
    matching = []
    for entry in documents:
        if entry["category"] == category:
            matching.append(entry)
    return {"documents": matching, "count": len(matching)}
356
+
357
@app.get("/api/documents")
async def get_all_documents(current_user: dict = Depends(get_current_user)):
    """Return every stored document record together with the total count."""
    total = len(documents)
    return {"documents": documents, "count": total}
360
+
361
@app.delete("/api/documents/{document_id}")
async def delete_document(
    document_id: str,
    current_user: dict = Depends(get_current_user)
):
    """Delete a stored document record and its uploaded file.

    Raises:
        HTTPException: 404 when no record has ``document_id``; 500 for any
            other failure.
    """
    try:
        # Find document
        document_index = next(
            (i for i, doc in enumerate(documents) if doc["id"] == document_id),
            None,
        )
        if document_index is None:
            raise HTTPException(status_code=404, detail="Document not found")
        document_to_delete = documents[document_index]

        # Delete physical file
        file_path = document_to_delete.get("file_path")
        if file_path and os.path.exists(file_path):
            os.remove(file_path)

        # Remove from documents list
        documents.pop(document_index)
        save_documents()

        return {"message": "Document deleted successfully", "status": "success"}
    except HTTPException:
        # Without this clause the 404 above was caught by the generic handler
        # below and reported to the client as a 500.
        raise
    except Exception as e:
        raise HTTPException(status_code=500, detail=str(e))
392
+
393
@app.post("/api/ocr")
async def ocr_document(
    file: UploadFile = File(...),
    current_user: dict = Depends(get_current_user)
):
    """Run OCR on an uploaded image (or the first page of a PDF).

    Raises:
        HTTPException: 400 when the PDF cannot be rendered; 500 for any other
            failure while reading or opening the upload.
    """
    try:
        file_content = await file.read()

        if file.content_type and file.content_type.startswith('application/pdf'):
            image = image_from_pdf(file_content)
        else:
            image = Image.open(io.BytesIO(file_content))

        if image is None:
            raise HTTPException(status_code=400, detail="Failed to process image")

        text = extract_text(image)
        return {"text": text, "status": "success"}
    except HTTPException:
        # Keep the 400 above from being rewritten into a 500 below.
        raise
    except Exception as e:
        raise HTTPException(status_code=500, detail=str(e))
413
+
414
@app.get("/api/stats")
async def get_stats(current_user: dict = Depends(get_current_user)):
    """Return summary counts: distinct categories, documents, and documents per category."""
    distribution = {}
    for entry in documents:
        name = entry["category"]
        distribution[name] = distribution.get(name, 0) + 1

    return {
        "total_categories": len(set(labels)),
        "total_documents": len(documents),
        "category_distribution": distribution
    }
428
+
429
if __name__ == "__main__":
    # Script entry point: serve the app on all interfaces, port 7860.
    import uvicorn

    uvicorn.run(
        app,
        host="0.0.0.0",
        port=7860,
    )
main.py ADDED
@@ -0,0 +1,431 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from fastapi import FastAPI, File, UploadFile, Form, HTTPException, Depends, status
2
+ from fastapi.responses import HTMLResponse, JSONResponse
3
+ from fastapi.staticfiles import StaticFiles
4
+ from fastapi.security import HTTPBearer, HTTPAuthorizationCredentials
5
+ import pytesseract
6
+ from PIL import Image
7
+ import numpy as np
8
+ import faiss
9
+ import os
10
+ import pickle
11
+ from pdf2image import convert_from_bytes
12
+ import torch
13
+ import clip
14
+ import io
15
+ import json
16
+ import uuid
17
+ from datetime import datetime, timedelta
18
+ from typing import List, Dict, Any, Optional
19
+ import base64
20
+ import jwt
21
+ from passlib.context import CryptContext
22
+
23
app = FastAPI(title="Handwritten Archive Document Digitalization System")

# Security configuration
# The JWT signing key and the bootstrap admin password can be overridden via
# the SECRET_KEY / ADMIN_PASSWORD environment variables so a deployment does
# not have to run on values committed to the repository. The original literals
# remain as fallbacks for local development only.
SECRET_KEY = os.environ.get("SECRET_KEY", "your-secret-key-change-this-in-production")
ALGORITHM = "HS256"
ACCESS_TOKEN_EXPIRE_MINUTES = 30

pwd_context = CryptContext(schemes=["bcrypt"], deprecated="auto")
security = HTTPBearer()

# Default admin user (change in production)
USERS_DB = {
    "admin": {
        "username": "admin",
        "hashed_password": pwd_context.hash(os.environ.get("ADMIN_PASSWORD", "admin123")),
        "is_active": True,
    }
}
41
+
42
# --- On-disk layout ---
INDEX_PATH = "data/index.faiss"
LABELS_PATH = "data/labels.pkl"
DOCUMENTS_PATH = "data/documents.json"
UPLOADS_DIR = "data/uploads"

# Ensure directories exist. This must run before the static mount below:
# StaticFiles verifies that its directory exists when it is constructed, so
# creating "static" afterwards would be too late to help.
os.makedirs("data", exist_ok=True)
os.makedirs("static", exist_ok=True)
os.makedirs(UPLOADS_DIR, exist_ok=True)

# Mount static files
app.mount("/static", StaticFiles(directory="static"), name="static")

# --- Load or Initialize Model/Index ---
device = "cuda" if torch.cuda.is_available() else "cpu"
clip_model, preprocess = clip.load("ViT-B/32", device=device)

# Initialize index and labels with error handling
# 512 is the vector width the index is built for; it has to match the length
# of the embeddings returned by get_clip_embedding().
index = faiss.IndexFlatL2(512)
labels = []
documents = []
63
+
64
if os.path.exists(INDEX_PATH) and os.path.exists(LABELS_PATH):
    try:
        # Load into temporaries first: assigning `index` before the labels are
        # read would leave a populated index paired with an empty label list
        # if the second read fails.
        loaded_index = faiss.read_index(INDEX_PATH)
        # NOTE(security): pickle.load can execute arbitrary code; this file
        # must only ever be one written by save_index().
        with open(LABELS_PATH, "rb") as f:
            loaded_labels = pickle.load(f)
        # Row i of the index is described by labels[i]; a count mismatch means
        # search results would map to the wrong (or a missing) label.
        if loaded_index.ntotal != len(loaded_labels):
            raise RuntimeError(
                f"index holds {loaded_index.ntotal} vectors but {len(loaded_labels)} labels"
            )
        index, labels = loaded_index, loaded_labels
        print(f"✅ Loaded existing index with {len(labels)} labels")
    except (RuntimeError, EOFError, pickle.UnpicklingError) as e:
        print(f"⚠️ Failed to load existing index: {e}")
        print("🔄 Starting with fresh index")
        if os.path.exists(INDEX_PATH):
            os.remove(INDEX_PATH)
        if os.path.exists(LABELS_PATH):
            os.remove(LABELS_PATH)
77
+
78
# Load documents database
if os.path.exists(DOCUMENTS_PATH):
    try:
        with open(DOCUMENTS_PATH, 'r') as f:
            documents = json.load(f)
    except (OSError, ValueError) as e:
        # json.JSONDecodeError and UnicodeDecodeError are ValueError
        # subclasses. Anything else (KeyboardInterrupt, programming errors)
        # is no longer silently swallowed.
        print(f"⚠️ Failed to load documents database: {e}")
        documents = []
    # The rest of the module appends to and iterates over this value.
    if not isinstance(documents, list):
        documents = []
85
+
86
+ # Authentication functions
87
def verify_password(plain_password, hashed_password):
    """Return whether ``plain_password`` matches ``hashed_password`` per ``pwd_context``."""
    is_match = pwd_context.verify(plain_password, hashed_password)
    return is_match
89
+
90
def get_password_hash(password):
    """Hash ``password`` with the module's ``pwd_context`` and return the hash."""
    hashed = pwd_context.hash(password)
    return hashed
92
+
93
def authenticate_user(username: str, password: str):
    """Check a username/password pair against ``USERS_DB``.

    Returns the user record (a dict) on success, or ``False`` when the user is
    unknown, the password does not verify, or the record's ``is_active`` flag
    is falsy.
    """
    user = USERS_DB.get(username)
    if not user or not verify_password(password, user["hashed_password"]):
        return False
    # Records carry an ``is_active`` flag; honour it so a deactivated account
    # cannot obtain new tokens. Records without the flag remain usable.
    if not user.get("is_active", True):
        return False
    return user
98
+
99
def create_access_token(data: dict, expires_delta: Optional[timedelta] = None):
    """Return a signed JWT containing ``data`` plus an ``exp`` claim.

    Args:
        data: Claims to embed; copied, never mutated.
        expires_delta: Token lifetime. When ``None`` the token lives 15 minutes.

    Returns:
        The token produced by ``jwt.encode`` with ``SECRET_KEY``/``ALGORITHM``.
    """
    # Local import: the file-level import only brings in datetime and timedelta.
    from datetime import timezone

    # Compare with None rather than truthiness: timedelta(0) is falsy and used
    # to be silently replaced by the 15-minute default.
    lifetime = expires_delta if expires_delta is not None else timedelta(minutes=15)

    to_encode = data.copy()
    # Timezone-aware "now" instead of the naive datetime.utcnow().
    to_encode.update({"exp": datetime.now(timezone.utc) + lifetime})
    return jwt.encode(to_encode, SECRET_KEY, algorithm=ALGORITHM)
108
+
109
async def get_current_user(credentials: HTTPAuthorizationCredentials = Depends(security)):
    """Resolve the bearer token on the request to a ``USERS_DB`` record.

    Raises:
        HTTPException: 401 when the token cannot be decoded, has no ``sub``
            claim, names an unknown user, or names a user whose ``is_active``
            flag is falsy.
    """
    credentials_exception = HTTPException(
        status_code=status.HTTP_401_UNAUTHORIZED,
        detail="Could not validate credentials",
        headers={"WWW-Authenticate": "Bearer"},
    )
    try:
        payload = jwt.decode(credentials.credentials, SECRET_KEY, algorithms=[ALGORITHM])
    except jwt.PyJWTError:
        raise credentials_exception

    username = payload.get("sub")
    if username is None:
        raise credentials_exception

    user = USERS_DB.get(username)
    # Also reject deactivated accounts so an already-issued token stops
    # working once ``is_active`` is cleared.
    if user is None or not user.get("is_active", True):
        raise credentials_exception
    return user
127
+
128
+ # --- Utilities ---
129
def save_index():
    """Persist the FAISS index and the label list to disk (best effort).

    Both files are written to ``*.tmp`` siblings and then moved into place
    with ``os.replace`` so a failure part-way through a write cannot leave a
    truncated index or pickle behind. Errors are printed, not raised.
    """
    index_tmp = INDEX_PATH + ".tmp"
    labels_tmp = LABELS_PATH + ".tmp"
    try:
        os.makedirs("data", exist_ok=True)
        faiss.write_index(index, index_tmp)
        with open(labels_tmp, "wb") as f:
            pickle.dump(labels, f)
        # Only swap the files in once both were written completely.
        os.replace(index_tmp, INDEX_PATH)
        os.replace(labels_tmp, LABELS_PATH)
    except Exception as e:
        print(f"❌ Failed to save index: {e}")
137
+
138
def save_documents():
    """Write the in-memory ``documents`` list to ``DOCUMENTS_PATH`` (best effort).

    The JSON is dumped to a temporary sibling first and moved into place with
    ``os.replace``; a failed dump therefore leaves the previous database
    intact instead of a half-written file. Errors are printed, not raised.
    """
    tmp_path = DOCUMENTS_PATH + ".tmp"
    try:
        with open(tmp_path, 'w') as f:
            json.dump(documents, f, indent=2)
        os.replace(tmp_path, DOCUMENTS_PATH)
    except Exception as e:
        print(f"❌ Failed to save documents: {e}")
144
+
145
def image_from_pdf(pdf_bytes):
    """Render the first page of a PDF and return it as an image.

    Args:
        pdf_bytes: Raw bytes of the PDF file.

    Returns:
        The first page as returned by ``convert_from_bytes``, or ``None`` when
        conversion fails or yields no pages (the error is printed).
    """
    try:
        # Only page 1 is ever used, so ask for just that page instead of
        # rasterising the whole document at 200 dpi.
        pages = convert_from_bytes(pdf_bytes, dpi=200, first_page=1, last_page=1)
        return pages[0]
    except Exception as e:
        print(f"❌ PDF conversion error: {e}")
        return None
152
+
153
def extract_text(image):
    """Run Tesseract OCR over ``image`` and return the recognised text.

    ``image`` may be raw bytes, a PIL image, or anything ``Image.fromarray``
    accepts. Failures are reported through the returned string rather than
    raised: a fixed message for ``None`` input, a fixed message when no text
    is found, and an "OCR error" message carrying the exception text.
    """
    if image is None:
        return "❌ No image provided"

    try:
        if isinstance(image, bytes):
            picture = Image.open(io.BytesIO(image))
        elif isinstance(image, Image.Image):
            picture = image
        else:
            picture = Image.fromarray(image)

        rgb = picture if picture.mode == 'RGB' else picture.convert('RGB')

        raw = pytesseract.image_to_string(rgb, config=r'--oem 3 --psm 6')
        cleaned = raw.strip()
        return cleaned or "❓ No text detected"
    except Exception as err:
        return f"❌ OCR error: {str(err)}"
171
+
172
def get_clip_embedding(image):
    """Return the L2-normalised CLIP image embedding for ``image``.

    Args:
        image: Raw bytes, a PIL image, or anything ``Image.fromarray`` accepts.

    Returns:
        A 1-D float32 NumPy array, or ``None`` when ``image`` is ``None`` or
        embedding fails (the error is printed).
    """
    try:
        if image is None:
            return None

        if isinstance(image, bytes):
            image = Image.open(io.BytesIO(image))
        elif not isinstance(image, Image.Image):
            image = Image.fromarray(image)

        if image.mode != 'RGB':
            image = image.convert('RGB')

        image_input = preprocess(image).unsqueeze(0).to(device)
        with torch.no_grad():
            image_features = clip_model.encode_image(image_input)
            image_features = image_features / image_features.norm(dim=-1, keepdim=True)
        # The model can produce half-precision features when it runs on CUDA,
        # while the FAISS index stores float32; cast explicitly so the result
        # has the same dtype on every device.
        return image_features.float().cpu().numpy()[0]
    except Exception as e:
        print(f"❌ CLIP embedding error: {e}")
        return None
193
+
194
def save_uploaded_file(file_content: bytes, filename: str) -> str:
    """Store ``file_content`` under ``UPLOADS_DIR`` with a fresh UUID name.

    Only the extension of ``filename`` is reused; the stem is replaced by a
    random UUID so client-supplied names never reach the filesystem.

    Args:
        file_content: Bytes to write.
        filename: Original client filename; may be empty or ``None``, in which
            case the stored file gets no extension.

    Returns:
        The generated file name (not the full path).
    """
    # Uploads can arrive without a filename; splitext(None) would raise.
    file_extension = os.path.splitext(filename or "")[1]
    saved_filename = f"{uuid.uuid4()}{file_extension}"

    # Recreate the directory if it was removed after start-up.
    os.makedirs(UPLOADS_DIR, exist_ok=True)
    with open(os.path.join(UPLOADS_DIR, saved_filename), 'wb') as f:
        f.write(file_content)

    return saved_filename
204
+
205
+ # --- API Endpoints ---
206
+
207
@app.get("/", response_class=HTMLResponse)
async def dashboard():
    """Serve the single-page dashboard from ``static/index.html``."""
    # The page declares <meta charset="UTF-8">; read it as UTF-8 explicitly
    # instead of relying on the platform's default encoding.
    with open("static/index.html", "r", encoding="utf-8") as f:
        return HTMLResponse(content=f.read())
211
+
212
@app.post("/api/login")
async def login(username: str = Form(...), password: str = Form(...)):
    """Exchange form credentials for a bearer token.

    Responds with the token, its type and the username; raises a 401
    ``HTTPException`` when ``authenticate_user`` rejects the credentials.
    """
    account = authenticate_user(username, password)
    if account:
        lifetime = timedelta(minutes=ACCESS_TOKEN_EXPIRE_MINUTES)
        token = create_access_token(
            data={"sub": account["username"]}, expires_delta=lifetime
        )
        return {"access_token": token, "token_type": "bearer", "username": account["username"]}

    raise HTTPException(
        status_code=status.HTTP_401_UNAUTHORIZED,
        detail="Incorrect username or password"
    )
225
+
226
@app.post("/api/upload-category")
async def upload_category(
    file: UploadFile = File(...),
    label: str = Form(...),
    current_user: dict = Depends(get_current_user)
):
    """Add one labelled example image (or first PDF page) to the index.

    The upload is embedded with CLIP, appended to the FAISS index, its label
    is appended to ``labels``, and both are persisted via ``save_index``.

    Raises:
        HTTPException: 400 for a blank label, an unreadable PDF, or a failed
            embedding; 500 for any other error.
    """
    try:
        if not label or not label.strip():
            raise HTTPException(status_code=400, detail="Please provide a label")

        label = label.strip()
        file_content = await file.read()

        if file.content_type and file.content_type.startswith('application/pdf'):
            image = image_from_pdf(file_content)
        else:
            image = Image.open(io.BytesIO(file_content))

        if image is None:
            raise HTTPException(status_code=400, detail="Failed to process image")

        embedding = get_clip_embedding(image)
        if embedding is None:
            raise HTTPException(status_code=400, detail="Failed to generate embedding")

        # float32 is the dtype the FAISS index stores.
        index.add(np.array([embedding], dtype=np.float32))
        labels.append(label)
        save_index()

        return {"message": f"✅ Added category '{label}' (Total: {len(labels)} categories)", "status": "success"}
    except HTTPException:
        # HTTPException is an Exception subclass; without this clause the
        # deliberate 400s above were rewritten into 500s by the handler below.
        raise
    except Exception as e:
        raise HTTPException(status_code=500, detail=str(e))
258
+
259
@app.post("/api/classify-document")
async def classify_document(
    file: UploadFile = File(...),
    current_user: dict = Depends(get_current_user)
):
    """Classify an uploaded image/PDF against the stored category examples.

    The upload is embedded with CLIP and the up-to-three nearest index entries
    are looked up. When the best score reaches the confidence threshold the
    file is stored, OCR'd and recorded in ``documents``; otherwise only the
    candidate matches are reported.

    Raises:
        HTTPException: 400 when no categories exist, the upload cannot be
            processed, or the search yields no usable hit; 500 otherwise.
    """
    try:
        if len(labels) == 0:
            raise HTTPException(status_code=400, detail="No categories in database. Please add some first.")

        file_content = await file.read()

        if file.content_type and file.content_type.startswith('application/pdf'):
            image = image_from_pdf(file_content)
        else:
            image = Image.open(io.BytesIO(file_content))

        if image is None:
            raise HTTPException(status_code=400, detail="Failed to process image")

        embedding = get_clip_embedding(image)
        if embedding is None:
            raise HTTPException(status_code=400, detail="Failed to generate embedding")

        # Search for top 3 matches
        k = min(3, len(labels))
        D, I = index.search(np.array([embedding], dtype=np.float32), k=k)

        # FAISS pads missing results with -1, which a bare `< len(labels)`
        # check lets through (and labels[-1] would then pick the last label).
        best_idx = int(I[0][0])
        if not 0 <= best_idx < len(labels):
            raise HTTPException(status_code=400, detail="Document not recognized")

        # NOTE(review): IndexFlatL2 reports squared L2 distances, so for the
        # unit-norm embeddings used here `1 - D` equals 2*cos - 1 rather than
        # the cosine itself. The formula is kept because the 0.35 threshold is
        # calibrated against it.
        # float(...) converts the numpy.float32 FAISS returns into a plain
        # Python float; numpy scalars are not JSON-serialisable, which broke
        # both save_documents() and the response encoding.
        similarity = float(1 - D[0][0])
        confidence_threshold = 0.35
        best_match = labels[best_idx]

        matches = []
        for distance, hit in zip(D[0], I[0]):
            hit = int(hit)
            if 0 <= hit < len(labels):
                matches.append({"category": labels[hit], "similarity": round(float(1 - distance), 3)})

        if similarity < confidence_threshold:
            return {
                "status": "low_confidence",
                "category": best_match,
                "similarity": round(similarity, 3),
                "confidence": "low",
                "matches": matches,
                "document_saved": False
            }

        # Save classified document
        saved_filename = save_uploaded_file(file_content, file.filename)
        ocr_text = extract_text(image)

        document = {
            "id": str(uuid.uuid4()),
            "filename": saved_filename,
            "original_filename": file.filename,
            "category": best_match,
            "similarity": round(similarity, 3),
            "ocr_text": ocr_text,
            "upload_date": datetime.now().isoformat(),
            "file_path": os.path.join(UPLOADS_DIR, saved_filename)
        }

        documents.append(document)
        save_documents()

        return {
            "status": "success",
            "category": best_match,
            "similarity": round(similarity, 3),
            "confidence": "high",
            "matches": matches,
            "document_saved": True,
            "document_id": document["id"]
        }
    except HTTPException:
        # Let deliberate 4xx responses through instead of turning them into 500s.
        raise
    except Exception as e:
        raise HTTPException(status_code=500, detail=str(e))
339
+
340
@app.get("/api/categories")
async def get_categories(current_user: dict = Depends(get_current_user)):
    """Return the distinct category labels and how many examples each has."""
    tally = {}
    for name in labels:
        tally.setdefault(name, 0)
        tally[name] += 1

    unique_names = list(set(labels))  # Remove duplicates
    return {"categories": unique_names, "counts": tally}
348
+
349
@app.get("/api/documents/{category}")
async def get_documents_by_category(
    category: str,
    current_user: dict = Depends(get_current_user)
):
    """Return the stored documents whose ``category`` equals the path value."""
    matching = []
    for entry in documents:
        if entry["category"] == category:
            matching.append(entry)
    return {"documents": matching, "count": len(matching)}
356
+
357
@app.get("/api/documents")
async def get_all_documents(current_user: dict = Depends(get_current_user)):
    """Return every stored document record together with the total count."""
    total = len(documents)
    return {"documents": documents, "count": total}
360
+
361
@app.delete("/api/documents/{document_id}")
async def delete_document(
    document_id: str,
    current_user: dict = Depends(get_current_user)
):
    """Delete a stored document record and its uploaded file.

    Raises:
        HTTPException: 404 when no record has ``document_id``; 500 for any
            other failure.
    """
    try:
        # Find document
        document_index = next(
            (i for i, doc in enumerate(documents) if doc["id"] == document_id),
            None,
        )
        if document_index is None:
            raise HTTPException(status_code=404, detail="Document not found")
        document_to_delete = documents[document_index]

        # Delete physical file
        file_path = document_to_delete.get("file_path")
        if file_path and os.path.exists(file_path):
            os.remove(file_path)

        # Remove from documents list
        documents.pop(document_index)
        save_documents()

        return {"message": "Document deleted successfully", "status": "success"}
    except HTTPException:
        # Without this clause the 404 above was caught by the generic handler
        # below and reported to the client as a 500.
        raise
    except Exception as e:
        raise HTTPException(status_code=500, detail=str(e))
392
+
393
@app.post("/api/ocr")
async def ocr_document(
    file: UploadFile = File(...),
    current_user: dict = Depends(get_current_user)
):
    """Run OCR on an uploaded image (or the first page of a PDF).

    Raises:
        HTTPException: 400 when the PDF cannot be rendered; 500 for any other
            failure while reading or opening the upload.
    """
    try:
        file_content = await file.read()

        if file.content_type and file.content_type.startswith('application/pdf'):
            image = image_from_pdf(file_content)
        else:
            image = Image.open(io.BytesIO(file_content))

        if image is None:
            raise HTTPException(status_code=400, detail="Failed to process image")

        text = extract_text(image)
        return {"text": text, "status": "success"}
    except HTTPException:
        # Keep the 400 above from being rewritten into a 500 below.
        raise
    except Exception as e:
        raise HTTPException(status_code=500, detail=str(e))
413
+
414
@app.get("/api/stats")
async def get_stats(current_user: dict = Depends(get_current_user)):
    """Return summary counts: distinct categories, documents, and documents per category."""
    distribution = {}
    for entry in documents:
        name = entry["category"]
        distribution[name] = distribution.get(name, 0) + 1

    return {
        "total_categories": len(set(labels)),
        "total_documents": len(documents),
        "category_distribution": distribution
    }
428
+
429
if __name__ == "__main__":
    # Script entry point: serve the app on all interfaces, port 8000.
    import uvicorn

    uvicorn.run(
        app,
        host="0.0.0.0",
        port=8000,
    )
requirements.txt ADDED
@@ -0,0 +1,19 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
fastapi
uvicorn[standard]
python-multipart
python-jose[cryptography]
# app.py / main.py do `import jwt` and catch `jwt.PyJWTError`; that module is
# provided by PyJWT, not by python-jose.
PyJWT
passlib[bcrypt]
bcrypt
gradio
faiss-cpu
pytesseract
pdf2image
sentence-transformers
torch
torchvision
Pillow
ftfy
regex
tqdm
git+https://github.com/openai/CLIP.git
poppler-utils
space.yaml ADDED
@@ -0,0 +1 @@
 
 
1
+ sdk: "docker"
static/index.html ADDED
@@ -0,0 +1,161 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ <!DOCTYPE html>
2
+ <html lang="en">
3
+ <head>
4
+ <meta charset="UTF-8">
5
+ <meta name="viewport" content="width=device-width, initial-scale=1.0">
6
+ <title>Handwritten Archive Document Digitalization System</title>
7
+ <link href="https://cdnjs.cloudflare.com/ajax/libs/font-awesome/6.0.0/css/all.min.css" rel="stylesheet">
8
+ <link rel="stylesheet" href="/static/styles.css">
9
+ </head>
10
+ <body>
11
+ <!-- Login Modal -->
12
+ <div id="loginModal" class="modal">
13
+ <div class="modal-content">
14
+ <h2><i class="fas fa-lock"></i> Login Required</h2>
15
+ <form id="loginForm">
16
+ <div class="form-group">
17
+ <label for="username">Username</label>
18
+ <input type="text" id="username" class="form-control" required value="admin">
19
+ </div>
20
+ <div class="form-group">
21
+ <label for="password">Password</label>
22
+ <input type="password" id="password" class="form-control" required value="admin123">
23
+ </div>
24
+ <button type="submit" class="btn btn-primary">
25
+ <i class="fas fa-sign-in-alt"></i> Login
26
+ </button>
27
+ </form>
28
+ <div id="loginResult"></div>
29
+ </div>
30
+ </div>
31
+
32
+ <!-- Main Application -->
33
+ <div id="mainApp" style="display: none;">
34
+ <div class="container">
35
+ <div class="header">
36
+ <div class="header-content">
37
+ <div>
38
+ <h1><i class="fas fa-archive"></i> Archive Digitalization System</h1>
39
+ <p>Handwritten Document Classification & Storage</p>
40
+ </div>
41
+ <div class="user-info">
42
+ <span id="welcomeUser"></span>
43
+ <button class="btn btn-secondary" onclick="logout()">
44
+ <i class="fas fa-sign-out-alt"></i> Logout
45
+ </button>
46
+ </div>
47
+ </div>
48
+ </div>
49
+
50
+ <div class="dashboard-stats" id="stats">
51
+ <!-- Stats will be loaded here -->
52
+ </div>
53
+
54
+ <div class="tabs">
55
+ <button class="tab-button active" onclick="showTab('upload')">
56
+ <i class="fas fa-upload"></i> Upload Categories
57
+ </button>
58
+ <button class="tab-button" onclick="showTab('classify')">
59
+ <i class="fas fa-search"></i> Classify Documents
60
+ </button>
61
+ <button class="tab-button" onclick="showTab('browse')">
62
+ <i class="fas fa-folder-open"></i> Browse Archive
63
+ </button>
64
+ <button class="tab-button" onclick="showTab('ocr')">
65
+ <i class="fas fa-eye"></i> OCR Text
66
+ </button>
67
+ </div>
68
+
69
+ <!-- Upload Categories Tab -->
70
+ <div id="upload" class="tab-content active">
71
+ <h2><i class="fas fa-tags"></i> Upload Document Categories</h2>
72
+ <p>Upload sample documents for each category to train the classification system.</p>
73
+
74
+ <form id="uploadForm">
75
+ <div class="form-group">
76
+ <label for="categoryFile">Document File (Image or PDF)</label>
77
+ <div class="file-upload" id="categoryUpload">
78
+ <i class="fas fa-cloud-upload-alt fa-2x"></i>
79
+ <p>Click to select or drag & drop files here</p>
80
+ <input type="file" id="categoryFile" accept="image/*,.pdf" style="display: none;">
81
+ </div>
82
+ </div>
83
+
84
+ <div class="form-group">
85
+ <label for="categoryLabel">Category Label</label>
86
+ <input type="text" id="categoryLabel" class="form-control" placeholder="e.g., birth_certificate, passport, diploma">
87
+ </div>
88
+
89
+ <button type="submit" class="btn btn-primary">
90
+ <i class="fas fa-plus"></i> Add Category
91
+ </button>
92
+ </form>
93
+
94
+ <div id="uploadResult"></div>
95
+ </div>
96
+
97
+ <!-- Classify Documents Tab -->
98
+ <div id="classify" class="tab-content">
99
+ <h2><i class="fas fa-robot"></i> Classify & Store Documents</h2>
100
+ <p>Upload documents to automatically classify and store them in the archive (min. 35% confidence).</p>
101
+
102
+ <form id="classifyForm">
103
+ <div class="form-group">
104
+ <label for="classifyFile">Document to Classify</label>
105
+ <div class="file-upload" id="classifyUpload">
106
+ <i class="fas fa-file fa-2x"></i>
107
+ <p>Click to select or drag & drop files here</p>
108
+ <input type="file" id="classifyFile" accept="image/*,.pdf" style="display: none;">
109
+ </div>
110
+ </div>
111
+
112
+ <button type="submit" class="btn btn-success">
113
+ <i class="fas fa-search"></i> Classify Document
114
+ </button>
115
+ </form>
116
+
117
+ <div id="classifyResult"></div>
118
+ </div>
119
+
120
+ <!-- Browse Archive Tab -->
121
+ <div id="browse" class="tab-content">
122
+ <h2><i class="fas fa-archive"></i> Browse Document Archive</h2>
123
+ <p>Browse and search through your classified documents by category.</p>
124
+
125
+ <div class="category-buttons" id="categoryButtons">
126
+ <!-- Category buttons will be loaded here -->
127
+ </div>
128
+
129
+ <div id="documentsContainer">
130
+ <!-- Documents will be loaded here -->
131
+ </div>
132
+ </div>
133
+
134
+ <!-- OCR Text Tab -->
135
+ <div id="ocr" class="tab-content">
136
+ <h2><i class="fas fa-eye"></i> OCR Text Extraction</h2>
137
+ <p>Extract text from documents using Optical Character Recognition.</p>
138
+
139
+ <form id="ocrForm">
140
+ <div class="form-group">
141
+ <label for="ocrFile">Document File</label>
142
+ <div class="file-upload" id="ocrUpload">
143
+ <i class="fas fa-file-alt fa-2x"></i>
144
+ <p>Click to select or drag & drop files here</p>
145
+ <input type="file" id="ocrFile" accept="image/*,.pdf" style="display: none;">
146
+ </div>
147
+ </div>
148
+
149
+ <button type="submit" class="btn btn-primary">
150
+ <i class="fas fa-search"></i> Extract Text
151
+ </button>
152
+ </form>
153
+
154
+ <div id="ocrResult"></div>
155
+ </div>
156
+ </div>
157
+ </div>
158
+
159
+ <script src="/static/script.js"></script>
160
+ </body>
161
+ </html>
static/script.js ADDED
@@ -0,0 +1,458 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
// Global variables
// Category labels from the most recent /api/categories response.
let categories = [];
// Document records from the most recent /api/documents response.
let documents = [];
// Bearer token sent with authenticated requests; null while logged out.
let authToken = null;
// Username of the signed-in user; null while logged out.
let currentUser = null;

// Initialize app: once the DOM is ready, decide between the login modal and the main UI.
document.addEventListener('DOMContentLoaded', function() {
    checkAuth();
});
11
+
12
+ // Authentication functions
13
/**
 * Restore the session from localStorage and show either the main
 * application (token and username both present) or the login modal.
 */
function checkAuth() {
    authToken = localStorage.getItem('authToken');
    currentUser = localStorage.getItem('currentUser');

    const hasSession = Boolean(authToken && currentUser);
    if (!hasSession) {
        showLoginModal();
        return;
    }

    showMainApp();
    document.getElementById('welcomeUser').textContent = `Welcome, ${currentUser}`;
    loadStats();
    loadCategories();
    setupFileUploads();
}
27
+
28
/** Show the login modal and hide the main application container. */
function showLoginModal() {
    const displayById = [
        ['loginModal', 'flex'],
        ['mainApp', 'none']
    ];
    for (const [id, display] of displayById) {
        document.getElementById(id).style.display = display;
    }
}
32
+
33
/** Hide the login modal and reveal the main application container. */
function showMainApp() {
    const displayById = [
        ['loginModal', 'none'],
        ['mainApp', 'block']
    ];
    for (const [id, display] of displayById) {
        document.getElementById(id).style.display = display;
    }
}
37
+
38
/** Drop the stored session (persisted and in-memory) and return to the login modal. */
function logout() {
    ['authToken', 'currentUser'].forEach((key) => {
        localStorage.removeItem(key);
    });
    authToken = null;
    currentUser = null;
    showLoginModal();
}
45
+
46
+ // Login form handler
47
// Login form: post the credentials to /api/login, persist the returned
// session on success, and report the server's detail message on failure.
document.getElementById('loginForm').addEventListener('submit', async (submitEvent) => {
    submitEvent.preventDefault();

    const credentials = new FormData();
    credentials.append('username', document.getElementById('username').value);
    credentials.append('password', document.getElementById('password').value);
    const resultDiv = document.getElementById('loginResult');

    try {
        const response = await fetch('/api/login', { method: 'POST', body: credentials });
        const payload = await response.json();

        if (!response.ok) {
            showResult(resultDiv, payload.detail, 'error');
            return;
        }

        // Keep the session both in memory and in localStorage.
        authToken = payload.access_token;
        currentUser = payload.username;
        localStorage.setItem('authToken', authToken);
        localStorage.setItem('currentUser', currentUser);

        showMainApp();
        document.getElementById('welcomeUser').textContent = `Welcome, ${currentUser}`;
        loadStats();
        loadCategories();
        setupFileUploads();
    } catch (error) {
        showResult(resultDiv, 'Login failed: ' + error.message, 'error');
    }
});
84
+
85
+ // API request with authentication
86
/**
 * fetch() wrapper that attaches the bearer token to the request.
 *
 * Caller-supplied headers are merged after the Authorization header.
 * A 401 response logs the user out and rejects with an error.
 *
 * @param {string} url - Request URL.
 * @param {object} [options] - Standard fetch options.
 * @returns {Promise<Response>} The response for any non-401 status.
 */
async function authenticatedFetch(url, options = {}) {
    if (!authToken) {
        throw new Error('No authentication token');
    }

    const headers = {
        'Authorization': `Bearer ${authToken}`,
        ...options.headers
    };
    const response = await fetch(url, { ...options, headers });

    if (response.status !== 401) {
        return response;
    }

    logout();
    throw new Error('Authentication failed');
}
107
+
108
+ // Tab management
109
/**
 * Activate the tab panel whose element id is `tabName` and highlight its button.
 *
 * The button is resolved with closest() so that a click on the icon inside
 * the button still highlights the button itself. When no click event is in
 * flight (programmatic call), the button is found through its inline
 * onclick attribute instead of throwing.
 *
 * @param {string} tabName - Id of the tab panel to show.
 */
function showTab(tabName) {
    // Hide all tabs
    document.querySelectorAll('.tab-content').forEach(tab => {
        tab.classList.remove('active');
    });
    const tabButtons = document.querySelectorAll('.tab-button');
    tabButtons.forEach(btn => {
        btn.classList.remove('active');
    });

    // Show selected tab
    document.getElementById(tabName).classList.add('active');

    const currentEvent = window.event;
    let activeButton = null;
    if (currentEvent && currentEvent.target && typeof currentEvent.target.closest === 'function') {
        activeButton = currentEvent.target.closest('.tab-button');
    }
    if (!activeButton) {
        activeButton = Array.from(tabButtons).find(btn =>
            (btn.getAttribute('onclick') || '').includes(`'${tabName}'`)
        ) || null;
    }
    if (activeButton) {
        activeButton.classList.add('active');
    }

    // Load data for specific tabs
    if (tabName === 'browse') {
        loadCategories();
        loadAllDocuments();
    }
}
128
+
129
+ // Setup file upload drag & drop
130
/**
 * Wire click-to-select and drag & drop onto the three upload drop zones.
 *
 * This runs on every successful login, so each drop zone is marked once
 * bound; a logout followed by a login without a page reload no longer
 * stacks duplicate listeners on the same elements.
 */
function setupFileUploads() {
    const uploads = [
        { div: 'categoryUpload', input: 'categoryFile' },
        { div: 'classifyUpload', input: 'classifyFile' },
        { div: 'ocrUpload', input: 'ocrFile' }
    ];

    uploads.forEach(upload => {
        const uploadDiv = document.getElementById(upload.div);
        const fileInput = document.getElementById(upload.input);

        // Skip zones that were already wired by an earlier call.
        if (uploadDiv.dataset.uploadBound === 'true') {
            return;
        }
        uploadDiv.dataset.uploadBound = 'true';

        uploadDiv.addEventListener('click', () => fileInput.click());

        uploadDiv.addEventListener('dragover', (e) => {
            // preventDefault is required for the element to accept a drop.
            e.preventDefault();
            uploadDiv.classList.add('dragover');
        });

        uploadDiv.addEventListener('dragleave', () => {
            uploadDiv.classList.remove('dragover');
        });

        uploadDiv.addEventListener('drop', (e) => {
            e.preventDefault();
            uploadDiv.classList.remove('dragover');
            const files = e.dataTransfer.files;
            if (files.length > 0) {
                fileInput.files = files;
                uploadDiv.querySelector('p').textContent = files[0].name;
            }
        });

        fileInput.addEventListener('change', () => {
            if (fileInput.files.length > 0) {
                uploadDiv.querySelector('p').textContent = fileInput.files[0].name;
            }
        });
    });
}
169
+
170
+ // Load dashboard stats
171
/**
 * Fetch /api/stats and render the three dashboard cards
 * (category count, document count, fixed minimum-confidence figure).
 * Failures are logged to the console only.
 */
async function loadStats() {
    try {
        const response = await authenticatedFetch('/api/stats');
        const summary = await response.json();
        const statsPanel = document.getElementById('stats');

        statsPanel.innerHTML = `
            <div class="stat-card">
                <h3>${summary.total_categories}</h3>
                <p><i class="fas fa-tags"></i> Total Categories</p>
            </div>
            <div class="stat-card">
                <h3>${summary.total_documents}</h3>
                <p><i class="fas fa-file"></i> Documents Archived</p>
            </div>
            <div class="stat-card">
                <h3>35%</h3>
                <p><i class="fas fa-percentage"></i> Min Confidence</p>
            </div>
        `;
    } catch (error) {
        console.error('Error loading stats:', error);
    }
}
196
+
197
+ // Load categories
198
/**
 * Fetch /api/categories and rebuild the category filter buttons.
 *
 * Category labels are user-supplied, so the buttons are built as DOM nodes
 * (textContent plus a click listener) rather than interpolated into HTML
 * and an inline onclick string; labels containing quotes or markup can
 * neither break the handler nor inject HTML.
 * Failures are logged to the console only.
 */
async function loadCategories() {
    try {
        const response = await authenticatedFetch('/api/categories');
        const data = await response.json();
        categories = data.categories;
        const counts = data.counts || {};

        const makeButton = (label, category, isActive) => {
            const button = document.createElement('button');
            button.className = isActive ? 'category-btn active' : 'category-btn';
            button.textContent = label;
            // filterDocuments runs inside the click dispatch, as it did with
            // the previous inline onclick attribute.
            button.addEventListener('click', () => filterDocuments(category));
            return button;
        };

        const container = document.getElementById('categoryButtons');
        container.textContent = '';
        container.append(
            makeButton('All Documents', 'all', true),
            ...categories.map(cat => makeButton(`${cat} (${counts[cat] || 0})`, cat, false))
        );
    } catch (error) {
        console.error('Error loading categories:', error);
    }
}
220
+
221
+ // Load all documents
222
/**
 * Fetch every archived document, cache the list in the global `documents`,
 * and render it. Failures are logged to the console only.
 */
async function loadAllDocuments() {
    try {
        const response = await authenticatedFetch('/api/documents');
        const payload = await response.json();
        documents = payload.documents;
        displayDocuments(documents);
    } catch (error) {
        console.error('Error loading documents:', error);
    }
}
232
+
233
+ // Filter documents by category
234
/**
 * Show the documents of one category, or of all categories.
 *
 * The category is URL-encoded before being placed in the request path so
 * labels containing spaces, `#`, `?` or similar characters reach the server
 * intact. The clicked button is highlighted when the call happens during a
 * click event; a programmatic call skips the highlight instead of throwing.
 *
 * @param {string} category - Category label, or 'all' for every document.
 */
async function filterDocuments(category) {
    // Update active button
    document.querySelectorAll('.category-btn').forEach(btn => {
        btn.classList.remove('active');
    });
    const currentEvent = window.event;
    const clickedButton =
        currentEvent && currentEvent.target && typeof currentEvent.target.closest === 'function'
            ? currentEvent.target.closest('.category-btn')
            : null;
    if (clickedButton) {
        clickedButton.classList.add('active');
    }

    try {
        const url = category === 'all'
            ? '/api/documents'
            : `/api/documents/${encodeURIComponent(category)}`;
        const response = await authenticatedFetch(url);
        const data = await response.json();

        displayDocuments(data.documents);
    } catch (error) {
        console.error('Error filtering documents:', error);
    }
}
258
+
259
+ // Delete document
260
/**
 * Ask for confirmation, then delete a document through the API and refresh
 * the document list, stats and category buttons on success.
 *
 * @param {string} documentId - Id of the document to delete.
 * @param {string} filename - Name shown in the confirmation prompt.
 */
async function deleteDocument(documentId, filename) {
    const confirmed = confirm(`Are you sure you want to delete "${filename}"? This action cannot be undone.`);
    if (!confirmed) {
        return;
    }

    try {
        const response = await authenticatedFetch(`/api/documents/${documentId}`, { method: 'DELETE' });
        const payload = await response.json();

        if (!response.ok) {
            alert('Failed to delete document: ' + payload.detail);
            return;
        }

        // Refresh the current view
        loadAllDocuments();
        loadStats();
        loadCategories();
        alert('Document deleted successfully');
    } catch (error) {
        alert('Error deleting document: ' + error.message);
    }
}
285
+
286
+ // Display documents
287
/**
 * Render a list of document records as cards inside #documentsContainer.
 *
 * Filenames, categories and OCR text come from uploaded content, so they
 * are HTML-escaped before insertion. The delete button carries its
 * arguments in data attributes and is wired with a listener, so filenames
 * containing quotes no longer break an inline onclick handler. A missing
 * `ocr_text` is treated as empty text.
 *
 * @param {Array<object>} docs - Document records as returned by the API.
 */
function displayDocuments(docs) {
    const container = document.getElementById('documentsContainer');

    if (!docs || docs.length === 0) {
        container.innerHTML = '<p>No documents found for this category.</p>';
        return;
    }

    const htmlEntities = { '&': '&amp;', '<': '&lt;', '>': '&gt;', '"': '&quot;', "'": '&#39;' };
    // Escape text for safe use in both element content and attribute values.
    const escapeHtml = (value) =>
        (value == null ? '' : String(value)).replace(/[&<>"']/g, ch => htmlEntities[ch]);

    const docsHtml = docs.map(doc => {
        const similarityClass = doc.similarity >= 0.7 ? 'similarity-high' :
            doc.similarity >= 0.5 ? 'similarity-medium' : 'similarity-low';
        const ocrText = doc.ocr_text == null ? '' : String(doc.ocr_text);
        const ocrPreview = ocrText.substring(0, 200) + (ocrText.length > 200 ? '...' : '');

        return `
            <div class="document-card">
                <h4><i class="fas fa-file"></i> ${escapeHtml(doc.original_filename)}</h4>
                <p><strong>Category:</strong> ${escapeHtml(doc.category)}</p>
                <p><strong>Confidence:</strong>
                    <span class="similarity-badge ${similarityClass}">
                        ${(doc.similarity * 100).toFixed(1)}%
                    </span>
                </p>
                <p><strong>Upload Date:</strong> ${escapeHtml(new Date(doc.upload_date).toLocaleDateString())}</p>
                <p><strong>OCR Preview:</strong></p>
                <div style="max-height: 100px; overflow-y: auto; background: #f8f9fa; padding: 0.5rem; border-radius: 4px; font-size: 0.8rem;">
                    ${escapeHtml(ocrPreview)}
                </div>
                <div class="document-actions">
                    <button class="btn btn-danger" data-doc-id="${escapeHtml(doc.id)}" data-filename="${escapeHtml(doc.original_filename)}">
                        <i class="fas fa-trash"></i> Delete
                    </button>
                </div>
            </div>
        `;
    }).join('');

    container.innerHTML = `<div class="document-grid">${docsHtml}</div>`;

    // Wire the delete buttons after insertion; arguments come from data attributes.
    container.querySelectorAll('.document-actions .btn-danger').forEach(button => {
        button.addEventListener('click', () => {
            deleteDocument(button.dataset.docId, button.dataset.filename);
        });
    });
}
324
+
325
+ // Form submissions
326
// Upload form: send a sample document plus its category label to
// /api/upload-category, then reset the form and refresh stats/categories.
document.getElementById('uploadForm').addEventListener('submit', async (submitEvent) => {
    submitEvent.preventDefault();

    const fileInput = document.getElementById('categoryFile');
    const labelInput = document.getElementById('categoryLabel');
    const resultDiv = document.getElementById('uploadResult');

    const hasFile = Boolean(fileInput.files[0]);
    const hasLabel = Boolean(labelInput.value.trim());
    if (!(hasFile && hasLabel)) {
        showResult(resultDiv, 'Please select a file and enter a label.', 'error');
        return;
    }

    const payload = new FormData();
    payload.append('file', fileInput.files[0]);
    payload.append('label', labelInput.value.trim());

    showResult(resultDiv, '<div class="loading"></div> Uploading...', 'info');

    try {
        const response = await authenticatedFetch('/api/upload-category', { method: 'POST', body: payload });
        const result = await response.json();

        if (!response.ok) {
            showResult(resultDiv, result.detail, 'error');
            return;
        }

        showResult(resultDiv, result.message, 'success');
        // Reset the form back to its initial prompt.
        labelInput.value = '';
        fileInput.value = '';
        document.querySelector('#categoryUpload p').textContent = 'Click to select or drag & drop files here';
        loadStats();
        loadCategories();
    } catch (error) {
        showResult(resultDiv, 'Upload failed: ' + error.message, 'error');
    }
});
366
+
367
// Classify form: send the document to /api/classify-document and show the
// predicted category, its confidence and the list of top matches.
document.getElementById('classifyForm').addEventListener('submit', async (submitEvent) => {
    submitEvent.preventDefault();

    const fileInput = document.getElementById('classifyFile');
    const resultDiv = document.getElementById('classifyResult');

    const selectedFile = fileInput.files[0];
    if (!selectedFile) {
        showResult(resultDiv, 'Please select a file to classify.', 'error');
        return;
    }

    const payload = new FormData();
    payload.append('file', selectedFile);

    showResult(resultDiv, '<div class="loading"></div> Classifying...', 'info');

    try {
        const response = await authenticatedFetch('/api/classify-document', { method: 'POST', body: payload });
        const result = await response.json();

        if (!response.ok) {
            showResult(resultDiv, result.detail, 'error');
            return;
        }

        const isHighConfidence = result.confidence === 'high';
        const confidenceText = isHighConfidence ? '✅ High Confidence' : '⚠️ Low Confidence';
        const savedText = result.document_saved ? '\n📁 Document saved to archive' : '';
        const matchLines = result.matches
            .map(match => `• ${match.category}: ${(match.similarity * 100).toFixed(1)}%\n`)
            .join('');
        const matchesText = '\n\nTop matches:\n' + matchLines;

        showResult(
            resultDiv,
            `🎯 Classification: ${result.category}\n` +
            `${confidenceText} (${(result.similarity * 100).toFixed(1)}%)${savedText}${matchesText}`,
            isHighConfidence ? 'success' : 'warning'
        );

        // Reset the picker back to its initial prompt.
        fileInput.value = '';
        document.querySelector('#classifyUpload p').textContent = 'Click to select or drag & drop files here';
        loadStats();
    } catch (error) {
        showResult(resultDiv, 'Classification failed: ' + error.message, 'error');
    }
});
416
+
417
// OCR form: send the document to /api/ocr and show the extracted text.
// showResult writes its message through innerHTML, so the recognized text is
// HTML-escaped first; otherwise any '<' or '&' in the OCR output would be
// parsed as markup instead of being displayed.
document.getElementById('ocrForm').addEventListener('submit', async (e) => {
    e.preventDefault();

    const fileInput = document.getElementById('ocrFile');
    const resultDiv = document.getElementById('ocrResult');

    if (!fileInput.files[0]) {
        showResult(resultDiv, 'Please select a file for OCR.', 'error');
        return;
    }

    const htmlEntities = { '&': '&amp;', '<': '&lt;', '>': '&gt;', '"': '&quot;', "'": '&#39;' };
    const escapeHtml = (value) =>
        (value == null ? '' : String(value)).replace(/[&<>"']/g, ch => htmlEntities[ch]);

    const formData = new FormData();
    formData.append('file', fileInput.files[0]);

    showResult(resultDiv, '<div class="loading"></div> Extracting text...', 'info');

    try {
        const response = await authenticatedFetch('/api/ocr', {
            method: 'POST',
            body: formData
        });

        const result = await response.json();

        if (response.ok) {
            showResult(resultDiv, escapeHtml(result.text), 'success');
        } else {
            showResult(resultDiv, result.detail, 'error');
        }
    } catch (error) {
        showResult(resultDiv, 'OCR failed: ' + error.message, 'error');
    }
});
450
+
451
+ // Utility function to show results
452
/**
 * Render a message inside a styled result box.
 *
 * @param {Element} element - Container whose innerHTML is replaced.
 * @param {string} message - HTML to display (inserted as markup).
 * @param {string} type - 'success', 'error' or 'warning'; any other value
 *     (e.g. 'info') renders the box without a status class.
 */
function showResult(element, message, type) {
    const classByType = {
        success: 'result-success',
        error: 'result-error',
        warning: 'result-warning'
    };
    const className = Object.prototype.hasOwnProperty.call(classByType, type)
        ? classByType[type]
        : '';

    element.innerHTML = `<div class="result-box ${className}">${message}</div>`;
}
static/styles.css ADDED
@@ -0,0 +1,372 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ :root {
2
+ --primary-color: #2563eb;
3
+ --secondary-color: #1e40af;
4
+ --success-color: #10b981;
5
+ --warning-color: #f59e0b;
6
+ --error-color: #ef4444;
7
+ --bg-color: #f8fafc;
8
+ --card-bg: #ffffff;
9
+ --text-primary: #1f2937;
10
+ --text-secondary: #6b7280;
11
+ --border-color: #e5e7eb;
12
+ }
13
+
14
+ * {
15
+ margin: 0;
16
+ padding: 0;
17
+ box-sizing: border-box;
18
+ }
19
+
20
+ body {
21
+ font-family: 'Segoe UI', Tahoma, Geneva, Verdana, sans-serif;
22
+ background-color: var(--bg-color);
23
+ color: var(--text-primary);
24
+ line-height: 1.6;
25
+ }
26
+
27
+ /* Modal Styles */
28
+ .modal {
29
+ display: flex;
30
+ position: fixed;
31
+ z-index: 1000;
32
+ left: 0;
33
+ top: 0;
34
+ width: 100%;
35
+ height: 100%;
36
+ background-color: rgba(0,0,0,0.5);
37
+ align-items: center;
38
+ justify-content: center;
39
+ }
40
+
41
+ .modal-content {
42
+ background-color: var(--card-bg);
43
+ padding: 2rem;
44
+ border-radius: 12px;
45
+ box-shadow: 0 10px 25px rgba(0,0,0,0.2);
46
+ width: 90%;
47
+ max-width: 400px;
48
+ }
49
+
50
+ .modal-content h2 {
51
+ text-align: center;
52
+ margin-bottom: 1.5rem;
53
+ color: var(--primary-color);
54
+ }
55
+
56
+ .container {
57
+ max-width: 1200px;
58
+ margin: 0 auto;
59
+ padding: 20px;
60
+ }
61
+
62
+ .header {
63
+ background: linear-gradient(135deg, var(--primary-color), var(--secondary-color));
64
+ color: white;
65
+ padding: 2rem;
66
+ margin-bottom: 2rem;
67
+ border-radius: 12px;
68
+ }
69
+
70
+ .header-content {
71
+ display: flex;
72
+ justify-content: space-between;
73
+ align-items: center;
74
+ flex-wrap: wrap;
75
+ gap: 1rem;
76
+ }
77
+
78
+ .header h1 {
79
+ font-size: 2.5rem;
80
+ margin-bottom: 0.5rem;
81
+ }
82
+
83
+ .header p {
84
+ font-size: 1.1rem;
85
+ opacity: 0.9;
86
+ }
87
+
88
+ .user-info {
89
+ display: flex;
90
+ align-items: center;
91
+ gap: 1rem;
92
+ }
93
+
94
+ .user-info span {
95
+ font-weight: 500;
96
+ }
97
+
98
+ .dashboard-stats {
99
+ display: grid;
100
+ grid-template-columns: repeat(auto-fit, minmax(250px, 1fr));
101
+ gap: 1.5rem;
102
+ margin-bottom: 2rem;
103
+ }
104
+
105
+ .stat-card {
106
+ background: var(--card-bg);
107
+ padding: 1.5rem;
108
+ border-radius: 12px;
109
+ box-shadow: 0 4px 6px -1px rgba(0, 0, 0, 0.1);
110
+ border: 1px solid var(--border-color);
111
+ }
112
+
113
+ .stat-card h3 {
114
+ font-size: 2rem;
115
+ color: var(--primary-color);
116
+ margin-bottom: 0.5rem;
117
+ }
118
+
119
+ .stat-card p {
120
+ color: var(--text-secondary);
121
+ font-size: 0.9rem;
122
+ }
123
+
124
+ .tabs {
125
+ display: flex;
126
+ margin-bottom: 2rem;
127
+ background: var(--card-bg);
128
+ border-radius: 12px;
129
+ padding: 0.5rem;
130
+ box-shadow: 0 2px 4px -1px rgba(0, 0, 0, 0.1);
131
+ }
132
+
133
+ .tab-button {
134
+ flex: 1;
135
+ padding: 1rem;
136
+ border: none;
137
+ background: transparent;
138
+ cursor: pointer;
139
+ border-radius: 8px;
140
+ font-weight: 500;
141
+ transition: all 0.3s ease;
142
+ }
143
+
144
+ .tab-button.active {
145
+ background: var(--primary-color);
146
+ color: white;
147
+ }
148
+
149
+ .tab-content {
150
+ display: none;
151
+ background: var(--card-bg);
152
+ padding: 2rem;
153
+ border-radius: 12px;
154
+ box-shadow: 0 4px 6px -1px rgba(0, 0, 0, 0.1);
155
+ border: 1px solid var(--border-color);
156
+ }
157
+
158
+ .tab-content.active {
159
+ display: block;
160
+ }
161
+
162
+ .form-group {
163
+ margin-bottom: 1.5rem;
164
+ }
165
+
166
+ .form-group label {
167
+ display: block;
168
+ margin-bottom: 0.5rem;
169
+ font-weight: 600;
170
+ color: var(--text-primary);
171
+ }
172
+
173
+ .form-control {
174
+ width: 100%;
175
+ padding: 0.75rem;
176
+ border: 2px solid var(--border-color);
177
+ border-radius: 8px;
178
+ font-size: 1rem;
179
+ transition: border-color 0.3s ease;
180
+ }
181
+
182
+ .form-control:focus {
183
+ outline: none;
184
+ border-color: var(--primary-color);
185
+ }
186
+
187
+ .btn {
188
+ padding: 0.75rem 1.5rem;
189
+ border: none;
190
+ border-radius: 8px;
191
+ font-size: 1rem;
192
+ font-weight: 600;
193
+ cursor: pointer;
194
+ transition: all 0.3s ease;
195
+ }
196
+
197
+ .btn-primary {
198
+ background: var(--primary-color);
199
+ color: white;
200
+ }
201
+
202
+ .btn-primary:hover {
203
+ background: var(--secondary-color);
204
+ }
205
+
206
+ .btn-secondary {
207
+ background: var(--text-secondary);
208
+ color: white;
209
+ }
210
+
211
+ .btn-success {
212
+ background: var(--success-color);
213
+ color: white;
214
+ }
215
+
216
+ .btn-danger {
217
+ background: var(--error-color);
218
+ color: white;
219
+ }
220
+
221
+ .btn-danger:hover {
222
+ background: #dc2626;
223
+ }
224
+
225
+ .file-upload {
226
+ border: 2px dashed var(--border-color);
227
+ border-radius: 8px;
228
+ padding: 2rem;
229
+ text-align: center;
230
+ cursor: pointer;
231
+ transition: all 0.3s ease;
232
+ }
233
+
234
+ .file-upload:hover {
235
+ border-color: var(--primary-color);
236
+ background-color: #f0f9ff;
237
+ }
238
+
239
+ .file-upload.dragover {
240
+ border-color: var(--primary-color);
241
+ background-color: #f0f9ff;
242
+ }
243
+
244
+ .result-box {
245
+ margin-top: 1rem;
246
+ padding: 1rem;
247
+ border-radius: 8px;
248
+ font-family: monospace;
249
+ white-space: pre-wrap;
250
+ }
251
+
252
+ .result-success {
253
+ background-color: #f0fdf4;
254
+ border: 1px solid var(--success-color);
255
+ color: #166534;
256
+ }
257
+
258
+ .result-error {
259
+ background-color: #fef2f2;
260
+ border: 1px solid var(--error-color);
261
+ color: #dc2626;
262
+ }
263
+
264
+ .result-warning {
265
+ background-color: #fffbeb;
266
+ border: 1px solid var(--warning-color);
267
+ color: #92400e;
268
+ }
269
+
270
+ .document-grid {
271
+ display: grid;
272
+ grid-template-columns: repeat(auto-fill, minmax(300px, 1fr));
273
+ gap: 1rem;
274
+ margin-top: 1rem;
275
+ }
276
+
277
+ .document-card {
278
+ background: var(--card-bg);
279
+ border: 1px solid var(--border-color);
280
+ border-radius: 8px;
281
+ padding: 1rem;
282
+ box-shadow: 0 2px 4px -1px rgba(0, 0, 0, 0.1);
283
+ }
284
+
285
+ .document-card h4 {
286
+ color: var(--primary-color);
287
+ margin-bottom: 0.5rem;
288
+ }
289
+
290
+ .document-actions {
291
+ display: flex;
292
+ gap: 0.5rem;
293
+ margin-top: 1rem;
294
+ }
295
+
296
+ .category-buttons {
297
+ display: flex;
298
+ flex-wrap: wrap;
299
+ gap: 0.5rem;
300
+ margin-bottom: 1rem;
301
+ }
302
+
303
+ .category-btn {
304
+ padding: 0.5rem 1rem;
305
+ background: var(--bg-color);
306
+ border: 1px solid var(--border-color);
307
+ border-radius: 20px;
308
+ cursor: pointer;
309
+ transition: all 0.3s ease;
310
+ }
311
+
312
+ .category-btn:hover, .category-btn.active {
313
+ background: var(--primary-color);
314
+ color: white;
315
+ border-color: var(--primary-color);
316
+ }
317
+
318
+ .loading {
319
+ display: inline-block;
320
+ width: 20px;
321
+ height: 20px;
322
+ border: 2px solid #f3f3f3;
323
+ border-top: 2px solid var(--primary-color);
324
+ border-radius: 50%;
325
+ animation: spin 1s linear infinite;
326
+ }
327
+
328
+ @keyframes spin {
329
+ 0% { transform: rotate(0deg); }
330
+ 100% { transform: rotate(360deg); }
331
+ }
332
+
333
+ .similarity-badge {
334
+ display: inline-block;
335
+ padding: 0.25rem 0.5rem;
336
+ border-radius: 12px;
337
+ font-size: 0.8rem;
338
+ font-weight: 600;
339
+ }
340
+
341
+ .similarity-high {
342
+ background: #dcfce7;
343
+ color: #166534;
344
+ }
345
+
346
+ .similarity-medium {
347
+ background: #fef3c7;
348
+ color: #92400e;
349
+ }
350
+
351
+ .similarity-low {
352
+ background: #fecaca;
353
+ color: #dc2626;
354
+ }
355
+
356
+ @media (max-width: 768px) {
357
+ .header-content {
358
+ text-align: center;
359
+ }
360
+
361
+ .header h1 {
362
+ font-size: 2rem;
363
+ }
364
+
365
+ .tabs {
366
+ flex-direction: column;
367
+ }
368
+
369
+ .tab-button {
370
+ text-align: center;
371
+ }
372
+ }