Spaces:

prodevroger
/

handwritten

Sleeping

App Files Files Community

IZERE HIRWA Roger committed on 17 days ago

Commit

ed74fda

1 Parent(s): 08cd1f0

io

Browse files

Files changed (8) hide show

Dockerfile +27 -0
app.py +431 -0
main.py +431 -0
requirements.txt +19 -0
space.yaml +1 -0
static/index.html +161 -0
static/script.js +458 -0
static/styles.css +372 -0

Dockerfile ADDED Viewed

	@@ -0,0 +1,27 @@

+FROM python:3.11
+WORKDIR /app
+# System binaries used at runtime: pytesseract shells out to `tesseract` and
+# pdf2image to the poppler command-line tools. Neither is provided by pip.
+RUN apt-get update \
+    && apt-get install -y --no-install-recommends tesseract-ocr poppler-utils \
+    && rm -rf /var/lib/apt/lists/*
+COPY requirements.txt .
+RUN pip install -r requirements.txt
+COPY . .
+# Create writable directories
+RUN mkdir -p /app/instance && chmod -R 777 /app/instance
+ENV HF_HOME=/app/transformers_cache
+RUN mkdir -p /app/transformers_cache && chmod -R 777 /app/transformers_cache
+# Create the data directories for the vector store. The app writes to the
+# relative path "data/", i.e. /app/data under the WORKDIR above; /data is
+# created as well.
+RUN mkdir -p /app/data && chmod -R 777 /app/data
+RUN mkdir -p /data && chmod -R 777 /data
+# Create uploads directory
+RUN mkdir -p /app/uploads && chmod -R 777 /app/uploads
+# Create logs directory
+RUN mkdir -p /app/logs && chmod -R 777 /app/logs
+EXPOSE 7860
+CMD ["python", "app.py"]

app.py ADDED Viewed

	@@ -0,0 +1,431 @@

+from fastapi import FastAPI, File, UploadFile, Form, HTTPException, Depends, status
+from fastapi.responses import HTMLResponse, JSONResponse
+from fastapi.staticfiles import StaticFiles
+from fastapi.security import HTTPBearer, HTTPAuthorizationCredentials
+import pytesseract
+from PIL import Image
+import numpy as np
+import faiss
+import os
+import pickle
+from pdf2image import convert_from_bytes
+import torch
+import clip
+import io
+import json
+import uuid
+from datetime import datetime, timedelta
+from typing import List, Dict, Any, Optional
+import base64
+import jwt
+from passlib.context import CryptContext
+app = FastAPI(title="Handwritten Archive Document Digitalization System")
+# Security configuration
+SECRET_KEY = "your-secret-key-change-this-in-production"
+ALGORITHM = "HS256"
+ACCESS_TOKEN_EXPIRE_MINUTES = 30
+pwd_context = CryptContext(schemes=["bcrypt"], deprecated="auto")
+security = HTTPBearer()
+# Default admin user (change in production)
+USERS_DB = {
+    "admin": {
+        "username": "admin",
+        "hashed_password": pwd_context.hash("admin123"),
+        "is_active": True
+    }
+}
+# Mount static files
+# NOTE(review): this mount runs before os.makedirs("static") below; if
+# StaticFiles validates the directory when constructed, that makedirs call
+# comes too late to help — confirm and consider reordering.
+app.mount("/static", StaticFiles(directory="static"), name="static")
+# --- Load or Initialize Model/Index ---
+# Use CUDA when torch reports it as available, otherwise run on the CPU.
+device = "cuda" if torch.cuda.is_available() else "cpu"
+clip_model, preprocess = clip.load("ViT-B/32", device=device)
+# Locations of the persisted state, relative to the working directory.
+INDEX_PATH = "data/index.faiss"
+LABELS_PATH = "data/labels.pkl"
+DOCUMENTS_PATH = "data/documents.json"
+UPLOADS_DIR = "data/uploads"
+# Ensure directories exist
+os.makedirs("data", exist_ok=True)
+os.makedirs("static", exist_ok=True)
+os.makedirs(UPLOADS_DIR, exist_ok=True)
+# Initialize index and labels with error handling
+# The index dimension (512) has to match the length of the vectors that
+# get_clip_embedding returns.
+index = faiss.IndexFlatL2(512)
+labels = []
+documents = []
+if os.path.exists(INDEX_PATH) and os.path.exists(LABELS_PATH):
+    try:
+        # Load into temporaries and publish both together, so a failure while
+        # reading the labels cannot leave a populated index paired with an
+        # empty label list.
+        loaded_index = faiss.read_index(INDEX_PATH)
+        # NOTE: pickle is only safe on files this application wrote itself.
+        with open(LABELS_PATH, "rb") as f:
+            loaded_labels = pickle.load(f)
+        index, labels = loaded_index, loaded_labels
+        print(f"✅ Loaded existing index with {len(labels)} labels")
+    except (RuntimeError, EOFError, pickle.UnpicklingError) as e:
+        print(f"⚠️ Failed to load existing index: {e}")
+        print("🔄 Starting with fresh index")
+        if os.path.exists(INDEX_PATH):
+            os.remove(INDEX_PATH)
+        if os.path.exists(LABELS_PATH):
+            os.remove(LABELS_PATH)
+# Load documents database
+if os.path.exists(DOCUMENTS_PATH):
+    try:
+        with open(DOCUMENTS_PATH, 'r') as f:
+            documents = json.load(f)
+    except:
+        documents = []
+# Authentication functions
+def verify_password(plain_password, hashed_password):
+    return pwd_context.verify(plain_password, hashed_password)
+def get_password_hash(password):
+    return pwd_context.hash(password)
+def authenticate_user(username: str, password: str):
+    user = USERS_DB.get(username)
+    if not user or not verify_password(password, user["hashed_password"]):
+        return False
+    return user
+def create_access_token(data: dict, expires_delta: Optional[timedelta] = None):
+    to_encode = data.copy()
+    if expires_delta:
+        expire = datetime.utcnow() + expires_delta
+    else:
+        expire = datetime.utcnow() + timedelta(minutes=15)
+    to_encode.update({"exp": expire})
+    encoded_jwt = jwt.encode(to_encode, SECRET_KEY, algorithm=ALGORITHM)
+    return encoded_jwt
+async def get_current_user(credentials: HTTPAuthorizationCredentials = Depends(security)):
+    credentials_exception = HTTPException(
+        status_code=status.HTTP_401_UNAUTHORIZED,
+        detail="Could not validate credentials",
+        headers={"WWW-Authenticate": "Bearer"},
+    )
+    try:
+        payload = jwt.decode(credentials.credentials, SECRET_KEY, algorithms=[ALGORITHM])
+        username: str = payload.get("sub")
+        if username is None:
+            raise credentials_exception
+    except jwt.PyJWTError:
+        raise credentials_exception
+    user = USERS_DB.get(username)
+    if user is None:
+        raise credentials_exception
+    return user
+# --- Utilities ---
+def save_index():
+    try:
+        os.makedirs("data", exist_ok=True)
+        faiss.write_index(index, INDEX_PATH)
+        with open(LABELS_PATH, "wb") as f:
+            pickle.dump(labels, f)
+    except Exception as e:
+        print(f"❌ Failed to save index: {e}")
+def save_documents():
+    try:
+        with open(DOCUMENTS_PATH, 'w') as f:
+            json.dump(documents, f, indent=2)
+    except Exception as e:
+        print(f"❌ Failed to save documents: {e}")
+def image_from_pdf(pdf_bytes):
+    try:
+        images = convert_from_bytes(pdf_bytes, dpi=200)
+        return images[0]
+    except Exception as e:
+        print(f"❌ PDF conversion error: {e}")
+        return None
+def extract_text(image):
+    try:
+        if image is None:
+            return "❌ No image provided"
+        if isinstance(image, bytes):
+            image = Image.open(io.BytesIO(image))
+        elif not isinstance(image, Image.Image):
+            image = Image.fromarray(image)
+        if image.mode != 'RGB':
+            image = image.convert('RGB')
+        custom_config = r'--oem 3 --psm 6'
+        text = pytesseract.image_to_string(image, config=custom_config)
+        return text.strip() if text.strip() else "❓ No text detected"
+    except Exception as e:
+        return f"❌ OCR error: {str(e)}"
+def get_clip_embedding(image):
+    try:
+        if image is None:
+            return None
+        if isinstance(image, bytes):
+            image = Image.open(io.BytesIO(image))
+        elif not isinstance(image, Image.Image):
+            image = Image.fromarray(image)
+        if image.mode != 'RGB':
+            image = image.convert('RGB')
+        image_input = preprocess(image).unsqueeze(0).to(device)
+        with torch.no_grad():
+            image_features = clip_model.encode_image(image_input)
+            image_features = image_features / image_features.norm(dim=-1, keepdim=True)
+        return image_features.cpu().numpy()[0]
+    except Exception as e:
+        print(f"❌ CLIP embedding error: {e}")
+        return None
+def save_uploaded_file(file_content: bytes, filename: str) -> str:
+    file_id = str(uuid.uuid4())
+    file_extension = os.path.splitext(filename)[1]
+    saved_filename = f"{file_id}{file_extension}"
+    file_path = os.path.join(UPLOADS_DIR, saved_filename)
+    with open(file_path, 'wb') as f:
+        f.write(file_content)
+    return saved_filename
+# --- API Endpoints ---
+@app.get("/", response_class=HTMLResponse)
+async def dashboard():
+    with open("static/index.html", "r") as f:
+        return HTMLResponse(content=f.read())
+@app.post("/api/login")
+async def login(username: str = Form(...), password: str = Form(...)):
+    user = authenticate_user(username, password)
+    if not user:
+        raise HTTPException(
+            status_code=status.HTTP_401_UNAUTHORIZED,
+            detail="Incorrect username or password"
+        )
+    access_token_expires = timedelta(minutes=ACCESS_TOKEN_EXPIRE_MINUTES)
+    access_token = create_access_token(
+        data={"sub": user["username"]}, expires_delta=access_token_expires
+    )
+    return {"access_token": access_token, "token_type": "bearer", "username": user["username"]}
+@app.post("/api/upload-category")
+async def upload_category(
+    file: UploadFile = File(...),
+    label: str = Form(...),
+    current_user: dict = Depends(get_current_user)
+):
+    try:
+        if not label or not label.strip():
+            raise HTTPException(status_code=400, detail="Please provide a label")
+        label = label.strip()
+        file_content = await file.read()
+        if file.content_type and file.content_type.startswith('application/pdf'):
+            image = image_from_pdf(file_content)
+        else:
+            image = Image.open(io.BytesIO(file_content))
+        if image is None:
+            raise HTTPException(status_code=400, detail="Failed to process image")
+        embedding = get_clip_embedding(image)
+        if embedding is None:
+            raise HTTPException(status_code=400, detail="Failed to generate embedding")
+        index.add(np.array([embedding]))
+        labels.append(label)
+        save_index()
+        return {"message": f"✅ Added category '{label}' (Total: {len(labels)} categories)", "status": "success"}
+    except Exception as e:
+        raise HTTPException(status_code=500, detail=str(e))
+@app.post("/api/classify-document")
+async def classify_document(
+    file: UploadFile = File(...),
+    current_user: dict = Depends(get_current_user)
+):
+    try:
+        if len(labels) == 0:
+            raise HTTPException(status_code=400, detail="No categories in database. Please add some first.")
+        file_content = await file.read()
+        if file.content_type and file.content_type.startswith('application/pdf'):
+            image = image_from_pdf(file_content)
+        else:
+            image = Image.open(io.BytesIO(file_content))
+        if image is None:
+            raise HTTPException(status_code=400, detail="Failed to process image")
+        embedding = get_clip_embedding(image)
+        if embedding is None:
+            raise HTTPException(status_code=400, detail="Failed to generate embedding")
+        # Search for top 3 matches
+        k = min(3, len(labels))
+        D, I = index.search(np.array([embedding]), k=k)
+        if len(labels) > 0 and I[0][0] < len(labels):
+            similarity = 1 - D[0][0]
+            confidence_threshold = 0.35
+            best_match = labels[I[0][0]]
+            matches = []
+            for i in range(min(k, len(D[0]))):
+                if I[0][i] < len(labels):
+                    sim = 1 - D[0][i]
+                    matches.append({"category": labels[I[0][i]], "similarity": round(sim, 3)})
+            # Save classified document
+            if similarity >= confidence_threshold:
+                saved_filename = save_uploaded_file(file_content, file.filename)
+                ocr_text = extract_text(image)
+                document = {
+                    "id": str(uuid.uuid4()),
+                    "filename": saved_filename,
+                    "original_filename": file.filename,
+                    "category": best_match,
+                    "similarity": round(similarity, 3),
+                    "ocr_text": ocr_text,
+                    "upload_date": datetime.now().isoformat(),
+                    "file_path": os.path.join(UPLOADS_DIR, saved_filename)
+                }
+                documents.append(document)
+                save_documents()
+                return {
+                    "status": "success",
+                    "category": best_match,
+                    "similarity": round(similarity, 3),
+                    "confidence": "high" if similarity >= confidence_threshold else "low",
+                    "matches": matches,
+                    "document_saved": True,
+                    "document_id": document["id"]
+                }
+            else:
+                return {
+                    "status": "low_confidence",
+                    "category": best_match,
+                    "similarity": round(similarity, 3),
+                    "confidence": "low",
+                    "matches": matches,
+                    "document_saved": False
+                }
+        raise HTTPException(status_code=400, detail="Document not recognized")
+    except Exception as e:
+        raise HTTPException(status_code=500, detail=str(e))
+@app.get("/api/categories")
+async def get_categories(current_user: dict = Depends(get_current_user)):
+    categories = list(set(labels))  # Remove duplicates
+    category_counts = {}
+    for label in labels:
+        category_counts[label] = category_counts.get(label, 0) + 1
+    return {"categories": categories, "counts": category_counts}
+@app.get("/api/documents/{category}")
+async def get_documents_by_category(
+    category: str,
+    current_user: dict = Depends(get_current_user)
+):
+    category_documents = [doc for doc in documents if doc["category"] == category]
+    return {"documents": category_documents, "count": len(category_documents)}
+@app.get("/api/documents")
+async def get_all_documents(current_user: dict = Depends(get_current_user)):
+    return {"documents": documents, "count": len(documents)}
+@app.delete("/api/documents/{document_id}")
+async def delete_document(
+    document_id: str,
+    current_user: dict = Depends(get_current_user)
+):
+    try:
+        # Find document
+        document_index = None
+        document_to_delete = None
+        for i, doc in enumerate(documents):
+            if doc["id"] == document_id:
+                document_index = i
+                document_to_delete = doc
+                break
+        if document_to_delete is None:
+            raise HTTPException(status_code=404, detail="Document not found")
+        # Delete physical file
+        file_path = document_to_delete.get("file_path")
+        if file_path and os.path.exists(file_path):
+            os.remove(file_path)
+        # Remove from documents list
+        documents.pop(document_index)
+        save_documents()
+        return {"message": "Document deleted successfully", "status": "success"}
+    except Exception as e:
+        raise HTTPException(status_code=500, detail=str(e))
+@app.post("/api/ocr")
+async def ocr_document(
+    file: UploadFile = File(...),
+    current_user: dict = Depends(get_current_user)
+):
+    try:
+        file_content = await file.read()
+        if file.content_type and file.content_type.startswith('application/pdf'):
+            image = image_from_pdf(file_content)
+        else:
+            image = Image.open(io.BytesIO(file_content))
+        if image is None:
+            raise HTTPException(status_code=400, detail="Failed to process image")
+        text = extract_text(image)
+        return {"text": text, "status": "success"}
+    except Exception as e:
+        raise HTTPException(status_code=500, detail=str(e))
+@app.get("/api/stats")
+async def get_stats(current_user: dict = Depends(get_current_user)):
+    category_stats = {}
+    for doc in documents:
+        category = doc["category"]
+        if category not in category_stats:
+            category_stats[category] = 0
+        category_stats[category] += 1
+    return {
+        "total_categories": len(set(labels)),
+        "total_documents": len(documents),
+        "category_distribution": category_stats
+    }
+# Script entry point: serve the app with uvicorn on all interfaces, port 7860
+# (the port the Dockerfile in this commit exposes).
+if __name__ == "__main__":
+    import uvicorn
+    uvicorn.run(app, host="0.0.0.0", port=7860)

main.py ADDED Viewed

	@@ -0,0 +1,431 @@

+from fastapi import FastAPI, File, UploadFile, Form, HTTPException, Depends, status
+from fastapi.responses import HTMLResponse, JSONResponse
+from fastapi.staticfiles import StaticFiles
+from fastapi.security import HTTPBearer, HTTPAuthorizationCredentials
+import pytesseract
+from PIL import Image
+import numpy as np
+import faiss
+import os
+import pickle
+from pdf2image import convert_from_bytes
+import torch
+import clip
+import io
+import json
+import uuid
+from datetime import datetime, timedelta
+from typing import List, Dict, Any, Optional
+import base64
+import jwt
+from passlib.context import CryptContext
+app = FastAPI(title="Handwritten Archive Document Digitalization System")
+# Security configuration
+SECRET_KEY = "your-secret-key-change-this-in-production"
+ALGORITHM = "HS256"
+ACCESS_TOKEN_EXPIRE_MINUTES = 30
+pwd_context = CryptContext(schemes=["bcrypt"], deprecated="auto")
+security = HTTPBearer()
+# Default admin user (change in production)
+USERS_DB = {
+    "admin": {
+        "username": "admin",
+        "hashed_password": pwd_context.hash("admin123"),
+        "is_active": True
+    }
+}
+# Mount static files
+# NOTE(review): this mount runs before os.makedirs("static") below; if
+# StaticFiles validates the directory when constructed, that makedirs call
+# comes too late to help — confirm and consider reordering.
+app.mount("/static", StaticFiles(directory="static"), name="static")
+# --- Load or Initialize Model/Index ---
+# Use CUDA when torch reports it as available, otherwise run on the CPU.
+device = "cuda" if torch.cuda.is_available() else "cpu"
+clip_model, preprocess = clip.load("ViT-B/32", device=device)
+# Locations of the persisted state, relative to the working directory.
+INDEX_PATH = "data/index.faiss"
+LABELS_PATH = "data/labels.pkl"
+DOCUMENTS_PATH = "data/documents.json"
+UPLOADS_DIR = "data/uploads"
+# Ensure directories exist
+os.makedirs("data", exist_ok=True)
+os.makedirs("static", exist_ok=True)
+os.makedirs(UPLOADS_DIR, exist_ok=True)
+# Initialize index and labels with error handling
+# The index dimension (512) has to match the length of the vectors that
+# get_clip_embedding returns.
+index = faiss.IndexFlatL2(512)
+labels = []
+documents = []
+if os.path.exists(INDEX_PATH) and os.path.exists(LABELS_PATH):
+    try:
+        # Load into temporaries and publish both together, so a failure while
+        # reading the labels cannot leave a populated index paired with an
+        # empty label list.
+        loaded_index = faiss.read_index(INDEX_PATH)
+        # NOTE: pickle is only safe on files this application wrote itself.
+        with open(LABELS_PATH, "rb") as f:
+            loaded_labels = pickle.load(f)
+        index, labels = loaded_index, loaded_labels
+        print(f"✅ Loaded existing index with {len(labels)} labels")
+    except (RuntimeError, EOFError, pickle.UnpicklingError) as e:
+        print(f"⚠️ Failed to load existing index: {e}")
+        print("🔄 Starting with fresh index")
+        if os.path.exists(INDEX_PATH):
+            os.remove(INDEX_PATH)
+        if os.path.exists(LABELS_PATH):
+            os.remove(LABELS_PATH)
+# Load documents database
+if os.path.exists(DOCUMENTS_PATH):
+    try:
+        with open(DOCUMENTS_PATH, 'r') as f:
+            documents = json.load(f)
+    except:
+        documents = []
+# Authentication functions
+def verify_password(plain_password, hashed_password):
+    return pwd_context.verify(plain_password, hashed_password)
+def get_password_hash(password):
+    return pwd_context.hash(password)
+def authenticate_user(username: str, password: str):
+    user = USERS_DB.get(username)
+    if not user or not verify_password(password, user["hashed_password"]):
+        return False
+    return user
+def create_access_token(data: dict, expires_delta: Optional[timedelta] = None):
+    to_encode = data.copy()
+    if expires_delta:
+        expire = datetime.utcnow() + expires_delta
+    else:
+        expire = datetime.utcnow() + timedelta(minutes=15)
+    to_encode.update({"exp": expire})
+    encoded_jwt = jwt.encode(to_encode, SECRET_KEY, algorithm=ALGORITHM)
+    return encoded_jwt
+async def get_current_user(credentials: HTTPAuthorizationCredentials = Depends(security)):
+    credentials_exception = HTTPException(
+        status_code=status.HTTP_401_UNAUTHORIZED,
+        detail="Could not validate credentials",
+        headers={"WWW-Authenticate": "Bearer"},
+    )
+    try:
+        payload = jwt.decode(credentials.credentials, SECRET_KEY, algorithms=[ALGORITHM])
+        username: str = payload.get("sub")
+        if username is None:
+            raise credentials_exception
+    except jwt.PyJWTError:
+        raise credentials_exception
+    user = USERS_DB.get(username)
+    if user is None:
+        raise credentials_exception
+    return user
+# --- Utilities ---
+def save_index():
+    try:
+        os.makedirs("data", exist_ok=True)
+        faiss.write_index(index, INDEX_PATH)
+        with open(LABELS_PATH, "wb") as f:
+            pickle.dump(labels, f)
+    except Exception as e:
+        print(f"❌ Failed to save index: {e}")
+def save_documents():
+    try:
+        with open(DOCUMENTS_PATH, 'w') as f:
+            json.dump(documents, f, indent=2)
+    except Exception as e:
+        print(f"❌ Failed to save documents: {e}")
+def image_from_pdf(pdf_bytes):
+    try:
+        images = convert_from_bytes(pdf_bytes, dpi=200)
+        return images[0]
+    except Exception as e:
+        print(f"❌ PDF conversion error: {e}")
+        return None
+def extract_text(image):
+    try:
+        if image is None:
+            return "❌ No image provided"
+        if isinstance(image, bytes):
+            image = Image.open(io.BytesIO(image))
+        elif not isinstance(image, Image.Image):
+            image = Image.fromarray(image)
+        if image.mode != 'RGB':
+            image = image.convert('RGB')
+        custom_config = r'--oem 3 --psm 6'
+        text = pytesseract.image_to_string(image, config=custom_config)
+        return text.strip() if text.strip() else "❓ No text detected"
+    except Exception as e:
+        return f"❌ OCR error: {str(e)}"
+def get_clip_embedding(image):
+    try:
+        if image is None:
+            return None
+        if isinstance(image, bytes):
+            image = Image.open(io.BytesIO(image))
+        elif not isinstance(image, Image.Image):
+            image = Image.fromarray(image)
+        if image.mode != 'RGB':
+            image = image.convert('RGB')
+        image_input = preprocess(image).unsqueeze(0).to(device)
+        with torch.no_grad():
+            image_features = clip_model.encode_image(image_input)
+            image_features = image_features / image_features.norm(dim=-1, keepdim=True)
+        return image_features.cpu().numpy()[0]
+    except Exception as e:
+        print(f"❌ CLIP embedding error: {e}")
+        return None
+def save_uploaded_file(file_content: bytes, filename: str) -> str:
+    file_id = str(uuid.uuid4())
+    file_extension = os.path.splitext(filename)[1]
+    saved_filename = f"{file_id}{file_extension}"
+    file_path = os.path.join(UPLOADS_DIR, saved_filename)
+    with open(file_path, 'wb') as f:
+        f.write(file_content)
+    return saved_filename
+# --- API Endpoints ---
+@app.get("/", response_class=HTMLResponse)
+async def dashboard():
+    with open("static/index.html", "r") as f:
+        return HTMLResponse(content=f.read())
+@app.post("/api/login")
+async def login(username: str = Form(...), password: str = Form(...)):
+    user = authenticate_user(username, password)
+    if not user:
+        raise HTTPException(
+            status_code=status.HTTP_401_UNAUTHORIZED,
+            detail="Incorrect username or password"
+        )
+    access_token_expires = timedelta(minutes=ACCESS_TOKEN_EXPIRE_MINUTES)
+    access_token = create_access_token(
+        data={"sub": user["username"]}, expires_delta=access_token_expires
+    )
+    return {"access_token": access_token, "token_type": "bearer", "username": user["username"]}
+@app.post("/api/upload-category")
+async def upload_category(
+    file: UploadFile = File(...),
+    label: str = Form(...),
+    current_user: dict = Depends(get_current_user)
+):
+    try:
+        if not label or not label.strip():
+            raise HTTPException(status_code=400, detail="Please provide a label")
+        label = label.strip()
+        file_content = await file.read()
+        if file.content_type and file.content_type.startswith('application/pdf'):
+            image = image_from_pdf(file_content)
+        else:
+            image = Image.open(io.BytesIO(file_content))
+        if image is None:
+            raise HTTPException(status_code=400, detail="Failed to process image")
+        embedding = get_clip_embedding(image)
+        if embedding is None:
+            raise HTTPException(status_code=400, detail="Failed to generate embedding")
+        index.add(np.array([embedding]))
+        labels.append(label)
+        save_index()
+        return {"message": f"✅ Added category '{label}' (Total: {len(labels)} categories)", "status": "success"}
+    except Exception as e:
+        raise HTTPException(status_code=500, detail=str(e))
+@app.post("/api/classify-document")
+async def classify_document(
+    file: UploadFile = File(...),
+    current_user: dict = Depends(get_current_user)
+):
+    try:
+        if len(labels) == 0:
+            raise HTTPException(status_code=400, detail="No categories in database. Please add some first.")
+        file_content = await file.read()
+        if file.content_type and file.content_type.startswith('application/pdf'):
+            image = image_from_pdf(file_content)
+        else:
+            image = Image.open(io.BytesIO(file_content))
+        if image is None:
+            raise HTTPException(status_code=400, detail="Failed to process image")
+        embedding = get_clip_embedding(image)
+        if embedding is None:
+            raise HTTPException(status_code=400, detail="Failed to generate embedding")
+        # Search for top 3 matches
+        k = min(3, len(labels))
+        D, I = index.search(np.array([embedding]), k=k)
+        if len(labels) > 0 and I[0][0] < len(labels):
+            similarity = 1 - D[0][0]
+            confidence_threshold = 0.35
+            best_match = labels[I[0][0]]
+            matches = []
+            for i in range(min(k, len(D[0]))):
+                if I[0][i] < len(labels):
+                    sim = 1 - D[0][i]
+                    matches.append({"category": labels[I[0][i]], "similarity": round(sim, 3)})
+            # Save classified document
+            if similarity >= confidence_threshold:
+                saved_filename = save_uploaded_file(file_content, file.filename)
+                ocr_text = extract_text(image)
+                document = {
+                    "id": str(uuid.uuid4()),
+                    "filename": saved_filename,
+                    "original_filename": file.filename,
+                    "category": best_match,
+                    "similarity": round(similarity, 3),
+                    "ocr_text": ocr_text,
+                    "upload_date": datetime.now().isoformat(),
+                    "file_path": os.path.join(UPLOADS_DIR, saved_filename)
+                }
+                documents.append(document)
+                save_documents()
+                return {
+                    "status": "success",
+                    "category": best_match,
+                    "similarity": round(similarity, 3),
+                    "confidence": "high" if similarity >= confidence_threshold else "low",
+                    "matches": matches,
+                    "document_saved": True,
+                    "document_id": document["id"]
+                }
+            else:
+                return {
+                    "status": "low_confidence",
+                    "category": best_match,
+                    "similarity": round(similarity, 3),
+                    "confidence": "low",
+                    "matches": matches,
+                    "document_saved": False
+                }
+        raise HTTPException(status_code=400, detail="Document not recognized")
+    except Exception as e:
+        raise HTTPException(status_code=500, detail=str(e))
+@app.get("/api/categories")
+async def get_categories(current_user: dict = Depends(get_current_user)):
+    categories = list(set(labels))  # Remove duplicates
+    category_counts = {}
+    for label in labels:
+        category_counts[label] = category_counts.get(label, 0) + 1
+    return {"categories": categories, "counts": category_counts}
+@app.get("/api/documents/{category}")
+async def get_documents_by_category(
+    category: str,
+    current_user: dict = Depends(get_current_user)
+):
+    category_documents = [doc for doc in documents if doc["category"] == category]
+    return {"documents": category_documents, "count": len(category_documents)}
+@app.get("/api/documents")
+async def get_all_documents(current_user: dict = Depends(get_current_user)):
+    return {"documents": documents, "count": len(documents)}
+@app.delete("/api/documents/{document_id}")
+async def delete_document(
+    document_id: str,
+    current_user: dict = Depends(get_current_user)
+):
+    try:
+        # Find document
+        document_index = None
+        document_to_delete = None
+        for i, doc in enumerate(documents):
+            if doc["id"] == document_id:
+                document_index = i
+                document_to_delete = doc
+                break
+        if document_to_delete is None:
+            raise HTTPException(status_code=404, detail="Document not found")
+        # Delete physical file
+        file_path = document_to_delete.get("file_path")
+        if file_path and os.path.exists(file_path):
+            os.remove(file_path)
+        # Remove from documents list
+        documents.pop(document_index)
+        save_documents()
+        return {"message": "Document deleted successfully", "status": "success"}
+    except Exception as e:
+        raise HTTPException(status_code=500, detail=str(e))
+@app.post("/api/ocr")
+async def ocr_document(
+    file: UploadFile = File(...),
+    current_user: dict = Depends(get_current_user)
+):
+    try:
+        file_content = await file.read()
+        if file.content_type and file.content_type.startswith('application/pdf'):
+            image = image_from_pdf(file_content)
+        else:
+            image = Image.open(io.BytesIO(file_content))
+        if image is None:
+            raise HTTPException(status_code=400, detail="Failed to process image")
+        text = extract_text(image)
+        return {"text": text, "status": "success"}
+    except Exception as e:
+        raise HTTPException(status_code=500, detail=str(e))
+@app.get("/api/stats")
+async def get_stats(current_user: dict = Depends(get_current_user)):
+    category_stats = {}
+    for doc in documents:
+        category = doc["category"]
+        if category not in category_stats:
+            category_stats[category] = 0
+        category_stats[category] += 1
+    return {
+        "total_categories": len(set(labels)),
+        "total_documents": len(documents),
+        "category_distribution": category_stats
+    }
+# Script entry point: serve the app with uvicorn on all interfaces, port 8000.
+# NOTE(review): the Dockerfile in this commit exposes 7860 and starts app.py;
+# this module binds 8000 instead — confirm whether that difference is intended.
+if __name__ == "__main__":
+    import uvicorn
+    uvicorn.run(app, host="0.0.0.0", port=8000)

requirements.txt ADDED Viewed

	@@ -0,0 +1,19 @@

+fastapi
+uvicorn[standard]
+python-multipart
+python-jose[cryptography]
+# app.py and main.py do `import jwt` and catch `jwt.PyJWTError`; that module
+# is provided by PyJWT.
+PyJWT
+passlib[bcrypt]
+bcrypt
+gradio
+faiss-cpu
+pytesseract
+pdf2image
+sentence-transformers
+torch
+torchvision
+Pillow
+ftfy
+regex
+tqdm
+git+https://github.com/openai/CLIP.git
+poppler-utils

space.yaml ADDED Viewed

	@@ -0,0 +1 @@


1	+ sdk: "docker"

static/index.html ADDED Viewed

	@@ -0,0 +1,161 @@

+<!DOCTYPE html>
+<html lang="en">
+<head>
+    <meta charset="UTF-8">
+    <meta name="viewport" content="width=device-width, initial-scale=1.0">
+    <title>Handwritten Archive Document Digitalization System</title>
+    <link href="https://cdnjs.cloudflare.com/ajax/libs/font-awesome/6.0.0/css/all.min.css" rel="stylesheet">
+    <link rel="stylesheet" href="/static/styles.css">
+</head>
+<body>
+    <!-- Login Modal -->
+    <div id="loginModal" class="modal">
+        <div class="modal-content">
+            <h2><i class="fas fa-lock"></i> Login Required</h2>
+            <form id="loginForm">
+                <!-- Fields start empty: prefilled credentials in the markup are visible to every visitor -->
+                <div class="form-group">
+                    <label for="username">Username</label>
+                    <input type="text" id="username" class="form-control" required autocomplete="username">
+                </div>
+                <div class="form-group">
+                    <label for="password">Password</label>
+                    <input type="password" id="password" class="form-control" required autocomplete="current-password">
+                </div>
+                <button type="submit" class="btn btn-primary">
+                    <i class="fas fa-sign-in-alt"></i> Login
+                </button>
+            </form>
+            <div id="loginResult"></div>
+        </div>
+    </div>
+    <!-- Main Application -->
+    <div id="mainApp" style="display: none;">
+        <div class="container">
+            <div class="header">
+                <div class="header-content">
+                    <div>
+                        <h1><i class="fas fa-archive"></i> Archive Digitalization System</h1>
+                        <p>Handwritten Document Classification & Storage</p>
+                    </div>
+                    <div class="user-info">
+                        <span id="welcomeUser"></span>
+                        <button class="btn btn-secondary" onclick="logout()">
+                            <i class="fas fa-sign-out-alt"></i> Logout
+                        </button>
+                    </div>
+                </div>
+            </div>
+            <div class="dashboard-stats" id="stats">
+                <!-- Stats will be loaded here -->
+            </div>
+            <div class="tabs">
+                <button class="tab-button active" onclick="showTab('upload')">
+                    <i class="fas fa-upload"></i> Upload Categories
+                </button>
+                <button class="tab-button" onclick="showTab('classify')">
+                    <i class="fas fa-search"></i> Classify Documents
+                </button>
+                <button class="tab-button" onclick="showTab('browse')">
+                    <i class="fas fa-folder-open"></i> Browse Archive
+                </button>
+                <button class="tab-button" onclick="showTab('ocr')">
+                    <i class="fas fa-eye"></i> OCR Text
+                </button>
+            </div>
+            <!-- Upload Categories Tab -->
+            <div id="upload" class="tab-content active">
+                <h2><i class="fas fa-tags"></i> Upload Document Categories</h2>
+                <p>Upload sample documents for each category to train the classification system.</p>
+                <form id="uploadForm">
+                    <div class="form-group">
+                        <label for="categoryFile">Document File (Image or PDF)</label>
+                        <div class="file-upload" id="categoryUpload">
+                            <i class="fas fa-cloud-upload-alt fa-2x"></i>
+                            <p>Click to select or drag & drop files here</p>
+                            <input type="file" id="categoryFile" accept="image/*,.pdf" style="display: none;">
+                        </div>
+                    </div>
+                    <div class="form-group">
+                        <label for="categoryLabel">Category Label</label>
+                        <input type="text" id="categoryLabel" class="form-control" placeholder="e.g., birth_certificate, passport, diploma">
+                    </div>
+                    <button type="submit" class="btn btn-primary">
+                        <i class="fas fa-plus"></i> Add Category
+                    </button>
+                </form>
+                <div id="uploadResult"></div>
+            </div>
+            <!-- Classify Documents Tab -->
+            <div id="classify" class="tab-content">
+                <h2><i class="fas fa-robot"></i> Classify & Store Documents</h2>
+                <p>Upload documents to automatically classify and store them in the archive (min. 35% confidence).</p>
+                <form id="classifyForm">
+                    <div class="form-group">
+                        <label for="classifyFile">Document to Classify</label>
+                        <div class="file-upload" id="classifyUpload">
+                            <i class="fas fa-file fa-2x"></i>
+                            <p>Click to select or drag & drop files here</p>
+                            <input type="file" id="classifyFile" accept="image/*,.pdf" style="display: none;">
+                        </div>
+                    </div>
+                    <button type="submit" class="btn btn-success">
+                        <i class="fas fa-search"></i> Classify Document
+                    </button>
+                </form>
+                <div id="classifyResult"></div>
+            </div>
+            <!-- Browse Archive Tab -->
+            <div id="browse" class="tab-content">
+                <h2><i class="fas fa-archive"></i> Browse Document Archive</h2>
+                <p>Browse and search through your classified documents by category.</p>
+                <div class="category-buttons" id="categoryButtons">
+                    <!-- Category buttons will be loaded here -->
+                </div>
+                <div id="documentsContainer">
+                    <!-- Documents will be loaded here -->
+                </div>
+            </div>
+            <!-- OCR Text Tab -->
+            <div id="ocr" class="tab-content">
+                <h2><i class="fas fa-eye"></i> OCR Text Extraction</h2>
+                <p>Extract text from documents using Optical Character Recognition.</p>
+                <form id="ocrForm">
+                    <div class="form-group">
+                        <label for="ocrFile">Document File</label>
+                        <div class="file-upload" id="ocrUpload">
+                            <i class="fas fa-file-alt fa-2x"></i>
+                            <p>Click to select or drag & drop files here</p>
+                            <input type="file" id="ocrFile" accept="image/*,.pdf" style="display: none;">
+                        </div>
+                    </div>
+                    <button type="submit" class="btn btn-primary">
+                        <i class="fas fa-search"></i> Extract Text
+                    </button>
+                </form>
+                <div id="ocrResult"></div>
+            </div>
+        </div>
+    </div>
+    <script src="/static/script.js"></script>
+</body>
+</html>

static/script.js ADDED Viewed

	@@ -0,0 +1,458 @@

+// Global variables
+let categories = [];
+let documents = [];
+let authToken = null;
+let currentUser = null;
+// Initialize app
+document.addEventListener('DOMContentLoaded', function() {
+    checkAuth();
+});
+// Authentication functions
+function checkAuth() {
+    authToken = localStorage.getItem('authToken');
+    currentUser = localStorage.getItem('currentUser');
+    if (authToken && currentUser) {
+        showMainApp();
+        document.getElementById('welcomeUser').textContent = `Welcome, ${currentUser}`;
+        loadStats();
+        loadCategories();
+        setupFileUploads();
+    } else {
+        showLoginModal();
+    }
+}
+function showLoginModal() {
+    document.getElementById('loginModal').style.display = 'flex';
+    document.getElementById('mainApp').style.display = 'none';
+}
+function showMainApp() {
+    document.getElementById('loginModal').style.display = 'none';
+    document.getElementById('mainApp').style.display = 'block';
+}
+function logout() {
+    localStorage.removeItem('authToken');
+    localStorage.removeItem('currentUser');
+    authToken = null;
+    currentUser = null;
+    showLoginModal();
+}
+// Login form handler
+document.getElementById('loginForm').addEventListener('submit', async (e) => {
+    e.preventDefault();
+    const username = document.getElementById('username').value;
+    const password = document.getElementById('password').value;
+    const resultDiv = document.getElementById('loginResult');
+    const formData = new FormData();
+    formData.append('username', username);
+    formData.append('password', password);
+    try {
+        const response = await fetch('/api/login', {
+            method: 'POST',
+            body: formData
+        });
+        const result = await response.json();
+        if (response.ok) {
+            authToken = result.access_token;
+            currentUser = result.username;
+            localStorage.setItem('authToken', authToken);
+            localStorage.setItem('currentUser', currentUser);
+            showMainApp();
+            document.getElementById('welcomeUser').textContent = `Welcome, ${currentUser}`;
+            loadStats();
+            loadCategories();
+            setupFileUploads();
+        } else {
+            showResult(resultDiv, result.detail, 'error');
+        }
+    } catch (error) {
+        showResult(resultDiv, 'Login failed: ' + error.message, 'error');
+    }
+});
+// API request with authentication
+async function authenticatedFetch(url, options = {}) {
+    if (!authToken) {
+        throw new Error('No authentication token');
+    }
+    const defaultOptions = {
+        headers: {
+            'Authorization': `Bearer ${authToken}`,
+            ...options.headers
+        }
+    };
+    const response = await fetch(url, { ...options, ...defaultOptions });
+    if (response.status === 401) {
+        logout();
+        throw new Error('Authentication failed');
+    }
+    return response;
+}
+// Tab management
+function showTab(tabName) {
+    // Hide all tabs
+    document.querySelectorAll('.tab-content').forEach(tab => {
+        tab.classList.remove('active');
+    });
+    document.querySelectorAll('.tab-button').forEach(btn => {
+        btn.classList.remove('active');
+    });
+    // Show selected tab
+    document.getElementById(tabName).classList.add('active');
+    event.target.classList.add('active');
+    // Load data for specific tabs
+    if (tabName === 'browse') {
+        loadCategories();
+        loadAllDocuments();
+    }
+}
+// Setup file upload drag & drop
+function setupFileUploads() {
+    const uploads = [
+        { div: 'categoryUpload', input: 'categoryFile' },
+        { div: 'classifyUpload', input: 'classifyFile' },
+        { div: 'ocrUpload', input: 'ocrFile' }
+    ];
+    uploads.forEach(upload => {
+        const uploadDiv = document.getElementById(upload.div);
+        const fileInput = document.getElementById(upload.input);
+        uploadDiv.addEventListener('click', () => fileInput.click());
+        uploadDiv.addEventListener('dragover', (e) => {
+            e.preventDefault();
+            uploadDiv.classList.add('dragover');
+        });
+        uploadDiv.addEventListener('dragleave', () => {
+            uploadDiv.classList.remove('dragover');
+        });
+        uploadDiv.addEventListener('drop', (e) => {
+            e.preventDefault();
+            uploadDiv.classList.remove('dragover');
+            const files = e.dataTransfer.files;
+            if (files.length > 0) {
+                fileInput.files = files;
+                uploadDiv.querySelector('p').textContent = files[0].name;
+            }
+        });
+        fileInput.addEventListener('change', () => {
+            if (fileInput.files.length > 0) {
+                uploadDiv.querySelector('p').textContent = fileInput.files[0].name;
+            }
+        });
+    });
+}
+// Load dashboard stats
+async function loadStats() {
+    try {
+        const response = await authenticatedFetch('/api/stats');
+        const stats = await response.json();
+        const statsHtml = `
+            <div class="stat-card">
+                <h3>${stats.total_categories}</h3>
+                <p><i class="fas fa-tags"></i> Total Categories</p>
+            </div>
+            <div class="stat-card">
+                <h3>${stats.total_documents}</h3>
+                <p><i class="fas fa-file"></i> Documents Archived</p>
+            </div>
+            <div class="stat-card">
+                <h3>35%</h3>
+                <p><i class="fas fa-percentage"></i> Min Confidence</p>
+            </div>
+        `;
+        document.getElementById('stats').innerHTML = statsHtml;
+    } catch (error) {
+        console.error('Error loading stats:', error);
+    }
+}
+// Load categories
+async function loadCategories() {
+    try {
+        const response = await authenticatedFetch('/api/categories');
+        const data = await response.json();
+        categories = data.categories;
+        const buttonsHtml = `
+            <button class="category-btn active" onclick="filterDocuments('all')">
+                All Documents
+            </button>
+            ${categories.map(cat => `
+                <button class="category-btn" onclick="filterDocuments('${cat}')">
+                    ${cat} (${data.counts[cat] || 0})
+                </button>
+            `).join('')}
+        `;
+        document.getElementById('categoryButtons').innerHTML = buttonsHtml;
+    } catch (error) {
+        console.error('Error loading categories:', error);
+    }
+}
+// Load all documents
+async function loadAllDocuments() {
+    try {
+        const response = await authenticatedFetch('/api/documents');
+        const data = await response.json();
+        documents = data.documents;
+        displayDocuments(documents);
+    } catch (error) {
+        console.error('Error loading documents:', error);
+    }
+}
+// Filter documents by category
+async function filterDocuments(category) {
+    // Update active button
+    document.querySelectorAll('.category-btn').forEach(btn => {
+        btn.classList.remove('active');
+    });
+    event.target.classList.add('active');
+    try {
+        let filteredDocs;
+        if (category === 'all') {
+            const response = await authenticatedFetch('/api/documents');
+            const data = await response.json();
+            filteredDocs = data.documents;
+        } else {
+            const response = await authenticatedFetch(`/api/documents/${category}`);
+            const data = await response.json();
+            filteredDocs = data.documents;
+        }
+        displayDocuments(filteredDocs);
+    } catch (error) {
+        console.error('Error filtering documents:', error);
+    }
+}
+// Delete document
+async function deleteDocument(documentId, filename) {
+    if (!confirm(`Are you sure you want to delete "${filename}"? This action cannot be undone.`)) {
+        return;
+    }
+    try {
+        const response = await authenticatedFetch(`/api/documents/${documentId}`, {
+            method: 'DELETE'
+        });
+        const result = await response.json();
+        if (response.ok) {
+            // Refresh the current view
+            loadAllDocuments();
+            loadStats();
+            loadCategories();
+            alert('Document deleted successfully');
+        } else {
+            alert('Failed to delete document: ' + result.detail);
+        }
+    } catch (error) {
+        alert('Error deleting document: ' + error.message);
+    }
+}
+// Display documents
+function displayDocuments(docs) {
+    const container = document.getElementById('documentsContainer');
+    if (docs.length === 0) {
+        container.innerHTML = '<p>No documents found for this category.</p>';
+        return;
+    }
+    const docsHtml = docs.map(doc => {
+        const similarityClass = doc.similarity >= 0.7 ? 'similarity-high' :
+                              doc.similarity >= 0.5 ? 'similarity-medium' : 'similarity-low';
+        return `
+            <div class="document-card">
+                <h4><i class="fas fa-file"></i> ${doc.original_filename}</h4>
+                <p><strong>Category:</strong> ${doc.category}</p>
+                <p><strong>Confidence:</strong>
+                    <span class="similarity-badge ${similarityClass}">
+                        ${(doc.similarity * 100).toFixed(1)}%
+                    </span>
+                </p>
+                <p><strong>Upload Date:</strong> ${new Date(doc.upload_date).toLocaleDateString()}</p>
+                <p><strong>OCR Preview:</strong></p>
+                <div style="max-height: 100px; overflow-y: auto; background: #f8f9fa; padding: 0.5rem; border-radius: 4px; font-size: 0.8rem;">
+                    ${doc.ocr_text.substring(0, 200)}${doc.ocr_text.length > 200 ? '...' : ''}
+                </div>
+                <div class="document-actions">
+                    <button class="btn btn-danger" onclick="deleteDocument('${doc.id}', '${doc.original_filename}')">
+                        <i class="fas fa-trash"></i> Delete
+                    </button>
+                </div>
+            </div>
+        `;
+    }).join('');
+    container.innerHTML = `<div class="document-grid">${docsHtml}</div>`;
+}
+// Form submissions
+document.getElementById('uploadForm').addEventListener('submit', async (e) => {
+    e.preventDefault();
+    const fileInput = document.getElementById('categoryFile');
+    const labelInput = document.getElementById('categoryLabel');
+    const resultDiv = document.getElementById('uploadResult');
+    if (!fileInput.files[0] || !labelInput.value.trim()) {
+        showResult(resultDiv, 'Please select a file and enter a label.', 'error');
+        return;
+    }
+    const formData = new FormData();
+    formData.append('file', fileInput.files[0]);
+    formData.append('label', labelInput.value.trim());
+    showResult(resultDiv, '<div class="loading"></div> Uploading...', 'info');
+    try {
+        const response = await authenticatedFetch('/api/upload-category', {
+            method: 'POST',
+            body: formData
+        });
+        const result = await response.json();
+        if (response.ok) {
+            showResult(resultDiv, result.message, 'success');
+            labelInput.value = '';
+            fileInput.value = '';
+            document.querySelector('#categoryUpload p').textContent = 'Click to select or drag & drop files here';
+            loadStats();
+            loadCategories();
+        } else {
+            showResult(resultDiv, result.detail, 'error');
+        }
+    } catch (error) {
+        showResult(resultDiv, 'Upload failed: ' + error.message, 'error');
+    }
+});
+document.getElementById('classifyForm').addEventListener('submit', async (e) => {
+    e.preventDefault();
+    const fileInput = document.getElementById('classifyFile');
+    const resultDiv = document.getElementById('classifyResult');
+    if (!fileInput.files[0]) {
+        showResult(resultDiv, 'Please select a file to classify.', 'error');
+        return;
+    }
+    const formData = new FormData();
+    formData.append('file', fileInput.files[0]);
+    showResult(resultDiv, '<div class="loading"></div> Classifying...', 'info');
+    try {
+        const response = await authenticatedFetch('/api/classify-document', {
+            method: 'POST',
+            body: formData
+        });
+        const result = await response.json();
+        if (response.ok) {
+            const confidenceText = result.confidence === 'high' ? '✅ High Confidence' : '⚠️ Low Confidence';
+            const savedText = result.document_saved ? '\n📁 Document saved to archive' : '';
+            let matchesText = '\n\nTop matches:\n';
+            result.matches.forEach(match => {
+                matchesText += `• ${match.category}: ${(match.similarity * 100).toFixed(1)}%\n`;
+            });
+            showResult(resultDiv,
+                `🎯 Classification: ${result.category}\n` +
+                `${confidenceText} (${(result.similarity * 100).toFixed(1)}%)${savedText}${matchesText}`,
+                result.confidence === 'high' ? 'success' : 'warning'
+            );
+            fileInput.value = '';
+            document.querySelector('#classifyUpload p').textContent = 'Click to select or drag & drop files here';
+            loadStats();
+        } else {
+            showResult(resultDiv, result.detail, 'error');
+        }
+    } catch (error) {
+        showResult(resultDiv, 'Classification failed: ' + error.message, 'error');
+    }
+});
+document.getElementById('ocrForm').addEventListener('submit', async (e) => {
+    e.preventDefault();
+    const fileInput = document.getElementById('ocrFile');
+    const resultDiv = document.getElementById('ocrResult');
+    if (!fileInput.files[0]) {
+        showResult(resultDiv, 'Please select a file for OCR.', 'error');
+        return;
+    }
+    const formData = new FormData();
+    formData.append('file', fileInput.files[0]);
+    showResult(resultDiv, '<div class="loading"></div> Extracting text...', 'info');
+    try {
+        const response = await authenticatedFetch('/api/ocr', {
+            method: 'POST',
+            body: formData
+        });
+        const result = await response.json();
+        if (response.ok) {
+            showResult(resultDiv, result.text, 'success');
+        } else {
+            showResult(resultDiv, result.detail, 'error');
+        }
+    } catch (error) {
+        showResult(resultDiv, 'OCR failed: ' + error.message, 'error');
+    }
+});
+// Utility function to show results
+function showResult(element, message, type) {
+    const className = type === 'success' ? 'result-success' :
+                    type === 'error' ? 'result-error' :
+                    type === 'warning' ? 'result-warning' : '';
+    element.innerHTML = `<div class="result-box ${className}">${message}</div>`;
+}

static/styles.css ADDED Viewed

	@@ -0,0 +1,372 @@

+:root {
+    --primary-color: #2563eb;
+    --secondary-color: #1e40af;
+    --success-color: #10b981;
+    --warning-color: #f59e0b;
+    --error-color: #ef4444;
+    --bg-color: #f8fafc;
+    --card-bg: #ffffff;
+    --text-primary: #1f2937;
+    --text-secondary: #6b7280;
+    --border-color: #e5e7eb;
+}
+* {
+    margin: 0;
+    padding: 0;
+    box-sizing: border-box;
+}
+body {
+    font-family: 'Segoe UI', Tahoma, Geneva, Verdana, sans-serif;
+    background-color: var(--bg-color);
+    color: var(--text-primary);
+    line-height: 1.6;
+}
+/* Modal Styles */
+.modal {
+    display: flex;
+    position: fixed;
+    z-index: 1000;
+    left: 0;
+    top: 0;
+    width: 100%;
+    height: 100%;
+    background-color: rgba(0,0,0,0.5);
+    align-items: center;
+    justify-content: center;
+}
+.modal-content {
+    background-color: var(--card-bg);
+    padding: 2rem;
+    border-radius: 12px;
+    box-shadow: 0 10px 25px rgba(0,0,0,0.2);
+    width: 90%;
+    max-width: 400px;
+}
+.modal-content h2 {
+    text-align: center;
+    margin-bottom: 1.5rem;
+    color: var(--primary-color);
+}
+.container {
+    max-width: 1200px;
+    margin: 0 auto;
+    padding: 20px;
+}
+.header {
+    background: linear-gradient(135deg, var(--primary-color), var(--secondary-color));
+    color: white;
+    padding: 2rem;
+    margin-bottom: 2rem;
+    border-radius: 12px;
+}
+.header-content {
+    display: flex;
+    justify-content: space-between;
+    align-items: center;
+    flex-wrap: wrap;
+    gap: 1rem;
+}
+.header h1 {
+    font-size: 2.5rem;
+    margin-bottom: 0.5rem;
+}
+.header p {
+    font-size: 1.1rem;
+    opacity: 0.9;
+}
+.user-info {
+    display: flex;
+    align-items: center;
+    gap: 1rem;
+}
+.user-info span {
+    font-weight: 500;
+}
+.dashboard-stats {
+    display: grid;
+    grid-template-columns: repeat(auto-fit, minmax(250px, 1fr));
+    gap: 1.5rem;
+    margin-bottom: 2rem;
+}
+.stat-card {
+    background: var(--card-bg);
+    padding: 1.5rem;
+    border-radius: 12px;
+    box-shadow: 0 4px 6px -1px rgba(0, 0, 0, 0.1);
+    border: 1px solid var(--border-color);
+}
+.stat-card h3 {
+    font-size: 2rem;
+    color: var(--primary-color);
+    margin-bottom: 0.5rem;
+}
+.stat-card p {
+    color: var(--text-secondary);
+    font-size: 0.9rem;
+}
+.tabs {
+    display: flex;
+    margin-bottom: 2rem;
+    background: var(--card-bg);
+    border-radius: 12px;
+    padding: 0.5rem;
+    box-shadow: 0 2px 4px -1px rgba(0, 0, 0, 0.1);
+}
+.tab-button {
+    flex: 1;
+    padding: 1rem;
+    border: none;
+    background: transparent;
+    cursor: pointer;
+    border-radius: 8px;
+    font-weight: 500;
+    transition: all 0.3s ease;
+}
+.tab-button.active {
+    background: var(--primary-color);
+    color: white;
+}
+.tab-content {
+    display: none;
+    background: var(--card-bg);
+    padding: 2rem;
+    border-radius: 12px;
+    box-shadow: 0 4px 6px -1px rgba(0, 0, 0, 0.1);
+    border: 1px solid var(--border-color);
+}
+.tab-content.active {
+    display: block;
+}
+.form-group {
+    margin-bottom: 1.5rem;
+}
+.form-group label {
+    display: block;
+    margin-bottom: 0.5rem;
+    font-weight: 600;
+    color: var(--text-primary);
+}
+.form-control {
+    width: 100%;
+    padding: 0.75rem;
+    border: 2px solid var(--border-color);
+    border-radius: 8px;
+    font-size: 1rem;
+    transition: border-color 0.3s ease;
+}
+.form-control:focus {
+    outline: none;
+    border-color: var(--primary-color);
+}
+.btn {
+    padding: 0.75rem 1.5rem;
+    border: none;
+    border-radius: 8px;
+    font-size: 1rem;
+    font-weight: 600;
+    cursor: pointer;
+    transition: all 0.3s ease;
+}
+.btn-primary {
+    background: var(--primary-color);
+    color: white;
+}
+.btn-primary:hover {
+    background: var(--secondary-color);
+}
+.btn-secondary {
+    background: var(--text-secondary);
+    color: white;
+}
+.btn-success {
+    background: var(--success-color);
+    color: white;
+}
+.btn-danger {
+    background: var(--error-color);
+    color: white;
+}
+.btn-danger:hover {
+    background: #dc2626;
+}
+.file-upload {
+    border: 2px dashed var(--border-color);
+    border-radius: 8px;
+    padding: 2rem;
+    text-align: center;
+    cursor: pointer;
+    transition: all 0.3s ease;
+}
+.file-upload:hover {
+    border-color: var(--primary-color);
+    background-color: #f0f9ff;
+}
+.file-upload.dragover {
+    border-color: var(--primary-color);
+    background-color: #f0f9ff;
+}
+.result-box {
+    margin-top: 1rem;
+    padding: 1rem;
+    border-radius: 8px;
+    font-family: monospace;
+    white-space: pre-wrap;
+}
+.result-success {
+    background-color: #f0fdf4;
+    border: 1px solid var(--success-color);
+    color: #166534;
+}
+.result-error {
+    background-color: #fef2f2;
+    border: 1px solid var(--error-color);
+    color: #dc2626;
+}
+.result-warning {
+    background-color: #fffbeb;
+    border: 1px solid var(--warning-color);
+    color: #92400e;
+}
+.document-grid {
+    display: grid;
+    grid-template-columns: repeat(auto-fill, minmax(300px, 1fr));
+    gap: 1rem;
+    margin-top: 1rem;
+}
+.document-card {
+    background: var(--card-bg);
+    border: 1px solid var(--border-color);
+    border-radius: 8px;
+    padding: 1rem;
+    box-shadow: 0 2px 4px -1px rgba(0, 0, 0, 0.1);
+}
+.document-card h4 {
+    color: var(--primary-color);
+    margin-bottom: 0.5rem;
+}
+.document-actions {
+    display: flex;
+    gap: 0.5rem;
+    margin-top: 1rem;
+}
+.category-buttons {
+    display: flex;
+    flex-wrap: wrap;
+    gap: 0.5rem;
+    margin-bottom: 1rem;
+}
+.category-btn {
+    padding: 0.5rem 1rem;
+    background: var(--bg-color);
+    border: 1px solid var(--border-color);
+    border-radius: 20px;
+    cursor: pointer;
+    transition: all 0.3s ease;
+}
+.category-btn:hover, .category-btn.active {
+    background: var(--primary-color);
+    color: white;
+    border-color: var(--primary-color);
+}
+.loading {
+    display: inline-block;
+    width: 20px;
+    height: 20px;
+    border: 2px solid #f3f3f3;
+    border-top: 2px solid var(--primary-color);
+    border-radius: 50%;
+    animation: spin 1s linear infinite;
+}
+@keyframes spin {
+    0% { transform: rotate(0deg); }
+    100% { transform: rotate(360deg); }
+}
+.similarity-badge {
+    display: inline-block;
+    padding: 0.25rem 0.5rem;
+    border-radius: 12px;
+    font-size: 0.8rem;
+    font-weight: 600;
+}
+.similarity-high {
+    background: #dcfce7;
+    color: #166534;
+}
+.similarity-medium {
+    background: #fef3c7;
+    color: #92400e;
+}
+.similarity-low {
+    background: #fecaca;
+    color: #dc2626;
+}
+@media (max-width: 768px) {
+    .header-content {
+        text-align: center;
+    }
+    .header h1 {
+        font-size: 2rem;
+    }
+    .tabs {
+        flex-direction: column;
+    }
+    .tab-button {
+        text-align: center;
+    }
+}