"""Email classification API: masks PII in an email, then predicts its category."""

import re
from typing import Dict, Tuple

import joblib
from fastapi import FastAPI, HTTPException
from pydantic import BaseModel

# Load the model once at import time so every request reuses the same object.
model = joblib.load("model.joblib")

app = FastAPI(title="Email Classification API")


@app.get("/")
def root():
    """Return a static message confirming the service is up."""
    return {"message": "Email Classification API is running."}


class EmailInput(BaseModel):
    """Request body for ``/classify``."""

    # Optional subject line; defaults to an empty string when omitted.
    subject: str = ""
    # Email body text (required).
    email: str


# PII detectors, applied in insertion order. Compiled once at import so the
# request path does not rebuild them on every call.
# NOTE(review): order matters -- a 16-digit card number written in groups of
# four is claimed by "aadhar_num" (first 12 digits) before "credit_debit_no"
# runs. Left as in the original; confirm whether that is intended.
_PII_PATTERNS = {
    "email": re.compile(r"\b[a-zA-Z0-9_.+-]+@[a-zA-Z0-9-]+\.[a-zA-Z0-9-.]+\b"),
    "phone_number": re.compile(r"\b\d{10}\b"),
    "dob": re.compile(r"\b\d{2}[/-]\d{2}[/-]\d{4}\b"),
    "aadhar_num": re.compile(r"\b\d{4}[- ]?\d{4}[- ]?\d{4}\b"),
    "credit_debit_no": re.compile(r"\b(?:\d[ -]*?){13,16}\b"),
    "cvv_no": re.compile(r"\b\d{3}\b"),
    "expiry_no": re.compile(r"\b(0[1-9]|1[0-2])\/\d{2,4}\b"),
    "full_name": re.compile(r"\b([A-Z][a-z]+(?:\s[A-Z][a-z]+)+)\b"),
}


def mask_and_store_all_pii(text: object) -> Tuple[str, Dict[str, str]]:
    """Replace detected PII in *text* with placeholders.

    Each pattern in ``_PII_PATTERNS`` is applied in turn to the progressively
    masked text. Every match is replaced, at its exact position, by a
    placeholder of the form ``[<label>_<i>]`` where ``i`` counts matches of
    that label from zero.

    Args:
        text: Value to mask; it is converted with ``str()`` first.

    Returns:
        A ``(masked_text, pii_map)`` tuple, where ``pii_map`` maps each
        placeholder to the full original substring it replaced.

    NOTE(review): the original comment said this mirrors the masking used at
    training time. Masked output now differs from the previous implementation
    for expiry dates, for short numbers that also occur inside longer ones,
    and for repeated identical values -- confirm the training pipeline agrees.
    """
    masked = str(text)
    pii_map: Dict[str, str] = {}

    for label, pattern in _PII_PATTERNS.items():
        pieces = []
        cursor = 0
        for index, match in enumerate(pattern.finditer(masked)):
            placeholder = f"[{label}_{index}]"
            # group(0) is the whole match. findall() would return only the
            # capture group for patterns that have one (e.g. just the month
            # for "expiry_no"), losing the rest of the value.
            pii_map[placeholder] = match.group(0)
            # Splice by span rather than str.replace(), so identical digits
            # elsewhere in the text are not rewritten by accident.
            pieces.append(masked[cursor:match.start()])
            pieces.append(placeholder)
            cursor = match.end()
        pieces.append(masked[cursor:])
        masked = "".join(pieces)

    return masked, pii_map


def restore_pii(masked_text: str, pii_map: Dict[str, str]) -> str:
    """Substitute every placeholder in *masked_text* with its original value.

    Args:
        masked_text: Text containing placeholders.
        pii_map: Placeholder-to-original mapping, as returned by
            ``mask_and_store_all_pii``.

    Returns:
        The text with each placeholder replaced by its mapped value.
    """
    for placeholder, original in pii_map.items():
        masked_text = masked_text.replace(placeholder, original)
    return masked_text


@app.post("/classify")
def classify_email(data: EmailInput):
    """Mask PII in the submitted email and predict its category.

    The subject and body are joined with a single space, masked, and the
    masked text is passed to ``model.predict`` as a one-element list.

    Returns:
        A dict with the predicted category, the masked text, and the
        placeholder-to-original PII map.
    """
    raw_text = f"{data.subject} {data.email}"
    masked_text, pii_map = mask_and_store_all_pii(raw_text)

    prediction = model.predict([masked_text])[0]
    # NumPy scalars expose .item(), which yields the equivalent built-in
    # Python value; unwrapping keeps the response JSON-serializable when the
    # label is not a plain str.
    # NOTE(review): assumes the model returns NumPy-style scalars -- confirm.
    if hasattr(prediction, "item"):
        prediction = prediction.item()

    return {
        "predicted_category": prediction,
        "masked_text": masked_text,
        "pii_map": pii_map,
    }