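# Spaces: Sleeping
# NOTE(review): the line above looks like page-header residue captured along
# with the source; it is kept as a comment because it is not program text.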
import re
from typing import Dict, Tuple

import joblib
from fastapi import FastAPI, HTTPException
from pydantic import BaseModel
# Create the FastAPI application. No docs-related arguments are overridden,
# so the interactive documentation stays at the framework defaults.
app = FastAPI(
    title="Email Classification API",
    version="1.0.0",
    description="Classifies emails and masks PII/PCI information.",
)

# Load the trained model a single time at import so every request reuses it.
# SECURITY: joblib.load unpickles the file, which can execute arbitrary code;
# only deploy a model.joblib that comes from a trusted source.
# The relative path is resolved against the process's working directory.
model = joblib.load("model.joblib")
# Root endpoint: a lightweight "is the service up" message.
# NOTE(review): no route registration was visible for this handler; GET "/"
# follows from the "root endpoint" description — confirm the HTTP method.
@app.get("/")
def root():
    """Return a static message confirming that the API is running."""
    return {"message": "Email Classification API is running."}
# Request body schema for the classification endpoint.
class EmailInput(BaseModel):
    """Input payload describing one email to classify."""

    # Subject line; optional, defaults to an empty string.
    subject: str = ""
    # Email body text; required.
    email: str
# PII/PCI detectors, compiled once at import. They are applied in this order,
# and each one runs on text already masked by the detectors before it.
# NOTE(review): because "aadhar_num" runs before "credit_debit_no", a card
# number written in space/dash separated groups of four has its first twelve
# digits labelled as an Aadhaar number. Ordering is left unchanged here.
_PII_PATTERNS = {
    "email": re.compile(r"\b[a-zA-Z0-9_.+-]+@[a-zA-Z0-9-]+\.[a-zA-Z0-9-.]+\b"),
    "phone_number": re.compile(r"\b\d{10}\b"),
    "dob": re.compile(r"\b\d{2}[/-]\d{2}[/-]\d{4}\b"),
    "aadhar_num": re.compile(r"\b\d{4}[- ]?\d{4}[- ]?\d{4}\b"),
    "credit_debit_no": re.compile(r"\b(?:\d[ -]*?){13,16}\b"),
    "cvv_no": re.compile(r"\b\d{3}\b"),
    "expiry_no": re.compile(r"\b(0[1-9]|1[0-2])\/\d{2,4}\b"),
    "full_name": re.compile(r"\b([A-Z][a-z]+(?:\s[A-Z][a-z]+)+)\b"),
}


def _mask_label(text, label, pattern, pii_map):
    """Replace each match of one compiled pattern with a numbered placeholder.

    Matches are substituted at their own position in ``text`` and recorded in
    ``pii_map`` (mutated in place). Returns the rewritten text.
    """
    pieces = []
    cursor = 0
    for index, match in enumerate(pattern.finditer(text)):
        placeholder = f"[{label}_{index}]"
        # group(0) is always the whole match, even when the pattern contains
        # capture groups (findall would hand back only the group's text).
        pii_map[placeholder] = match.group(0)
        pieces.append(text[cursor:match.start()])
        pieces.append(placeholder)
        cursor = match.end()
    pieces.append(text[cursor:])
    return "".join(pieces)


def mask_and_store_all_pii(text: object) -> Tuple[str, Dict[str, str]]:
    """Mask PII/PCI values in ``text`` and remember what was masked.

    Each detected value is replaced by a placeholder of the form
    ``[<label>_<n>]``, where ``n`` counts matches of that label from 0 in
    order of appearance. Substitution is positional, so only the matched span
    is rewritten; identical values that occur more than once each get their
    own placeholder.

    Args:
        text: Value to scan. It is converted with ``str()`` first, so
            non-string input is accepted.

    Returns:
        A ``(masked_text, pii_map)`` tuple, where ``pii_map`` maps every
        placeholder present in ``masked_text`` to the original matched text.
    """
    masked = str(text)
    pii_map: Dict[str, str] = {}
    for label, pattern in _PII_PATTERNS.items():
        masked = _mask_label(masked, label, pattern, pii_map)
    return masked, pii_map
# Endpoint to classify an email.
# NOTE(review): no route registration was visible for this handler;
# POST "/classify" is an assumed path — confirm against existing clients.
@app.post("/classify")
def classify_email(data: EmailInput):
    """Mask PII/PCI in the submitted email and predict its category.

    The subject and body are joined with a single space, masked, and the
    masked text is passed to the loaded model as a one-element batch.

    Returns an object with the predicted category, the masked text, and the
    placeholder-to-original-value map.
    """
    raw_text = f"{data.subject} {data.email}"
    masked_text, pii_map = mask_and_store_all_pii(raw_text)

    # predict() receives a one-item list; the single result is unwrapped.
    prediction = model.predict([masked_text])[0]

    return {
        "predicted_category": prediction,
        "masked_text": masked_text,
        "pii_map": pii_map,
    }