Spaces:
Sleeping
Sleeping
File size: 1,742 Bytes
6b21b32 bf70aa2 cea309f 6b21b32 cea309f 6b21b32 bf70aa2 6b21b32 bf70aa2 6b21b32 bf70aa2 6b21b32 bf70aa2 6b21b32 bf70aa2 6b21b32 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 |
from fastapi import FastAPI
from pydantic import BaseModel
import joblib
import re
# Application instance; title, description and version feed the generated
# API documentation pages.
app = FastAPI(
    title="Email Classification API",
    description="Classifies support emails into categories and masks personal information.",
    version="1.0.0",
    docs_url="/docs",  # Swagger UI enabled here
    redoc_url="/redoc",  # Optional ReDoc UI
)

# Load the trained classifier once, at import time, so every request reuses it.
# NOTE(review): joblib.load unpickles the file, so "model.joblib" must come
# from a trusted source; the relative path is resolved against the process's
# working directory -- confirm that matches the deployment layout.
model = joblib.load("model.joblib")
# Email input structure
class EmailInput(BaseModel):
    # Request body accepted by the POST /classify endpoint.
    # Documented with comments rather than a docstring so the generated
    # schema description stays exactly as it was.

    # Subject line; optional, defaults to the empty string.
    subject: str = ""
    # Email body; required.
    email: str
# PII masker
# Ordered (label, compiled pattern) pairs, compiled once at import time.
# Order matters: each pattern runs on text already masked by the earlier ones.
# Card numbers are checked before Aadhaar numbers so that the first 12 digits
# of a spaced or dashed 16-digit card are not mistaken for an Aadhaar number.
_PII_PATTERNS = tuple(
    (label, re.compile(pattern))
    for label, pattern in (
        ("email", r"\b[a-zA-Z0-9_.+-]+@[a-zA-Z0-9-]+\.[a-zA-Z0-9-.]+\b"),
        ("phone_number", r"\b\d{10}\b"),
        ("dob", r"\b\d{2}[/-]\d{2}[/-]\d{4}\b"),
        ("credit_debit_no", r"\b(?:\d[ -]*?){13,16}\b"),
        ("aadhar_num", r"\b\d{4}[- ]?\d{4}[- ]?\d{4}\b"),
        ("cvv_no", r"\b\d{3}\b"),
        ("expiry_no", r"\b(0[1-9]|1[0-2])\/\d{2,4}\b"),
        ("full_name", r"\b([A-Z][a-z]+(?:\s[A-Z][a-z]+)+)\b"),
    )
)


def _mask_matches(label, pattern, text, pii_map):
    """Replace each match of *pattern* in *text* with a ``[label_N]`` placeholder.

    New placeholders are recorded in *pii_map* (mutated in place). Returns the
    masked text.
    """
    placeholder_for = {}

    def substitute(match):
        # group(0) is the whole match, so capture groups inside a pattern
        # (e.g. the month group of the expiry pattern) cannot truncate the
        # stored value.
        value = match.group(0)
        if value not in placeholder_for:
            placeholder = f"[{label}_{len(placeholder_for)}]"
            placeholder_for[value] = placeholder
            pii_map[placeholder] = value
        return placeholder_for[value]

    # Substituting at the match positions (instead of str.replace on the
    # matched text) leaves identical substrings elsewhere untouched.
    return pattern.sub(substitute, text)


def mask_and_store_all_pii(text):
    """Mask personal information in *text* and remember what was masked.

    Each kind of PII (email, phone number, date of birth, card number,
    Aadhaar number, CVV, card expiry, full name) is replaced by a placeholder
    of the form ``[<label>_<index>]``. The index counts distinct values per
    label, and a value that occurs several times always gets the same
    placeholder.

    Args:
        text: The string to mask.

    Returns:
        A ``(masked_text, pii_map)`` tuple, where ``pii_map`` maps every
        placeholder present in ``masked_text`` to the original text it
        replaced.
    """
    pii_map = {}
    for label, pattern in _PII_PATTERNS:
        text = _mask_matches(label, pattern, text, pii_map)
    return text, pii_map
# Main endpoint
@app.post("/classify")
def classify_email(data: EmailInput):
    # Flow: join subject and body with a space, mask PII, classify the masked
    # text, then return the masked text, the predicted label and the
    # placeholder -> original-value map.
    # Comments are used instead of a docstring so the generated API
    # description stays exactly as it was.
    masking_result = mask_and_store_all_pii(f"{data.subject} {data.email}")
    masked_text = masking_result[0]
    pii_map = masking_result[1]

    # The model is given a one-element batch; take the single prediction back.
    predictions = model.predict([masked_text])
    category = predictions[0]

    response = {
        "masked_text": masked_text,
        "predicted_category": category,
        "pii_map": pii_map,
    }
    return response
@app.get("/")
def root():
    # Fixed status message served at the API root.
    status = {"message": "Email Classification API is running."}
    return status
|