Spaces:
Sleeping
Sleeping
File size: 2,386 Bytes
6b21b32 bf70aa2 11184ec cea309f 6b21b32 11184ec cea309f 11184ec bf70aa2 11184ec bf70aa2 11184ec bf70aa2 11184ec bf70aa2 11184ec bf70aa2 11184ec bf70aa2 11184ec bf70aa2 11184ec bf70aa2 11184ec bf70aa2 11184ec bf70aa2 11184ec bf70aa2 11184ec bf70aa2 6b21b32 11184ec 6b21b32 11184ec |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 |
from fastapi import FastAPI
from pydantic import BaseModel
import joblib
import re
# Initialize FastAPI app
# The title, version and description below are the metadata handed to FastAPI;
# docs_url and redoc_url give the paths for the generated documentation pages.
app = FastAPI(
    title="Email Classification API",
    description="Classifies support emails into categories and masks personal information.",
    version="1.0.0",
    docs_url="/docs",
    redoc_url="/redoc",
)
# Load pre-trained model
# Runs once at import time. The path is relative, so it is resolved against the
# process's current working directory: start the service from the directory
# that contains model.joblib.
# NOTE(review): joblib persistence is pickle-based, so loading a file can run
# arbitrary code — only deploy a model.joblib that comes from a trusted source.
model = joblib.load("model.joblib")
# Input schema
# Request body accepted by the /classify endpoint.
# (Documented with comments instead of a docstring so the generated schema
# description stays exactly as it was.)
class EmailInput(BaseModel):
    # Raw, unmasked text of the email to classify.
    input_email_body: str
# PII Masking Function
# Detection patterns in priority order: text claimed by an earlier pattern is
# hidden from every later one. The card pattern is tried before the Aadhaar
# pattern because a spaced 16-digit card number begins with a valid 12-digit
# Aadhaar match; in the opposite order the card is mislabelled and its last
# four digits stay unmasked.
_PII_PATTERNS = (
    ("email", re.compile(r"\b[a-zA-Z0-9_.+-]+@[a-zA-Z0-9-]+\.[a-zA-Z0-9-.]+\b")),
    ("phone_number", re.compile(r"\b\d{10}\b")),
    ("dob", re.compile(r"\b\d{2}[/-]\d{2}[/-]\d{4}\b")),
    ("credit_debit_no", re.compile(r"\b(?:\d[ -]*?){13,16}\b")),
    ("aadhar_num", re.compile(r"\b\d{4}[- ]?\d{4}[- ]?\d{4}\b")),
    ("cvv_no", re.compile(r"\b\d{3}\b")),
    ("expiry_no", re.compile(r"\b(0[1-9]|1[0-2])\/\d{2,4}\b")),
    ("full_name", re.compile(r"\b([A-Z][a-z]+(?:\s[A-Z][a-z]+)+)\b")),
)

# Non-word, non-digit, non-space filler used to blank out claimed text while
# keeping every remaining character at its original offset.
_CLAIMED_FILLER = "\x00"


def mask_and_store_all_pii(text):
    """Replace detected PII in *text* with numbered placeholders.

    Args:
        text: The text to scan; it is converted with ``str()`` first.

    Returns:
        A ``(masked_text, pii_map, entity_list)`` tuple where

        * ``masked_text`` is the input with every detected entity replaced by
          a ``[<label>_<n>]`` placeholder,
        * ``pii_map`` maps each placeholder to the text it replaced,
        * ``entity_list`` holds one dict per entity with the keys
          ``position`` (``[start, end]`` offsets into the *original* text),
          ``classification`` (the pattern label) and ``entity`` (the matched
          text), in the order the entities were detected (pattern priority
          first, then position).
    """
    text = str(text)
    pii_map = {}
    entity_list = []
    spans = []  # (start, end, placeholder) for every claimed region

    def claim(label, match):
        # Record one match and return the filler that hides it from later patterns.
        original = match.group()
        placeholder = f"[{label}_{len(pii_map)}]"
        pii_map[placeholder] = original
        entity_list.append({
            "position": [match.start(), match.end()],
            "classification": label,
            "entity": original,
        })
        spans.append((match.start(), match.end(), placeholder))
        return _CLAIMED_FILLER * len(original)

    # Matching runs on a same-length working copy, so offsets always refer to
    # the original text and an entity can never be detected twice.
    working = text
    for label, pattern in _PII_PATTERNS:
        working = pattern.sub(
            lambda match, _label=label: claim(_label, match), working
        )

    # Splice the placeholders in by span instead of str.replace(), which would
    # hit the first textual occurrence rather than the region that matched.
    pieces = []
    cursor = 0
    for start, end, placeholder in sorted(spans):
        pieces.append(text[cursor:start])
        pieces.append(placeholder)
        cursor = end
    pieces.append(text[cursor:])
    return "".join(pieces), pii_map, entity_list
# Restore PII
def restore_pii(masked_text, pii_map):
    """Put the original values back in place of their placeholders.

    Args:
        masked_text: Text containing placeholders.
        pii_map: Mapping of placeholder -> original text, as returned by
            ``mask_and_store_all_pii``.

    Returns:
        ``masked_text`` with every occurrence of every placeholder replaced
        by its original value. Text with no placeholders, or an empty map,
        comes back unchanged.
    """
    if not pii_map:
        # An empty alternation would match the empty string at every position.
        return masked_text

    # One pass over the text: a restored value that happens to look like
    # another placeholder is never substituted a second time, which chained
    # str.replace() calls would do. Longest keys go first so a key that is a
    # prefix of another cannot shadow it.
    alternation = "|".join(
        re.escape(placeholder)
        for placeholder in sorted(pii_map, key=len, reverse=True)
    )
    # A callable replacement keeps backslashes in the restored values literal.
    return re.sub(alternation, lambda match: pii_map[match.group()], masked_text)
# Classification Endpoint
# Handles POST /classify: masks PII in the submitted email, classifies the
# masked text and returns the original body, the detected entities, the masked
# email and the predicted category.
# (Documented with comments on purpose — FastAPI publishes a handler's
# docstring as the endpoint description in the generated API docs.)
@app.post("/classify")
def classify_email(data: EmailInput):
    email_body = data.input_email_body

    # Masking — the placeholder-to-original map is not part of the response.
    masked_email, _pii_map, masked_entities = mask_and_store_all_pii(email_body)

    # Prediction — the model receives a one-item batch holding the masked text.
    predictions = model.predict([masked_email])

    # Response format
    return dict(
        input_email_body=email_body,
        list_of_masked_entities=masked_entities,
        masked_email=masked_email,
        category_of_the_email=predictions[0],
    )
# Health check endpoint
# GET / returns a fixed message so callers can confirm the service is up.
@app.get("/")
def root():
    status = dict(message="Email Classification API is running.")
    return status
|