File size: 1,647 Bytes
ed1b41e
f985395
 
 
ed1b41e
f985395
 
 
 
ed1b41e
f985395
ed1b41e
f985395
 
 
 
ed1b41e
f985395
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
ed1b41e
 
f985395
ed1b41e
 
f985395
ed1b41e
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
import gradio as gr
from transformers import AutoTokenizer, AutoModelForSequenceClassification
import torch
import torch.nn.functional as F

# Load the tokenizer and sequence-classification model once, at import time,
# so every call to detect_phishing() reuses the same module-level objects
# instead of reloading them per request.
model_name = "cybersectony/phishing-email-detection-distilbert_v2.4.1"
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForSequenceClassification.from_pretrained(model_name)

# Define the prediction function
def detect_phishing(email_text):
    """Classify an email's text and return a human-readable report.

    Args:
        email_text: Raw email content as a string. ``None``, empty, or
            whitespace-only input is not sent to the model.

    Returns:
        A multi-line string with the overall verdict, the top predicted
        label and its confidence, and the probability of every label.
        For blank input, a short prompt asking for email content is
        returned instead.
    """
    # Guard: a blank textbox would otherwise be scored like a real email and
    # yield a confident-looking but meaningless verdict.
    if not email_text or not email_text.strip():
        return "Please paste the email content to analyze."

    # Anything beyond 512 tokens is truncated before reaching the model.
    inputs = tokenizer(email_text, return_tensors="pt", truncation=True, max_length=512)
    with torch.no_grad():  # inference only; no gradients needed
        outputs = model(**inputs)
        # Softmax over the class dimension; [0] selects the single input row.
        probs = F.softmax(outputs.logits, dim=-1)[0]

    # NOTE(review): label order is assumed to match the model's output class
    # indices — confirm against model.config.id2label.
    labels = (
        "Legitimate Email",
        "Phishing URL",
        "Legitimate URL",
        "Phishing URL (Alt)",
    )
    label_probs = dict(zip(labels, probs.tolist()))
    predicted_label = max(label_probs, key=label_probs.get)
    confidence = label_probs[predicted_label]

    # Any label containing "Phishing" is treated as a suspicious result.
    verdict = (
        "⚠️ Suspicious Email Detected."
        if "Phishing" in predicted_label
        else "✅ Email Appears Legitimate."
    )
    details = "".join(
        f"{label}: {prob:.2%}\n" for label, prob in label_probs.items()
    )
    return (
        f"{verdict}\n\nPrediction: {predicted_label}\n"
        f"Confidence: {confidence:.2%}\n\nDetails:\n{details}"
    )

# Create Gradio interface
# Wires detect_phishing() to a simple text-in / text-out web form.
interface = gr.Interface(
    fn=detect_phishing,
    # Multi-line input box (lines=15) sized for pasting a whole email body.
    inputs=gr.Textbox(lines=15, placeholder="Paste the email content here..."),
    # detect_phishing() returns a preformatted report string, shown as text.
    outputs="text",
    title="Phishing Email Detector",
    description="Detects whether an email is phishing or legitimate using a fine-tuned DistilBERT model."
)

# Launch the app only when this file is run as a script, so importing the
# module does not start the interface.
if __name__ == "__main__":
    interface.launch()