Create app.py
app.py
ADDED
@@ -0,0 +1,989 @@
import os
import re
import gradio as gr
from datetime import datetime
import tempfile
import io
import base64

# Check for required dependencies
try:
    import fitz  # PyMuPDF
    PDF_SUPPORT = True
except ImportError:
    PDF_SUPPORT = False
    print("Warning: PyMuPDF not available, PDF support disabled")

try:
    from gtts import gTTS
    TTS_SUPPORT = True
except ImportError:
    TTS_SUPPORT = False
    print("Warning: gTTS not available, audio synthesis disabled")

try:
    from groq import Groq
    GROQ_SUPPORT = True
except ImportError:
    GROQ_SUPPORT = False
    print("Warning: Groq not available")

try:
    from transformers import pipeline
    HF_TRANSFORMERS_SUPPORT = True
except ImportError:
    HF_TRANSFORMERS_SUPPORT = False
    print("Warning: Transformers not available")

# ✅ Load secrets from environment with better error handling
groq_key = os.environ.get('GROQ_API_KEY')
hf_token = os.environ.get('HF_TOKEN')
kaggle_key = os.environ.get('KAGGLE_KEY')
kaggle_username = os.environ.get('KAGGLE_USERNAME')

# Ensure none of the required secrets are missing
if not all([groq_key, hf_token]):
    raise EnvironmentError("❌ One or more required API keys are missing from environment variables.")

# Initialize components with fallbacks
client = None
phishing_pipe = None

if GROQ_SUPPORT and groq_key:
    try:
        client = Groq(api_key=groq_key)
        print("✅ Groq client initialized")
    except Exception as e:
        print(f"❌ Groq initialization failed: {e}")

if HF_TRANSFORMERS_SUPPORT and hf_token:
    try:
        phishing_pipe = pipeline(
            "text-classification",
            model="ealvaradob/bert-finetuned-phishing",
            token=hf_token,
            return_all_scores=True
        )
        print("✅ Phishing detection model loaded")
    except Exception as e:
        print(f"❌ Model loading failed: {e}")
        # Try alternative model
        try:
            phishing_pipe = pipeline(
                "text-classification",
                model="martin-ha/toxic-comment-model",
                return_all_scores=True
            )
            print("✅ Fallback model loaded")
        except Exception as e2:
            print(f"❌ Fallback model also failed: {e2}")

# Global variables
history_log = []
detailed_log = []

# 🎯 Role options
role_choices = ["Procurement", "Warehouse", "Admin", "Finance", "Logistics"]

# 🌐 Language options
language_choices = [
    "English", "Urdu", "Arabic", "French", "German", "Spanish", "Portuguese", "Hindi", "Turkish",
    "Bengali", "Russian", "Chinese", "Japanese", "Korean", "Swahili", "Indonesian", "Italian",
    "Dutch", "Polish", "Thai", "Vietnamese", "Romanian", "Persian", "Punjabi", "Greek", "Hebrew",
    "Malay", "Czech", "Danish", "Finnish", "Hungarian", "Norwegian", "Slovak", "Swedish", "Tamil",
    "Telugu", "Gujarati", "Marathi", "Pashto", "Serbian", "Croatian", "Ukrainian", "Bulgarian",
    "Filipino", "Sinhala", "Mongolian", "Kazakh", "Azerbaijani", "Nepali", "Malayalam"
]

# Glossary terms with tooltip
GLOSSARY = {
    "phishing": "Phishing is a scam where attackers trick you into revealing personal information.",
    "domain spoofing": "Domain spoofing is when someone fakes a legitimate website's address to deceive you.",
    "malware": "Malicious software designed to harm or exploit systems.",
    "spam": "Unwanted or unsolicited messages.",
    "tone": "The emotional character of the message."
}

def extract_text_from_file(file_obj):
    """Extract text from uploaded files with error handling"""
    if file_obj is None:
        return ""

    try:
        file_path = file_obj.name if hasattr(file_obj, 'name') else str(file_obj)
        ext = file_path.split(".")[-1].lower()

        if ext == "pdf" and PDF_SUPPORT:
            doc = fitz.open(file_path)
            return "\n".join(page.get_text() for page in doc)
        elif ext == "txt":
            with open(file_path, "r", encoding="utf-8", errors="ignore") as f:
                return f.read()
        else:
            return f"Unsupported file type: {ext}"
    except Exception as e:
        return f"Error reading file: {str(e)}"

def rag_based_reranking(text, bert_analysis, language="English"):
    """
    RAG/Prompt-Based Reranking: Use LLaMA to reanalyze and improve BERT's classification
    This adds semantic analysis and intent understanding
    """
    try:
        # Create prompt for LLaMA semantic reanalysis
        reranking_prompt = [
            {
                "role": "system",
                "content": f"""
You are an expert cybersecurity analyst specializing in phishing detection.
Your job is to reanalyze email/message content using semantic understanding and intent analysis.

You have received a preliminary classification from a BERT model, but you need to provide a more accurate assessment using your understanding of:
- Social engineering tactics
- Urgency patterns
- Suspicious requests
- Context and intent
- Language patterns that indicate deception

Respond with your reanalysis in this exact format:
REANALYZED_THREAT_TYPE: [safe/spam/phishing/malware]
CONFIDENCE_LEVEL: [low/medium/high]
REASONING: [Brief explanation of your decision]
SEMANTIC_INDICATORS: [What semantic clues led to this conclusion]
"""
            },
            {
                "role": "user",
                "content": f"""
ORIGINAL MESSAGE TO ANALYZE:
"{text}"

BERT MODEL'S PRELIMINARY ANALYSIS:
- Classification: {bert_analysis.get('model_prediction', 'Unknown')}
- Threat Type: {bert_analysis.get('threat_type', 'unknown')}
- AI Confidence: {bert_analysis.get('ai_confidence_score', 0)}%

TASK: Does this message pose a phishing, spam, malware, or other security risk?
Use your semantic understanding to reanalyze this message. Consider:
1. Intent and context
2. Social engineering patterns
3. Urgency or pressure tactics
4. Suspicious requests (credentials, money, personal info)
5. Language patterns that suggest deception
6. Overall trustworthiness

Provide your reanalysis using the format specified above.
"""
            }
        ]

        # Get LLaMA's semantic reanalysis
        response = client.chat.completions.create(
            model="llama3-8b-8192",
            messages=reranking_prompt,
            temperature=0.1,  # Low temperature for consistent analysis
            max_tokens=500
        )

        llama_response = response.choices[0].message.content.strip()

        # Parse LLaMA's response
        reanalysis = parse_llama_reanalysis(llama_response)

        # Combine BERT and LLaMA insights
        final_analysis = combine_bert_llama_analysis(bert_analysis, reanalysis, text)

        return final_analysis

    except Exception as e:
        print(f"RAG Reranking failed: {e}")
        # Fallback to original BERT analysis
        bert_analysis['rag_error'] = str(e)
        return bert_analysis

def parse_llama_reanalysis(llama_response):
    """Parse LLaMA's structured response"""
    reanalysis = {
        'llama_threat_type': 'unknown',
        'llama_confidence': 'medium',
        'llama_reasoning': '',
        'semantic_indicators': ''
    }

    lines = llama_response.split('\n')
    for line in lines:
        line = line.strip()
        if line.startswith('REANALYZED_THREAT_TYPE:'):
            reanalysis['llama_threat_type'] = line.split(':', 1)[1].strip().lower()
        elif line.startswith('CONFIDENCE_LEVEL:'):
            reanalysis['llama_confidence'] = line.split(':', 1)[1].strip().lower()
        elif line.startswith('REASONING:'):
            reanalysis['llama_reasoning'] = line.split(':', 1)[1].strip()
        elif line.startswith('SEMANTIC_INDICATORS:'):
            reanalysis['semantic_indicators'] = line.split(':', 1)[1].strip()

    return reanalysis

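# Illustrative sketch (not part of the app logic): assuming the model follows the
# response format requested above, a reply such as
#     REANALYZED_THREAT_TYPE: phishing
#     CONFIDENCE_LEVEL: high
#     REASONING: Urgent request for credentials impersonating the IT helpdesk.
#     SEMANTIC_INDICATORS: urgency, credential request, impersonation
# would be parsed by parse_llama_reanalysis into
#     {'llama_threat_type': 'phishing', 'llama_confidence': 'high',
#      'llama_reasoning': 'Urgent request for credentials impersonating the IT helpdesk.',
#      'semantic_indicators': 'urgency, credential request, impersonation'}
# Any line that does not start with one of the expected prefixes is simply ignored.
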
def combine_bert_llama_analysis(bert_analysis, llama_reanalysis, original_text):
    """
    Combine BERT and LLaMA analysis using hybrid decision logic
    LLaMA's semantic understanding gets priority for final classification
    """

    # Get both predictions
    bert_threat = bert_analysis.get('threat_type', 'unknown')
    llama_threat = llama_reanalysis.get('llama_threat_type', 'unknown')
    llama_confidence = llama_reanalysis.get('llama_confidence', 'medium')

    # Hybrid decision logic
    if llama_confidence == 'high':
        # Trust LLaMA's high-confidence assessment
        final_threat_type = llama_threat
        decision_method = "LLaMA High Confidence"
    elif bert_threat == llama_threat:
        # Both models agree - high confidence in result
        final_threat_type = bert_threat
        decision_method = "BERT + LLaMA Agreement"
    elif llama_threat == 'safe' and bert_threat in ['spam', 'unknown']:
        # LLaMA says safe, BERT unsure - lean towards safe
        final_threat_type = 'safe'
        decision_method = "LLaMA Safety Override"
    elif llama_threat in ['phishing', 'malware'] and bert_threat != 'safe':
        # LLaMA detects serious threat - prioritize security
        final_threat_type = llama_threat
        decision_method = "LLaMA Threat Detection"
    else:
        # Default to BERT with LLaMA insights
        final_threat_type = bert_threat
        decision_method = "BERT with LLaMA Insights"

    # Create enhanced analysis combining both models
    enhanced_analysis = bert_analysis.copy()
    enhanced_analysis.update({
        'final_threat_type': final_threat_type,
        'bert_prediction': bert_threat,
        'llama_prediction': llama_threat,
        'llama_confidence': llama_confidence,
        'llama_reasoning': llama_reanalysis.get('llama_reasoning', ''),
        'semantic_indicators': llama_reanalysis.get('semantic_indicators', ''),
        'decision_method': decision_method,
        'hybrid_analysis': True
    })

    # Recalculate threat score based on final classification
    enhanced_analysis['threat_type'] = final_threat_type
    threat_score = calculate_threat_score(enhanced_analysis)
    enhanced_analysis['threat_score'] = threat_score

    return enhanced_analysis

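# Illustrative walk-through of the hybrid decision rules above (hypothetical labels):
#     BERT 'spam',     LLaMA 'safe'     (confidence 'high')   -> 'safe'      ("LLaMA High Confidence")
#     BERT 'phishing', LLaMA 'phishing' (any confidence)      -> 'phishing'  ("BERT + LLaMA Agreement")
#     BERT 'unknown',  LLaMA 'safe'     (confidence 'medium') -> 'safe'      ("LLaMA Safety Override")
#     BERT 'spam',     LLaMA 'phishing' (confidence 'medium') -> 'phishing'  ("LLaMA Threat Detection")
#     Any other combination keeps the BERT label              ("BERT with LLaMA Insights")
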
def calculate_threat_score(hf_analysis):
    """
    Calculate threat score based on AI analysis results
    Returns score from 0-100 where higher means more dangerous
    """
    threat_type = hf_analysis.get('threat_type', 'unknown')
    confidence_percentage = hf_analysis.get('ai_confidence_score', 0)

    if threat_type == 'safe':
        # For safe messages, use INVERSE of confidence
        # High confidence in "safe" = Low threat score
        threat_score = max(0, min(20, (100 - confidence_percentage) * 0.2))

    elif threat_type == 'spam':
        # For spam, map confidence to 21-40% range
        threat_score = 21 + (confidence_percentage * 0.19)

    elif threat_type == 'phishing':
        # For phishing, map confidence to 61-80% range
        threat_score = 61 + (confidence_percentage * 0.19)

    elif threat_type == 'malware':
        # For malware, map confidence to 81-100% range
        threat_score = 81 + (confidence_percentage * 0.19)

    else:
        # For unknown threats, use moderate risk
        threat_score = 41 + (confidence_percentage * 0.19)

    # Ensure score stays within bounds
    threat_score = round(min(100, max(0, threat_score)), 1)

    # Additional safety check for very short, innocent messages
    text = hf_analysis.get('raw_text', '')
    if len(text.strip()) <= 10 and threat_type == 'safe':
        threat_score = min(threat_score, 10.0)

    return threat_score

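# Worked examples of the score bands above (confidence values are illustrative only):
#     'phishing' at 90% confidence -> 61 + 90 * 0.19 = 78.1   (61-80 band)
#     'spam'     at 50% confidence -> 21 + 50 * 0.19 = 30.5   (21-40 band)
#     'safe'     at 95% confidence -> (100 - 95) * 0.2 = 1.0  (near zero, capped at 20)
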
def analyze_with_huggingface(text):
    """
    First stage: Analyze message using Hugging Face BERT model
    Returns detailed technical analysis for LLaMA to interpret
    FIXED VERSION: Properly handles safe messages like "HI"
    """
    try:
        # Get prediction from Hugging Face model
        result = phishing_pipe(text)

        # Extract prediction details.
        # With return_all_scores=True the pipeline may return a (possibly nested)
        # list of {label, score} dicts, so normalise to the highest-scoring label.
        candidates = result[0] if isinstance(result[0], list) else result
        prediction = max(candidates, key=lambda item: item['score'])
        label = prediction['label']
        confidence_score = prediction['score']

        # Convert to percentage
        confidence_percentage = round(confidence_score * 100, 2)

        # Map labels to threat types (adjust based on your model's labels)
        threat_mapping = {
            'PHISHING': 'phishing',
            'LEGITIMATE': 'safe',
            'SPAM': 'spam',
            'MALWARE': 'malware'
        }

        threat_type = threat_mapping.get(label.upper(), 'unknown')

        # FIXED LOGIC: Calculate threat score based on what the model actually detected
        if threat_type == 'safe':
            # For safe messages, use INVERSE of confidence
            # High confidence in "safe" = Low threat score
            threat_score = max(0, min(20, (100 - confidence_percentage) * 0.2))
            threat_level = "Safe"

        elif threat_type == 'spam':
            # For spam, map confidence to 21-40% range
            threat_score = 21 + (confidence_percentage * 0.19)
            threat_level = "Minimal Suspicion"

        elif threat_type == 'phishing':
            # For phishing, map confidence to 61-80% range
            threat_score = 61 + (confidence_percentage * 0.19)
            threat_level = "Likely Threat"

        elif threat_type == 'malware':
            # For malware, map confidence to 81-100% range
            threat_score = 81 + (confidence_percentage * 0.19)
            threat_level = "Severe Threat"

        else:
            # For unknown threats, use moderate risk
            threat_score = 41 + (confidence_percentage * 0.19)
            threat_level = "Needs Attention"

        # Ensure score stays within bounds
        threat_score = round(min(100, max(0, threat_score)), 1)

        # Additional safety check for very short, innocent messages
        if len(text.strip()) <= 10 and threat_type == 'safe':
            # For very short messages classified as safe, ensure low threat score
            threat_score = min(threat_score, 10.0)
            threat_level = "Safe"

        # Create technical analysis summary for LLaMA
        technical_analysis = {
            'model_prediction': label,
            'ai_confidence_score': confidence_percentage,
            'threat_type': threat_type,
            'threat_score': threat_score,
            'threat_level': threat_level,
            'raw_text': text[:500]
        }

        return technical_analysis

    except Exception as e:
        # Fallback analysis if Hugging Face model fails
        return {
            'model_prediction': 'UNKNOWN',
            'ai_confidence_score': 0,
            'threat_type': 'unknown',
            'threat_score': 50.0,
            'threat_level': 'Needs Attention',
            'raw_text': text[:500],
            'error': str(e)
        }

def build_enhanced_prompt_messages(hf_analysis, language="English", role="Admin"):
    """
    Build prompt that includes both BERT and LLaMA reanalysis for final interpretation
    """
    # Check if hybrid analysis was performed
    if hf_analysis.get('hybrid_analysis', False):
        technical_data = f"""
HYBRID AI ANALYSIS RESULTS:
- BERT Model Prediction: {hf_analysis.get('bert_prediction', 'Unknown')}
- LLaMA Semantic Analysis: {hf_analysis.get('llama_prediction', 'Unknown')}
- Final Classification: {hf_analysis['final_threat_type']}
- Decision Method: {hf_analysis.get('decision_method', 'Standard')}
- LLaMA Confidence: {hf_analysis.get('llama_confidence', 'medium')}
- Threat Score: {hf_analysis['threat_score']}% (0-100, higher = more dangerous)
- LLaMA Reasoning: {hf_analysis.get('llama_reasoning', 'N/A')}
- Semantic Indicators: {hf_analysis.get('semantic_indicators', 'N/A')}
- Original Message: "{hf_analysis['raw_text']}"
"""
    else:
        technical_data = f"""
STANDARD AI ANALYSIS:
- Model Prediction: {hf_analysis['model_prediction']}
- Detected Threat Type: {hf_analysis['threat_type']}
- Threat Score: {hf_analysis['threat_score']}% (0-100, higher = more dangerous)
- Original Message: "{hf_analysis['raw_text']}"
"""

    if 'error' in hf_analysis or 'rag_error' in hf_analysis:
        error_msg = hf_analysis.get('error', hf_analysis.get('rag_error', ''))
        technical_data += f"\n- Analysis Note: {error_msg}"

    return [
        {
            "role": "system",
            "content": f"""
You are a friendly cybersecurity assistant helping employees in the supply chain industry.
You have received results from a hybrid AI analysis system that combines:
1. BERT model for technical pattern detection
2. LLaMA model for semantic understanding and intent analysis

Your job is to explain these results in SIMPLE, NON-TECHNICAL language that anyone can understand.

Guidelines:
- Use everyday words instead of technical jargon
- The threat score ranges from 0-100 where higher numbers mean more dangerous
- Explain both what the computers found AND why it matters
- Give clear, practical advice for a {role} employee
- If there's disagreement between models, explain what that means

Always respond completely in {language} only.
Make it sound like you're talking to a friend, not giving a technical report.
"""
        },
        {
            "role": "user",
            "content": f"""
Please analyze this hybrid security check and explain it in simple terms:

{technical_data}

Respond in this format using everyday language:
1. Tone: (How does the message sound? Pushy, friendly, normal, etc.)
2. Threat Type: (What kind of danger is this? Safe message, scam attempt, spam, etc.)
3. Threat Score: (Show the danger level number from 0-100)
4. AI Analysis Summary: (What did both computer systems find? Did they agree?)
5. Simple Explanation (in {language}): (Explain in plain words why this is safe or dangerous)
6. What should you do as a {role} worker (in {language}): (Clear, simple steps)
7. Why the computers flagged this: (Explain what the AI systems noticed)
8. Detailed Advisory (in {language}): (Comprehensive guidance and precautions)
"""
        }
    ]

def get_threat_level_display(threat_score):
    """
    Get color-coded threat level display based on corrected 5-level system
    """
    if threat_score <= 20:
        return "🟢 SAFE - No threat detected"
    elif threat_score <= 40:
        return "🟡 MINIMAL SUSPICION - Minor concerns"
    elif threat_score <= 60:
        return "🟠 NEEDS ATTENTION - Requires careful review"
    elif threat_score <= 80:
        return "🔴 LIKELY THREAT - High probability of danger"
    else:
        return "⚫ SEVERE THREAT - Immediate action required"

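# Tying the helpers together (illustrative): the hypothetical phishing score of 78.1
# computed in the calculate_threat_score example falls in the 61-80 bracket, so
# get_threat_level_display(78.1) returns "🔴 LIKELY THREAT - High probability of danger".
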
def generate_text_report(analysis_data, hf_analysis, input_text):
    """
    Generate a structured text report that can be downloaded as a .txt file
    """
    timestamp = datetime.now().strftime("%Y-%m-%d %H:%M:%S")

    report = f"""
================================================================================
ZEROPHISH GATE - SECURITY ANALYSIS REPORT
================================================================================

Analysis Date: {timestamp}
Generated by: ZeroPhish Gate AI Security System

================================================================================
ANALYZED MESSAGE
================================================================================

{input_text}

================================================================================
THREAT ASSESSMENT SUMMARY
================================================================================

AI Detection: {hf_analysis.get('model_prediction', 'Unknown')}
Message Type: {hf_analysis.get('threat_type', 'Unknown').title()}
Threat Score: {hf_analysis.get('threat_score', 'N/A')}%

================================================================================
DETAILED ANALYSIS
================================================================================

"""

    # Parse detailed analysis sections (headings mirror the response format
    # requested in build_enhanced_prompt_messages)
    sections = {}
    current_section = ""
    lines = analysis_data.split('\n')

    for line in lines:
        line = line.strip()
        if line.startswith(('1. Tone:', '2. Threat Type:', '3. Threat Score:', '4. AI Analysis Summary',
                            '5. Simple Explanation', '6. What should you do', '7. Why the computers',
                            '8. Detailed Advisory')):
            current_section = line
            sections[current_section] = ""
        elif current_section and line and not line.startswith('🤖'):
            sections[current_section] += line + " "

    # Add detailed analysis sections
    section_titles = [
        ("1. Tone:", "MESSAGE TONE ANALYSIS"),
        ("2. Threat Type:", "THREAT CLASSIFICATION"),
        ("3. Threat Score:", "THREAT SCORE ASSESSMENT"),
        ("4. AI Analysis Summary", "AI ANALYSIS SUMMARY"),
        ("5. Simple Explanation", "DETAILED EXPLANATION"),
        ("6. What should you do", "RECOMMENDED ACTIONS"),
        ("7. Why the computers", "AI DETECTION REASONING"),
        ("8. Detailed Advisory", "COMPREHENSIVE ADVISORY")
    ]

    for section_key, section_title in section_titles:
        content_text = ""
        for key, value in sections.items():
            if key.startswith(section_key):
                content_text = value.strip()
                break

        if content_text:
            report += f"""
{section_title}
{'-' * len(section_title)}

{content_text}

"""

    # Footer
    report += f"""
================================================================================
REPORT FOOTER
================================================================================

This report was generated by ZeroPhish Gate AI Security System.
For support or questions, contact your IT security team.

Report ID: ZPG-{timestamp.replace(' ', 'T').replace(':', '-')}
Analysis completed at: {timestamp}

================================================================================
"""

    return report

def generate_downloadable_report(analysis_data, hf_analysis, input_text):
    """
    Generate a downloadable report file
    """
    # Create text report
    text_report = generate_text_report(analysis_data, hf_analysis, input_text)

    # Create downloadable file
    timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
    filename = f"zerophish_security_report_{timestamp}.txt"

    # Create a temporary file for download
    with tempfile.NamedTemporaryFile(mode='w', suffix='.txt', delete=False, encoding='utf-8') as f:
        f.write(text_report)
        temp_path = f.name

    return temp_path

def add_visual_badges(text):
    tags = []
    if "urgent" in text.lower():
        tags.append("🔴 Urgent Tone Detected")
    if "suspicious" in text.lower() and "domain" in text.lower():
        tags.append("🔗 Suspicious Domain")
    if "safe" in text.lower() or "threat type: safe" in text.lower():
        tags.append("🟩 Clean")
    if "ai model" in text.lower():
        tags.append("🤖 AI-Enhanced Analysis")
    if tags:
        return text + "\n\n🎨 Visual Tags:\n" + "\n".join(tags)
    return text

def apply_glossary_tooltips(text):
    """Apply HTML tooltips for glossary terms"""
    # First, convert newlines to HTML breaks
    text = text.replace('\n', '<br>')

    # Apply tooltips to glossary terms
    for term, definition in GLOSSARY.items():
        pattern = re.compile(rf'\b{re.escape(term)}\b', re.IGNORECASE)
        # Create HTML span with title attribute for tooltip
        tooltip_html = f'<span title="{definition}" style="font-weight: bold; text-decoration: underline; cursor: help; color: #0066cc;">{term}</span>'
        text = pattern.sub(tooltip_html, text)

    # Handle markdown-style bold text
    text = re.sub(r'\*\*(.*?)\*\*', r'<strong>\1</strong>', text)

    # Wrap in a simple div
    html_output = f'<div style="padding: 10px; line-height: 1.5; font-size: 14px;">{text}</div>'

    return html_output

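# Illustrative only: for an input such as "possible phishing attempt", the loop above
# produces HTML roughly like
#     <div style="...">possible <span title="Phishing is a scam where attackers trick you
#     into revealing personal information." style="...">phishing</span> attempt</div>
# i.e. every glossary term becomes a hover tooltip inside the wrapping <div>.
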
def synthesize_audio(text, language):
    if not TTS_SUPPORT:
        return None

    try:
        # Keep audio short and simple
        audio_text = f"Security analysis complete. This message appears to be {text.split('**Message Type:**')[1].split('**')[0].strip()} risk level."
        tts = gTTS(text=audio_text, lang=language.lower()[:2], slow=False)

        # Create temporary file
        with tempfile.NamedTemporaryFile(delete=False, suffix=".mp3") as tmp_file:
            tts.save(tmp_file.name)
            return tmp_file.name
    except Exception as e:
        print(f"Audio synthesis failed: {e}")
        return None

def new_chat():
    """Reset all fields for a new chat session"""
    return (
        "",                        # text_input
        None,                      # file_input
        "",                        # output
        gr.update(visible=False),  # report_btn
        gr.update(visible=False),  # ignore_btn
        gr.update(visible=False),  # report_msg
        None,                      # audio_output
        None,                      # report_file
    )

def analyze_message_interface(text_input, uploaded_file, language, role):
    file_text = extract_text_from_file(uploaded_file) if uploaded_file else ""
    text_input = text_input.strip()
    file_text = file_text.strip()

    if not text_input and not file_text:
        return "❌ No input provided via text or file.", gr.update(visible=False), history_log, gr.update(choices=[], value=None), "", None, gr.update(visible=False), gr.update(visible=False), None

    combined_input = f"User message:\n{text_input}\n\nAttached file content:\n{file_text}" if text_input and file_text else (text_input or file_text)

    # STAGE 1: Hugging Face BERT Analysis
    print("🤖 Stage 1: Running Hugging Face BERT analysis...")
    bert_analysis = analyze_with_huggingface(combined_input)

    # STAGE 1.5: RAG-Based Reranking with LLaMA Semantic Analysis
    print("🧠 Stage 1.5: RAG-based reranking with LLaMA semantic analysis...")
    hf_analysis = rag_based_reranking(combined_input, bert_analysis, language)

    # Calculate final threat score
    threat_score = calculate_threat_score(hf_analysis)
    hf_analysis['threat_score'] = threat_score

    # Stage 2: Final LLaMA Interpretation
    print("🧠 Stage 2: Final LLaMA interpretation of hybrid analysis...")
    messages = build_enhanced_prompt_messages(hf_analysis, language, role)
    response = client.chat.completions.create(
        model="llama3-8b-8192",
        messages=messages,
        temperature=0.3,
        max_tokens=1000
    )
    result = response.choices[0].message.content.strip()

    # Add hybrid analysis summary to the result
    if hf_analysis.get('hybrid_analysis', False):
        result += f"\n\n🤖 **Hybrid AI Analysis Summary:**\n"
        result += f"- BERT Detection: {hf_analysis.get('bert_prediction', 'Unknown').title()}\n"
        result += f"- LLaMA Reanalysis: {hf_analysis.get('llama_prediction', 'Unknown').title()}\n"
        result += f"- Final Decision: {hf_analysis['final_threat_type'].title()}\n"
        result += f"- Method: {hf_analysis.get('decision_method', 'Standard')}\n"
        result += f"- Threat Score: {hf_analysis['threat_score']}%"
    else:
        result += f"\n\n🤖 **Computer Analysis Summary:**\n- Detection: {hf_analysis['model_prediction']}\n- Message Type: {hf_analysis['threat_type'].title()}\n- Threat Score: {hf_analysis['threat_score']}%"

    result_with_badges = add_visual_badges(result)
    result_with_tooltips = apply_glossary_tooltips(result_with_badges)

    # Determine if threat based on final analysis
    final_threat_type = hf_analysis.get('final_threat_type', hf_analysis.get('threat_type', 'unknown'))
    is_threat = hf_analysis['threat_score'] > 20

    # Extract information for logging
    threat_score_str = f"{hf_analysis['threat_score']}%"
    status = "Safe" if final_threat_type == 'safe' else "Review"

    history_log.append([
        datetime.now().strftime("%Y-%m-%d %H:%M"),
        combined_input[:40] + "...",
        threat_score_str,
        final_threat_type.title(),
        status
    ])

    # Store data for detailed view
    detailed_log.append({
        "full_input": combined_input,
        "full_result": result_with_badges,
        "hf_analysis": hf_analysis,
        "timestamp": datetime.now().strftime("%Y-%m-%d %H:%M:%S")
    })

    audio_path = synthesize_audio(result, language)

    # Generate downloadable report
    report_file = generate_downloadable_report(result_with_badges, hf_analysis, combined_input)

    return (
        result_with_tooltips,           # output rendered as HTML
        gr.update(visible=is_threat),   # report_btn
        history_log,
        gr.update(choices=[str(i) for i in range(len(detailed_log))], value=None),
        "",                             # full_view
        audio_path,
        gr.update(visible=is_threat),   # report_msg
        gr.update(visible=is_threat),   # ignore_btn
        report_file                     # report_file for download
    )

def view_full_report(index):
    try:
        idx = int(index)
        record = detailed_log[idx]
        hf_data = record['hf_analysis']

        report = f"📅 **Timestamp:** {record['timestamp']}\n\n"
        report += f"📨 **Input Message:**\n{record['full_input']}\n\n"
        report += f"🤖 **Computer Security Check:**\n"
        report += f"- What AI Found: {hf_data['model_prediction']}\n"
        report += f"- Message Type: {hf_data['threat_type']}\n"
        report += f"- Threat Score: {hf_data.get('threat_score', 'N/A')}%\n"
        if 'error' in hf_data:
            report += f"- Note: {hf_data['error']}\n"
        report += f"\n📋 **Detailed Analysis:**\n{record['full_result']}"

        return report
    except Exception:
        return "❌ Invalid selection"

def report_to_it(language):
    english_msg = "✅ Your request has been forwarded to the relevant department..."
    if history_log:
        history_log[-1][4] = "Reported"

    if language.lower() == "english":
        return english_msg, history_log

    prompt = [{
        "role": "user",
        "content": f'Translate this message into {language} and include the English version in brackets:\n\n"{english_msg}"'
    }]
    response = client.chat.completions.create(
        model="llama3-8b-8192",
        messages=prompt,
        temperature=0.2,
        max_tokens=250
    )
    output = response.choices[0].message.content.strip()

    match = re.search(r'([^\[]+)(\[[^\]]+\])', output)
    if match:
        return f"{match.group(1).strip()}\n{match.group(2).strip()}", history_log
    else:
        return f"{output}\n[{english_msg}]", history_log

def ignore_latest():
    if history_log:
        history_log[-1][4] = "Ignored"
    return history_log

def clear_history():
    history_log.clear()
    detailed_log.clear()
    return [], [], "", gr.update(visible=False)

def create_interface():
    """Create the Gradio interface"""

    with gr.Blocks(
        title="ZeroPhish Gate - Phishing Detection",
        theme=gr.themes.Soft(),
        css="""
        .main-header { text-align: center; margin-bottom: 20px; }
        .analysis-output { padding: 15px; border-radius: 10px; }
        """
    ) as demo:

        gr.HTML("""
        <div class="main-header">
            <h1>🛡️ ZeroPhish Gate</h1>
            <h3>AI-Powered Phishing & Threat Detection</h3>
            <p>Analyze messages, emails, and documents for potential security threats</p>
        </div>
        """)

        # System status
        status_msg = "🟢 System Ready"
        if not (GROQ_SUPPORT and groq_key):
            status_msg += " (Advanced AI disabled)"
        if not phishing_pipe:
            status_msg += " (Using basic detection)"

        gr.Markdown(f"**Status:** {status_msg}")

        with gr.Row():
            with gr.Column(scale=3):
                text_input = gr.Textbox(
                    label="📝 Message to Analyze",
                    placeholder="Paste suspicious email, SMS, or message here...",
                    lines=5
                )

                file_input = gr.File(
                    label="📎 Upload File (Optional)",
                    file_types=[".txt", ".pdf"] if PDF_SUPPORT else [".txt"]
                )

            with gr.Column(scale=1):
                role = gr.Dropdown(
                    label="👤 Your Role",
                    choices=role_choices,
                    value="Admin"
                )

                language = gr.Dropdown(
                    label="🌐 Language",
                    choices=language_choices,
                    value="English"
                )

                analyze_btn = gr.Button(
                    "🔍 Analyze Message",
                    variant="primary",
                    size="lg"
                )

                new_chat_btn = gr.Button("🔄 New Chat", variant="secondary")

        with gr.Row():
            output = gr.HTML(
                label="📊 Analysis Results",
                elem_classes=["analysis-output"]
            )

        with gr.Row():
            report_btn = gr.Button("🚨 Report to IT", visible=False, variant="stop")
            ignore_btn = gr.Button("🙈 Ignore Message", visible=False)

        report_msg = gr.Textbox(label="📣 IT Confirmation", visible=False, interactive=False)

        with gr.Row():
            audio_output = gr.Audio(label="🔊 Voice Output (Click to Play)", interactive=False, autoplay=False)
            report_file = gr.File(label="📄 Download Security Report", interactive=False)

        with gr.Accordion("📊 Risk History & Detailed Reports", open=False):
            history_table = gr.Dataframe(
                headers=["Time", "Preview", "Threat Score", "Type", "Status"],
                label="📋 Risk History Log",
                interactive=False,
                wrap=True
            )
            clear_btn = gr.Button("🧹 Clear History")

            selected_idx = gr.Dropdown(label="🔍 Select Report to View", choices=[], interactive=True)
            full_view = gr.Textbox(label="📄 Detailed Analysis", lines=12, interactive=False)

        with gr.Accordion("ℹ️ Help & Information", open=False):
            gr.Markdown("""
            ### How to Use
            1. **Paste or type** the suspicious message in the text box
            2. **Upload a file** (PDF or TXT) if needed
            3. **Select your role** for personalized advice
            4. **Click Analyze** to get results

            ### Threat Types
            - 🟢 **Safe**: No threats detected
            - 🟡 **Spam**: Unwanted promotional content
            - 🟠 **Suspicious**: Potentially harmful content
            - 🔴 **Phishing**: Attempts to steal information
            - 🔴 **Malware**: Malicious software threats

            ### Tips
            - Always verify suspicious requests through official channels
            - Never click links or download attachments from unknown senders
            - When in doubt, contact your IT security team
            """)

        with gr.Accordion("📘 Glossary Help", open=False):
            gr.Markdown("""
            **Hover over underlined blue terms in the analysis to see their definitions:**
            - **Phishing**: A type of online scam where attackers trick you into giving away personal information
            - **Domain Spoofing**: When a fake website mimics a trusted one by using a similar-looking web address
            - **Malware**: Software designed to harm or gain unauthorized access to your device or data
            - **Spam**: Unwanted or unsolicited messages, usually advertisements or scams
            - **Tone**: The emotional tone in a message, like being urgent or friendly
            """)

        with gr.Accordion("🤖 AI Pipeline Info", open=False):
            gr.Markdown("""
            **Three-Stage Hybrid Analysis Pipeline:**
            1. **Stage 1 - BERT Model:** Technical phishing pattern detection
            2. **Stage 1.5 - RAG Reranking:** LLaMA semantic reanalysis for intent understanding
            3. **Stage 2 - Final Interpretation:** User-friendly explanation generation

            **RAG-Based Reranking Benefits:**
            ✅ **Semantic Understanding:** LLaMA analyzes intent and context, not just patterns
            ✅ **Social Engineering Detection:** Better detection of psychological manipulation
            ✅ **Hybrid Decision Making:** Combines pattern matching with contextual analysis
            ✅ **Reduced False Positives:** More accurate classification of legitimate messages

            **How It Works:**
            - BERT identifies technical patterns (suspicious links, keywords, etc.)
            - LLaMA reanalyzes for social engineering, urgency, and intent
            - System combines both analyses for final classification
            - Prioritizes safety while reducing false alarms

            **Message Classification:**
            - Safe: Normal, legitimate messages
            - Spam: Unwanted promotional content
            - Phishing: Attempts to steal personal information
            - Malware: Messages with malicious attachments or links
            """)

        # Event handlers
        analyze_btn.click(
            fn=analyze_message_interface,
            inputs=[text_input, file_input, language, role],
            outputs=[output, report_btn, history_table, selected_idx, full_view, audio_output, report_msg, ignore_btn, report_file]
        )

        new_chat_btn.click(
            fn=new_chat,
            inputs=[],
            outputs=[text_input, file_input, output, report_btn, ignore_btn, report_msg, audio_output, report_file]
        )

        selected_idx.change(fn=view_full_report, inputs=[selected_idx], outputs=[full_view])
        report_btn.click(fn=report_to_it, inputs=[language], outputs=[report_msg, history_table])
        report_btn.click(lambda: gr.update(visible=True), outputs=report_msg)
        ignore_btn.click(fn=ignore_latest, outputs=[history_table])
        clear_btn.click(fn=clear_history, outputs=[history_table, selected_idx, full_view, report_msg])

    return demo

# Create and launch the interface
if __name__ == "__main__":
    demo = create_interface()
    demo.launch(
        server_name="0.0.0.0",
        server_port=7860,
        show_error=True,
        share=False
    )