abdull4h commited on
Commit
0f16575
·
verified ·
1 Parent(s): 66c1f5e

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +319 -240
app.py CHANGED
@@ -6,18 +6,22 @@ import random
6
  from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline
7
  import torch
8
  import time
 
 
 
 
9
 
10
  # Custom CSS for better styling
11
  custom_css = """
12
  .gradio-container {
13
  max-width: 1200px !important;
14
  }
15
- .alert-box {
16
- background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
17
  color: white;
18
- padding: 20px;
19
- border-radius: 10px;
20
- margin: 10px 0;
21
  }
22
  .status-success {
23
  background: #d4edda;
@@ -26,91 +30,171 @@ custom_css = """
26
  padding: 10px;
27
  border-radius: 5px;
28
  }
29
- .gpt-oss-badge {
30
- background: linear-gradient(45deg, #00c6ff, #0072ff);
31
- color: white;
32
- padding: 5px 10px;
33
- border-radius: 15px;
34
- font-weight: bold;
35
- }
36
  """
37
 
38
  # Global variables for model management
39
  model = None
40
  tokenizer = None
41
- model_status = "🔄 Initializing GPT-OSS-20B..."
42
 
43
- # Initialize GPT-OSS-20B with proper harmony format
44
  @spaces.GPU
45
- def initialize_gpt_oss():
46
- """Initialize OpenAI GPT-OSS-20B with harmony response format"""
47
  global model, tokenizer, model_status
48
 
49
- try:
50
- model_id = "openai/gpt-oss-20b"
51
- print(f"🚀 Loading {model_id}...")
52
-
53
- # Check GPU availability
54
- device = "cuda" if torch.cuda.is_available() else "cpu"
55
- print(f"Device: {device}")
56
-
57
- if torch.cuda.is_available():
58
- print(f"GPU: {torch.cuda.get_device_name()}")
59
- print(f"GPU Memory: {torch.cuda.get_device_properties(0).total_memory / 1e9:.1f}GB")
60
-
61
- # Load tokenizer
62
- tokenizer = AutoTokenizer.from_pretrained(model_id, trust_remote_code=True)
63
- print(" Tokenizer loaded")
64
-
65
- # Load model with optimized settings for zeroGPU
66
- model = AutoModelForCausalLM.from_pretrained(
67
- model_id,
68
- torch_dtype="auto", # Let it choose best dtype (MXFP4)
69
- device_map="auto", # Automatic GPU placement
70
- trust_remote_code=True,
71
- low_cpu_mem_usage=True,
72
- # MXFP4 quantization is built-in
73
- )
74
-
75
- print(" Model loaded with MXFP4 quantization")
76
- print(f"Model device: {next(model.parameters()).device}")
77
-
78
- # Test generation to ensure everything works
79
- test_messages = [
80
- {"role": "user", "content": "Hello, test message."}
81
- ]
82
-
83
- test_inputs = tokenizer.apply_chat_template(
84
- test_messages,
85
- add_generation_prompt=True,
86
- return_tensors="pt",
87
- return_dict=True,
88
- )
89
-
90
- if device == "cuda":
91
- test_inputs = {k: v.to(model.device) for k, v in test_inputs.items()}
92
-
93
- with torch.no_grad():
94
- test_output = model.generate(
95
- **test_inputs,
96
- max_new_tokens=10,
97
- do_sample=False,
98
- pad_token_id=tokenizer.eos_token_id
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
99
  )
100
-
101
- print("✅ Test generation successful")
102
-
103
- model_status = f"✅ OpenAI GPT-OSS-20B loaded successfully on {device} | MXFP4 Quantized | ~16GB Memory"
104
- return model_status
105
-
106
- except Exception as e:
107
- error_msg = f"❌ Failed to load GPT-OSS-20B: {str(e)}"
108
- print(error_msg)
109
- model_status = error_msg
110
- model, tokenizer = None, None
111
- return model_status
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
112
 
113
- # Enhanced attack scenarios
114
  ATTACK_SCENARIOS = {
115
  "🔄 Lateral Movement": {
116
  "description": "Advanced Persistent Threat (APT) - Attacker moving laterally through network after initial compromise",
@@ -231,134 +315,129 @@ ATTACK_SCENARIOS = {
231
  }
232
 
233
  @spaces.GPU
234
- def generate_gpt_oss_analysis(alert_data, analyst_level):
235
- """Generate analysis using OpenAI GPT-OSS-20B with harmony format"""
236
 
237
  if not model or not tokenizer:
238
  return get_fallback_analysis(alert_data, analyst_level)
239
 
240
- # Enhanced prompts designed for GPT-OSS reasoning capabilities
241
  security_prompts = {
242
- "L1": f"""You are a Level 1 SOC analyst conducting initial triage. Analyze this security alert and provide immediate actionable recommendations.
243
-
244
- **SECURITY ALERT:**
245
- - ID: {alert_data['id']}
246
- - Type: {alert_data['alert_type']}
247
- - Severity: {alert_data['severity']}
248
- - Source: {alert_data['source_ip']} → {alert_data['destination_ip']}
249
- - User: {alert_data['user']}
250
- - Evidence: {alert_data['raw_log']}
251
- - Intel: {alert_data['threat_intel']}
252
- - MITRE ATT&CK: {alert_data['mitre_tactic']}
253
- - Confidence: {alert_data['confidence']}%
254
-
255
- **PROVIDE L1 TRIAGE:**
256
- 1. Immediate containment actions
257
- 2. Risk assessment
258
- 3. Escalation decision with reasoning
259
- 4. Priority timeline
260
-
261
- Think step-by-step about the threat level and required response.""",
262
-
263
- "L2": f"""You are a Level 2 SOC analyst conducting detailed investigation. Perform comprehensive analysis of this cybersecurity incident.
264
-
265
- **INCIDENT DETAILS:**
266
- - Alert: {alert_data['alert_type']} | Severity: {alert_data['severity']}
267
- - Network Flow: {alert_data['source_ip']} → {alert_data['destination_ip']}
268
- - User Context: {alert_data['user']}
269
- - Technical Evidence: {alert_data['raw_log']}
270
- - Threat Intelligence: {alert_data['threat_intel']}
271
- - MITRE ATT&CK Technique: {alert_data['mitre_tactic']}
272
- - Detection Confidence: {alert_data['confidence']}%
273
-
274
- **CONDUCT L2 INVESTIGATION:**
275
- 1. Technical root cause analysis
276
- 2. Evidence correlation and timeline
277
- 3. Threat actor behavior analysis
278
- 4. Impact assessment and containment strategy
279
- 5. Investigation roadmap
280
-
281
- Use chain-of-thought reasoning to analyze the attack progression and recommend next steps.""",
282
-
283
- "L3": f"""You are a senior cybersecurity expert analyzing a sophisticated threat. Provide strategic assessment and executive-level recommendations.
284
-
285
- **THREAT INTELLIGENCE:**
286
- - Attack Vector: {alert_data['description']}
287
- - Technical Indicators: {alert_data['raw_log']}
288
- - Attribution Context: {alert_data['threat_intel']}
289
- - MITRE Technique: {alert_data['mitre_tactic']}
290
- - Confidence Level: {alert_data['confidence']}%
291
-
292
- **DELIVER L3 EXPERT ANALYSIS:**
293
- 1. Adversary attribution and campaign analysis
294
- 2. Strategic threat landscape assessment
295
- 3. Business impact and risk quantification
296
- 4. Comprehensive response strategy
297
- 5. Executive briefing points
298
-
299
- Apply deep reasoning to assess the broader implications and provide strategic recommendations."""
300
  }
301
 
302
  try:
303
  prompt = security_prompts.get(analyst_level, security_prompts["L2"])
304
 
305
- # Use proper harmony format for chat
306
- messages = [
307
- {"role": "user", "content": prompt}
308
- ]
309
-
310
- # Apply chat template (automatically uses harmony format)
311
- inputs = tokenizer.apply_chat_template(
312
- messages,
313
- add_generation_prompt=True,
314
- return_tensors="pt",
315
- return_dict=True,
316
- )
317
-
318
- # Move to device if using GPU
319
- if torch.cuda.is_available():
320
- inputs = {k: v.to(model.device) for k, v in inputs.items()}
321
-
322
- # Generate with optimized parameters for reasoning
323
- with torch.no_grad():
324
- outputs = model.generate(
325
- **inputs,
326
- max_new_tokens=500,
327
  do_sample=True,
328
- temperature=0.2, # Lower for focused analysis
329
  top_p=0.9,
330
- top_k=50,
331
  repetition_penalty=1.1,
332
- pad_token_id=tokenizer.eos_token_id,
333
- eos_token_id=tokenizer.eos_token_id
334
  )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
335
 
336
- # Decode the response
337
- input_length = inputs["input_ids"].shape[-1]
338
- generated_tokens = outputs[0][input_length:]
339
- analysis = tokenizer.decode(generated_tokens, skip_special_tokens=True)
340
-
341
- # Ensure quality
342
- if len(analysis.strip()) < 100:
343
  return get_fallback_analysis(alert_data, analyst_level)
344
 
345
- return f"""🤖 **OpenAI GPT-OSS-20B Analysis**
346
- <div class="gpt-oss-badge">Powered by GPT-OSS-20B • MoE Architecture • MXFP4 Quantized</div>
347
-
 
 
 
 
348
  ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━
349
 
350
  {analysis.strip()}
351
 
352
  ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━
353
- *Analysis generated using OpenAI's latest open-weight reasoning model*
354
- *21B parameters • 3.6B active per token • Apache 2.0 licensed*"""
355
 
356
  except Exception as e:
357
- print(f"GPT-OSS Error: {e}")
358
- return f"⚠️ GPT-OSS Error: {str(e)[:100]}\n\n{get_fallback_analysis(alert_data, analyst_level)}"
359
 
360
  def get_fallback_analysis(alert_data, analyst_level):
361
- """High-quality fallback when model fails"""
362
 
363
  templates = {
364
  "L1": f"""🚨 **L1 SOC TRIAGE ANALYSIS**
@@ -372,39 +451,39 @@ Confidence: {alert_data['confidence']}% | Source: {alert_data['source_ip']}
372
  1. Isolate affected system: {alert_data['source_ip']}
373
  2. Disable user account: {alert_data['user']}
374
  3. Block connections to: {alert_data['destination_ip']}
375
- 4. Preserve evidence for investigation
376
 
377
  **⬆️ ESCALATION DECISION:**
378
  Severity: {alert_data['severity']} → ESCALATE TO L2
379
  Technique: {alert_data['mitre_tactic']} requires deeper analysis
380
 
381
- **📝 INITIAL ASSESSMENT:**
382
  {alert_data['threat_intel']}""",
383
 
384
  "L2": f"""🔍 **L2 INVESTIGATION ANALYSIS**
385
  ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━
386
 
387
- **🎯 ATTACK VECTOR ANALYSIS:**
388
  Technique: {alert_data['mitre_tactic']}
389
  Evidence: {alert_data['raw_log']}
390
  Context: {alert_data['description']}
391
 
392
  **🔬 INVESTIGATION ROADMAP:**
393
- 1. Timeline correlation: ±30min from {alert_data['timestamp']}
394
- 2. User behavior analysis: {alert_data['user']} baseline comparison
395
  3. Network flow analysis: {alert_data['source_ip']} → {alert_data['destination_ip']}
396
- 4. Process tree examination: Parent/child relationships
397
- 5. Artifact collection: Memory dumps, logs, files
398
 
399
  **📊 THREAT ASSESSMENT:**
400
  Confidence Level: {alert_data['confidence']}%
401
  Business Impact: {alert_data['severity']}
402
- Attribution: {alert_data['threat_intel']}
403
 
404
- **🎯 RECOMMENDATIONS:**
405
  Deploy hunting queries for similar TTPs
406
- Review authentication logs for compromise
407
- Consider L3 escalation if campaign indicators found""",
408
 
409
  "L3": f"""🎯 **L3 EXPERT STRATEGIC ANALYSIS**
410
  ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━
@@ -416,26 +495,26 @@ Success Probability: {alert_data['confidence']}%
416
 
417
  **💼 BUSINESS IMPACT:**
418
  Severity Level: {alert_data['severity']}
419
- Executive Notification: Required
420
- Regulatory Implications: Under review
421
 
422
  **🛡️ STRATEGIC RESPONSE:**
423
- Immediate: Threat hunting deployment across environment
424
- Short-term: Enhanced monitoring and detection rules
425
- Medium-term: Security architecture review
426
- Long-term: Threat intelligence integration enhancement
427
 
428
- **📈 EXECUTIVE BRIEFING:**
429
- • Sophisticated attack requiring coordinated response
430
  • High potential for lateral movement and data exfiltration
431
- • Recommend incident response team activation
432
- • Consider external forensics support engagement"""
433
  }
434
 
435
  return templates.get(analyst_level, templates["L2"])
436
 
437
- def analyze_alert_with_gpt_oss(scenario_name, alert_index, analyst_level):
438
- """Main analysis function using GPT-OSS-20B"""
439
  start_time = time.time()
440
 
441
  if scenario_name not in ATTACK_SCENARIOS:
@@ -449,8 +528,8 @@ def analyze_alert_with_gpt_oss(scenario_name, alert_index, analyst_level):
449
 
450
  selected_alert = alerts[alert_index]
451
 
452
- # Generate analysis using GPT-OSS-20B
453
- analysis = generate_gpt_oss_analysis(selected_alert, analyst_level)
454
 
455
  # Format alert details
456
  alert_details = f"""🎫 **ALERT {selected_alert['id']}** | 🕐 {selected_alert['timestamp']}
@@ -507,27 +586,27 @@ def get_scenario_info(scenario_name):
507
  """
508
 
509
  info += """
510
- ### 🤖 **AI-Powered Analysis:**
511
- - **OpenAI GPT-OSS-20B:** Latest open-weight reasoning model
512
- - **MXFP4 Quantization:** Optimized for efficient inference
513
- - **Harmony Format:** Advanced response structure
514
- - **21B Parameters:** With 3.6B active per token (MoE)"""
515
 
516
  return info
517
  return "⚠️ No scenario selected."
518
 
519
  # Create Gradio interface
520
- with gr.Blocks(title="SOC Assistant - GPT-OSS-20B", theme=gr.themes.Soft(), css=custom_css) as demo:
521
 
522
  gr.Markdown("""
523
- # 🛡️ SOC LLM Assistant - OpenAI GPT-OSS-20B Edition
524
- **Powered by OpenAI's Latest Open-Weight Reasoning Model**
525
 
526
- *First open-weight model from OpenAI since GPT-2 Released August 8, 2025*
527
  """)
528
 
529
  # Model status display
530
- status_display = gr.Markdown("🔄 Loading OpenAI GPT-OSS-20B...")
531
 
532
  with gr.Row():
533
  # Left Panel
@@ -562,26 +641,26 @@ with gr.Blocks(title="SOC Assistant - GPT-OSS-20B", theme=gr.themes.Soft(), css=
562
  )
563
 
564
  analyze_btn = gr.Button(
565
- "🚀 Analyze with GPT-OSS-20B",
566
  variant="primary",
567
  size="lg"
568
  )
569
 
570
  init_btn = gr.Button(
571
- "🔄 Reinitialize Model",
572
  variant="secondary"
573
  )
574
 
575
  gr.Markdown("---")
576
- gr.Markdown("## 🤖 Model Information")
577
  gr.Markdown("""
578
- **🎯 GPT-OSS-20B Features:**
579
- - 21B parameters (3.6B active)
580
- - MXFP4 quantization
581
- - 128K context length
582
- - Apache 2.0 licensed
583
- - Harmony response format
584
- - Reasoning capabilities
585
  """)
586
 
587
  # Right Panel
@@ -593,9 +672,9 @@ with gr.Blocks(title="SOC Assistant - GPT-OSS-20B", theme=gr.themes.Soft(), css=
593
  interactive=False
594
  )
595
 
596
- gr.Markdown("## 🤖 GPT-OSS-20B Analysis")
597
  analysis_output = gr.Textbox(
598
- label="🧠 AI-Powered Security Analysis",
599
  lines=25,
600
  interactive=False
601
  )
@@ -608,17 +687,17 @@ with gr.Blocks(title="SOC Assistant - GPT-OSS-20B", theme=gr.themes.Soft(), css=
608
 
609
  gr.Markdown("""
610
  ---
611
- ## 🎉 **About OpenAI GPT-OSS-20B**
612
-
613
- Released August 8, 2025 - OpenAI's first open-weight model since GPT-2! This groundbreaking release features:
614
 
615
- - **🧠 Advanced Reasoning:** Comparable to o3-mini performance
616
- - **⚡ Efficient Architecture:** MoE with only 3.6B active parameters per token
617
- - **🔧 Harmony Format:** New structured response system for better tool use
618
- - **📱 Consumer Hardware:** Runs on just 16GB memory
619
- - **🔓 Open License:** Apache 2.0 - fully permissive for commercial use
620
 
621
- Perfect for cybersecurity analysis requiring sophisticated reasoning and chain-of-thought capabilities!
 
 
 
622
 
623
  ---
624
  **👨‍🎓 Research:** Abdullah Alanazi | **🏛️ KAUST** | **👨‍🏫 Prof. Ali Shoker**
@@ -644,13 +723,13 @@ with gr.Blocks(title="SOC Assistant - GPT-OSS-20B", theme=gr.themes.Soft(), css=
644
  )
645
 
646
  analyze_btn.click(
647
- fn=analyze_alert_with_gpt_oss,
648
  inputs=[scenario_dropdown, alert_slider, analyst_level],
649
  outputs=[alert_output, analysis_output, status_output]
650
  )
651
 
652
  init_btn.click(
653
- fn=initialize_gpt_oss,
654
  outputs=[status_display]
655
  )
656
 
@@ -662,7 +741,7 @@ with gr.Blocks(title="SOC Assistant - GPT-OSS-20B", theme=gr.themes.Soft(), css=
662
  )
663
 
664
  demo.load(
665
- fn=initialize_gpt_oss,
666
  outputs=[status_display]
667
  )
668
 
 
6
  from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline
7
  import torch
8
  import time
9
+ import warnings
10
+
11
+ # Suppress warnings for cleaner output
12
+ warnings.filterwarnings("ignore")
13
 
14
  # Custom CSS for better styling
15
  custom_css = """
16
  .gradio-container {
17
  max-width: 1200px !important;
18
  }
19
+ .gpt-oss-badge {
20
+ background: linear-gradient(45deg, #00c6ff, #0072ff);
21
  color: white;
22
+ padding: 5px 10px;
23
+ border-radius: 15px;
24
+ font-weight: bold;
25
  }
26
  .status-success {
27
  background: #d4edda;
 
30
  padding: 10px;
31
  border-radius: 5px;
32
  }
 
 
 
 
 
 
 
33
  """
34
 
35
  # Global variables for model management
36
  model = None
37
  tokenizer = None
38
+ model_status = "🔄 Initializing..."
39
 
 
40
  @spaces.GPU
41
+ def initialize_gpt_oss_safe():
42
+ """Initialize GPT-OSS-20B with multiple fallback strategies"""
43
  global model, tokenizer, model_status
44
 
45
+ # Strategy 1: Try GPT-OSS-20B with specific settings
46
+ strategies = [
47
+ {
48
+ "model_id": "openai/gpt-oss-20b",
49
+ "name": "GPT-OSS-20B (Original)",
50
+ "config": {
51
+ "torch_dtype": "auto",
52
+ "device_map": "auto",
53
+ "trust_remote_code": True,
54
+ "low_cpu_mem_usage": True,
55
+ "use_safetensors": True
56
+ }
57
+ },
58
+ {
59
+ "model_id": "openai/gpt-oss-20b",
60
+ "name": "GPT-OSS-20B (BF16)",
61
+ "config": {
62
+ "torch_dtype": torch.bfloat16,
63
+ "device_map": "auto",
64
+ "trust_remote_code": True,
65
+ "low_cpu_mem_usage": True
66
+ }
67
+ },
68
+ {
69
+ "model_id": "openai/gpt-oss-20b",
70
+ "name": "GPT-OSS-20B (FP16)",
71
+ "config": {
72
+ "torch_dtype": torch.float16,
73
+ "device_map": "auto",
74
+ "trust_remote_code": True,
75
+ "low_cpu_mem_usage": True
76
+ }
77
+ },
78
+ {
79
+ "model_id": "microsoft/DialoGPT-large",
80
+ "name": "DialoGPT-Large (Fallback)",
81
+ "config": {
82
+ "torch_dtype": torch.float16 if torch.cuda.is_available() else torch.float32,
83
+ "device_map": "auto" if torch.cuda.is_available() else None
84
+ }
85
+ }
86
+ ]
87
+
88
+ device = "cuda" if torch.cuda.is_available() else "cpu"
89
+ print(f"🔧 Device: {device}")
90
+
91
+ if torch.cuda.is_available():
92
+ print(f"🎮 GPU: {torch.cuda.get_device_name()}")
93
+ print(f"💾 GPU Memory: {torch.cuda.get_device_properties(0).total_memory / 1e9:.1f}GB")
94
+
95
+ for i, strategy in enumerate(strategies):
96
+ try:
97
+ model_id = strategy["model_id"]
98
+ config = strategy["config"]
99
+ name = strategy["name"]
100
+
101
+ print(f"\n🔄 Strategy {i+1}: Trying {name}")
102
+ print(f"📦 Model: {model_id}")
103
+
104
+ # Load tokenizer first
105
+ print("🔤 Loading tokenizer...")
106
+ tokenizer = AutoTokenizer.from_pretrained(
107
+ model_id,
108
+ trust_remote_code=True,
109
+ use_fast=True
110
  )
111
+
112
+ # Handle pad token for generation
113
+ if tokenizer.pad_token is None:
114
+ tokenizer.pad_token = tokenizer.eos_token
115
+
116
+ print("✅ Tokenizer loaded successfully")
117
+
118
+ # Load model with strategy-specific config
119
+ print("🧠 Loading model...")
120
+ model = AutoModelForCausalLM.from_pretrained(
121
+ model_id,
122
+ **config
123
+ )
124
+
125
+ print(f"📍 Model device: {next(model.parameters()).device}")
126
+ print(f"🔢 Model dtype: {next(model.parameters()).dtype}")
127
+
128
+ # Test generation to ensure everything works
129
+ print("🧪 Testing generation...")
130
+ test_messages = [{"role": "user", "content": "Hello, test."}]
131
+
132
+ try:
133
+ # Use pipeline for simpler testing
134
+ test_pipe = pipeline(
135
+ "text-generation",
136
+ model=model,
137
+ tokenizer=tokenizer,
138
+ torch_dtype=config.get("torch_dtype", "auto"),
139
+ device_map="auto" if torch.cuda.is_available() else None
140
+ )
141
+
142
+ test_result = test_pipe(
143
+ test_messages,
144
+ max_new_tokens=10,
145
+ do_sample=False
146
+ )
147
+
148
+ print("✅ Generation test successful")
149
+
150
+ except Exception as test_error:
151
+ print(f"⚠️ Pipeline test failed, trying direct generation: {test_error}")
152
+
153
+ # Fallback to direct generation
154
+ inputs = tokenizer.apply_chat_template(
155
+ test_messages,
156
+ add_generation_prompt=True,
157
+ return_tensors="pt",
158
+ return_dict=True,
159
+ )
160
+
161
+ if torch.cuda.is_available():
162
+ inputs = {k: v.to(model.device) for k, v in inputs.items()}
163
+
164
+ with torch.no_grad():
165
+ outputs = model.generate(
166
+ **inputs,
167
+ max_new_tokens=5,
168
+ do_sample=False,
169
+ pad_token_id=tokenizer.eos_token_id
170
+ )
171
+
172
+ print("✅ Direct generation test successful")
173
+
174
+ # If we get here, the strategy worked
175
+ model_status = f"✅ {name} loaded successfully on {device}"
176
+ print(f"🎉 Success: {model_status}")
177
+ return model_status
178
+
179
+ except Exception as e:
180
+ error_msg = str(e)
181
+ print(f"❌ Strategy {i+1} failed: {error_msg[:100]}...")
182
+
183
+ # Clear any partially loaded components
184
+ model, tokenizer = None, None
185
+
186
+ # Clear GPU memory if available
187
+ if torch.cuda.is_available():
188
+ torch.cuda.empty_cache()
189
+
190
+ continue
191
+
192
+ # If all strategies failed
193
+ model_status = "❌ All model loading strategies failed. Using text-only fallback."
194
+ print(model_status)
195
+ return model_status
196
 
197
+ # Enhanced attack scenarios (keeping the same as before)
198
  ATTACK_SCENARIOS = {
199
  "🔄 Lateral Movement": {
200
  "description": "Advanced Persistent Threat (APT) - Attacker moving laterally through network after initial compromise",
 
315
  }
316
 
317
  @spaces.GPU
318
+ def generate_analysis_safe(alert_data, analyst_level):
319
+ """Generate analysis with safe error handling"""
320
 
321
  if not model or not tokenizer:
322
  return get_fallback_analysis(alert_data, analyst_level)
323
 
 
324
  security_prompts = {
325
+ "L1": f"""As a Level 1 SOC analyst, provide immediate triage for this security alert:
326
+
327
+ Alert: {alert_data['alert_type']} | Severity: {alert_data['severity']}
328
+ Source: {alert_data['source_ip']} → {alert_data['destination_ip']}
329
+ User: {alert_data['user']}
330
+ Evidence: {alert_data['raw_log']}
331
+ MITRE: {alert_data['mitre_tactic']}
332
+
333
+ Provide: immediate actions, risk assessment, escalation decision.""",
334
+
335
+ "L2": f"""As a Level 2 SOC analyst, investigate this cybersecurity incident:
336
+
337
+ Alert: {alert_data['alert_type']} | Severity: {alert_data['severity']}
338
+ Network: {alert_data['source_ip']} → {alert_data['destination_ip']}
339
+ User: {alert_data['user']}
340
+ Evidence: {alert_data['raw_log']}
341
+ Intel: {alert_data['threat_intel']}
342
+ MITRE: {alert_data['mitre_tactic']}
343
+ Confidence: {alert_data['confidence']}%
344
+
345
+ Provide: root cause analysis, investigation steps, threat assessment.""",
346
+
347
+ "L3": f"""As a senior cybersecurity expert, analyze this sophisticated threat:
348
+
349
+ Attack: {alert_data['description']}
350
+ Evidence: {alert_data['raw_log']}
351
+ Attribution: {alert_data['threat_intel']}
352
+ MITRE: {alert_data['mitre_tactic']}
353
+ Confidence: {alert_data['confidence']}%
354
+
355
+ Provide: strategic assessment, attribution analysis, response plan."""
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
356
  }
357
 
358
  try:
359
  prompt = security_prompts.get(analyst_level, security_prompts["L2"])
360
 
361
+ # Try pipeline approach first (safer)
362
+ try:
363
+ pipe = pipeline(
364
+ "text-generation",
365
+ model=model,
366
+ tokenizer=tokenizer,
367
+ torch_dtype="auto",
368
+ device_map="auto" if torch.cuda.is_available() else None
369
+ )
370
+
371
+ messages = [{"role": "user", "content": prompt}]
372
+
373
+ result = pipe(
374
+ messages,
375
+ max_new_tokens=400,
 
 
 
 
 
 
 
376
  do_sample=True,
377
+ temperature=0.3,
378
  top_p=0.9,
 
379
  repetition_penalty=1.1,
380
+ pad_token_id=tokenizer.eos_token_id
 
381
  )
382
+
383
+ analysis = result[0]["generated_text"][-1]["content"]
384
+
385
+ except Exception as pipe_error:
386
+ print(f"Pipeline failed, trying direct generation: {pipe_error}")
387
+
388
+ # Fallback to direct generation
389
+ messages = [{"role": "user", "content": prompt}]
390
+
391
+ inputs = tokenizer.apply_chat_template(
392
+ messages,
393
+ add_generation_prompt=True,
394
+ return_tensors="pt",
395
+ return_dict=True,
396
+ )
397
+
398
+ if torch.cuda.is_available():
399
+ inputs = {k: v.to(model.device) for k, v in inputs.items()}
400
+
401
+ with torch.no_grad():
402
+ outputs = model.generate(
403
+ **inputs,
404
+ max_new_tokens=400,
405
+ do_sample=True,
406
+ temperature=0.3,
407
+ top_p=0.9,
408
+ repetition_penalty=1.1,
409
+ pad_token_id=tokenizer.eos_token_id,
410
+ eos_token_id=tokenizer.eos_token_id
411
+ )
412
+
413
+ input_length = inputs["input_ids"].shape[-1]
414
+ generated_tokens = outputs[0][input_length:]
415
+ analysis = tokenizer.decode(generated_tokens, skip_special_tokens=True)
416
 
417
+ # Quality check
418
+ if len(analysis.strip()) < 50:
 
 
 
 
 
419
  return get_fallback_analysis(alert_data, analyst_level)
420
 
421
+ # Determine model name for display
422
+ if "gpt-oss" in model_status.lower():
423
+ badge = "🤖 OpenAI GPT-OSS-20B Analysis"
424
+ else:
425
+ badge = "🤖 AI-Powered Analysis"
426
+
427
+ return f"""{badge}
428
  ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━
429
 
430
  {analysis.strip()}
431
 
432
  ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━
433
+ *Generated using advanced AI reasoning capabilities*"""
 
434
 
435
  except Exception as e:
436
+ print(f"Generation error: {e}")
437
+ return f"⚠️ AI Error: {str(e)[:100]}\n\n{get_fallback_analysis(alert_data, analyst_level)}"
438
 
439
  def get_fallback_analysis(alert_data, analyst_level):
440
+ """High-quality fallback analysis"""
441
 
442
  templates = {
443
  "L1": f"""🚨 **L1 SOC TRIAGE ANALYSIS**
 
451
  1. Isolate affected system: {alert_data['source_ip']}
452
  2. Disable user account: {alert_data['user']}
453
  3. Block connections to: {alert_data['destination_ip']}
454
+ 4. Preserve evidence and logs
455
 
456
  **⬆️ ESCALATION DECISION:**
457
  Severity: {alert_data['severity']} → ESCALATE TO L2
458
  Technique: {alert_data['mitre_tactic']} requires deeper analysis
459
 
460
+ **📝 INITIAL NOTES:**
461
  {alert_data['threat_intel']}""",
462
 
463
  "L2": f"""🔍 **L2 INVESTIGATION ANALYSIS**
464
  ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━
465
 
466
+ **🎯 ATTACK VECTOR:**
467
  Technique: {alert_data['mitre_tactic']}
468
  Evidence: {alert_data['raw_log']}
469
  Context: {alert_data['description']}
470
 
471
  **🔬 INVESTIGATION ROADMAP:**
472
+ 1. Timeline correlation: ±30min window analysis
473
+ 2. User behavior baseline: {alert_data['user']} comparison
474
  3. Network flow analysis: {alert_data['source_ip']} → {alert_data['destination_ip']}
475
+ 4. Process tree examination and artifact collection
476
+ 5. Similar IOC hunting across environment
477
 
478
  **📊 THREAT ASSESSMENT:**
479
  Confidence Level: {alert_data['confidence']}%
480
  Business Impact: {alert_data['severity']}
481
+ Attribution Context: {alert_data['threat_intel']}
482
 
483
+ **🎯 NEXT STEPS:**
484
  Deploy hunting queries for similar TTPs
485
+ Review authentication logs for compromise indicators
486
+ Consider L3 escalation if campaign evidence found""",
487
 
488
  "L3": f"""🎯 **L3 EXPERT STRATEGIC ANALYSIS**
489
  ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━
 
495
 
496
  **💼 BUSINESS IMPACT:**
497
  Severity Level: {alert_data['severity']}
498
+ Executive Notification: Required for Critical/High
499
+ Regulatory Implications: Under compliance review
500
 
501
  **🛡️ STRATEGIC RESPONSE:**
502
+ Immediate: Threat hunting deployment across infrastructure
503
+ Short-term: Enhanced monitoring and detection rule updates
504
+ Medium-term: Security architecture review and gap analysis
505
+ Long-term: Threat intelligence integration and training
506
 
507
+ **📈 EXECUTIVE BRIEFING POINTS:**
508
+ • Sophisticated attack requiring coordinated incident response
509
  • High potential for lateral movement and data exfiltration
510
+ • Recommend immediate incident response team activation
511
+ • Consider external forensics engagement for complex analysis"""
512
  }
513
 
514
  return templates.get(analyst_level, templates["L2"])
515
 
516
+ def analyze_alert_with_ai(scenario_name, alert_index, analyst_level):
517
+ """Main analysis function with error handling"""
518
  start_time = time.time()
519
 
520
  if scenario_name not in ATTACK_SCENARIOS:
 
528
 
529
  selected_alert = alerts[alert_index]
530
 
531
+ # Generate analysis
532
+ analysis = generate_analysis_safe(selected_alert, analyst_level)
533
 
534
  # Format alert details
535
  alert_details = f"""🎫 **ALERT {selected_alert['id']}** | 🕐 {selected_alert['timestamp']}
 
586
  """
587
 
588
  info += """
589
+ ### 🤖 **AI Analysis Features:**
590
+ - **Multi-Strategy Loading:** Automatic fallback to compatible models
591
+ - **Advanced Reasoning:** Chain-of-thought analysis for complex threats
592
+ - **Error Recovery:** Robust handling of model loading issues
593
+ - **Quality Assurance:** Automatic fallback to expert templates"""
594
 
595
  return info
596
  return "⚠️ No scenario selected."
597
 
598
  # Create Gradio interface
599
+ with gr.Blocks(title="SOC Assistant - Fixed GPT-OSS", theme=gr.themes.Soft(), css=custom_css) as demo:
600
 
601
  gr.Markdown("""
602
+ # 🛡️ SOC LLM Assistant - Fixed GPT-OSS Edition
603
+ **Multi-Strategy Model Loading with Robust Error Handling**
604
 
605
+ *Automatically tries GPT-OSS-20B first, then falls back to compatible models*
606
  """)
607
 
608
  # Model status display
609
+ status_display = gr.Markdown("🔄 Initializing AI models with multiple strategies...")
610
 
611
  with gr.Row():
612
  # Left Panel
 
641
  )
642
 
643
  analyze_btn = gr.Button(
644
+ "🚀 Analyze with AI",
645
  variant="primary",
646
  size="lg"
647
  )
648
 
649
  init_btn = gr.Button(
650
+ "🔄 Retry Model Loading",
651
  variant="secondary"
652
  )
653
 
654
  gr.Markdown("---")
655
+ gr.Markdown("## 🔧 Loading Strategies")
656
  gr.Markdown("""
657
+ **🎯 Automatic Fallback:**
658
+ 1. GPT-OSS-20B (Original MXFP4)
659
+ 2. GPT-OSS-20B (BF16)
660
+ 3. GPT-OSS-20B (FP16)
661
+ 4. DialoGPT-Large (Backup)
662
+
663
+ **✅ Robust Error Handling**
664
  """)
665
 
666
  # Right Panel
 
672
  interactive=False
673
  )
674
 
675
+ gr.Markdown("## 🤖 AI-Powered Analysis")
676
  analysis_output = gr.Textbox(
677
+ label="🧠 Security Analysis",
678
  lines=25,
679
  interactive=False
680
  )
 
687
 
688
  gr.Markdown("""
689
  ---
690
+ ## 🔧 **Troubleshooting Guide**
 
 
691
 
692
+ **If you see "ModelWrapper" error:**
693
+ - **Fixed:** This version uses multiple loading strategies
694
+ - 🔄 **Automatic:** Falls back to compatible models
695
+ - 🛠️ **Manual:** Use "Retry Model Loading" button
 
696
 
697
+ **Loading Strategy Order:**
698
+ 1. **GPT-OSS-20B** - Latest OpenAI open-weight model
699
+ 2. **Fallback Models** - Tested compatible alternatives
700
+ 3. **Expert Templates** - High-quality manual analysis
701
 
702
  ---
703
  **👨‍🎓 Research:** Abdullah Alanazi | **🏛️ KAUST** | **👨‍🏫 Prof. Ali Shoker**
 
723
  )
724
 
725
  analyze_btn.click(
726
+ fn=analyze_alert_with_ai,
727
  inputs=[scenario_dropdown, alert_slider, analyst_level],
728
  outputs=[alert_output, analysis_output, status_output]
729
  )
730
 
731
  init_btn.click(
732
+ fn=initialize_gpt_oss_safe,
733
  outputs=[status_display]
734
  )
735
 
 
741
  )
742
 
743
  demo.load(
744
+ fn=initialize_gpt_oss_safe,
745
  outputs=[status_display]
746
  )
747