alohaboy committed on
Commit
e5009ef
·
1 Parent(s): 6e6e1e7

Fix normal bypass: pass debug_info to mitigation methods

Browse files
Files changed (1) hide show
  1. app.py +28 -7
app.py CHANGED
@@ -183,9 +183,9 @@ class HateSpeechDetectorService:
183
  if strategy == "Detection Only":
184
  return result_msg, mitigation
185
  elif strategy == "Guided":
186
- return self._guided_mitigation(text)
187
  elif strategy == "Guided+Reflect":
188
- return self._guided_reflect_mitigation(text)
189
  elif strategy == "Unguided":
190
  return self._unguided_mitigation(text)
191
  else:
@@ -348,11 +348,21 @@ Mitigated sentence:"""
348
  error_msg = f"❌ **Blossom LLM Error**\n\nError occurred: {str(e)}"
349
  return error_msg, "An error occurred during LLM processing."
350
 
351
- def _guided_mitigation(self, text):
352
  """Guided Mode: Mitigate based on detection result using LLM"""
353
  try:
354
- # First, perform detection with KcELECTRA
355
- detection_result, _, debug_info = self._detection_only(text)
 
 
 
 
 
 
 
 
 
 
356
  label = debug_info.get('label', 'normal')
357
  hate_tokens = debug_info.get('hate_tokens', [])
358
 
@@ -404,10 +414,21 @@ Mitigated sentence:"""
404
  error_msg = f"❌ **Guided Mitigation Error**\n\nError occurred: {str(e)}"
405
  return error_msg, "An error occurred during guided mitigation processing."
406
 
407
- def _guided_reflect_mitigation(self, text):
408
  """Guided+Reflect Mode: iterative refinement + critic evaluation"""
409
  try:
410
- detection_result, _, debug_info = self._detection_only(text)
 
 
 
 
 
 
 
 
 
 
 
411
  label = debug_info.get('label', 'normal')
412
  hate_tokens = debug_info.get('hate_tokens', [])
413
  # Step 1: Initial mitigation
 
183
  if strategy == "Detection Only":
184
  return result_msg, mitigation
185
  elif strategy == "Guided":
186
+ return self._guided_mitigation(text, debug_info)
187
  elif strategy == "Guided+Reflect":
188
+ return self._guided_reflect_mitigation(text, debug_info)
189
  elif strategy == "Unguided":
190
  return self._unguided_mitigation(text)
191
  else:
 
348
  error_msg = f"❌ **Blossom LLM Error**\n\nError occurred: {str(e)}"
349
  return error_msg, "An error occurred during LLM processing."
350
 
351
+ def _guided_mitigation(self, text, debug_info=None):
352
  """Guided Mode: Mitigate based on detection result using LLM"""
353
  try:
354
+ # Use provided debug_info or perform detection
355
+ if debug_info is None:
356
+ detection_result, _, debug_info = self._detection_only(text)
357
+ else:
358
+ # Reconstruct detection_result from debug_info
359
+ label = debug_info.get('label', 'normal')
360
+ confidence = debug_info.get('confidence', 0.0)
361
+ hate_tokens = debug_info.get('hate_tokens', [])
362
+ detection_result = f"πŸ” **Detection Result**\n\n**Classification:** {label}\n**Confidence:** {confidence:.2f}\n"
363
+ if hate_tokens:
364
+ detection_result += f"**Identified Expressions:** {hate_tokens}"
365
+
366
  label = debug_info.get('label', 'normal')
367
  hate_tokens = debug_info.get('hate_tokens', [])
368
 
 
414
  error_msg = f"❌ **Guided Mitigation Error**\n\nError occurred: {str(e)}"
415
  return error_msg, "An error occurred during guided mitigation processing."
416
 
417
+ def _guided_reflect_mitigation(self, text, debug_info=None):
418
  """Guided+Reflect Mode: iterative refinement + critic evaluation"""
419
  try:
420
+ # Use provided debug_info or perform detection
421
+ if debug_info is None:
422
+ detection_result, _, debug_info = self._detection_only(text)
423
+ else:
424
+ # Reconstruct detection_result from debug_info
425
+ label = debug_info.get('label', 'normal')
426
+ confidence = debug_info.get('confidence', 0.0)
427
+ hate_tokens = debug_info.get('hate_tokens', [])
428
+ detection_result = f"πŸ” **Detection Result**\n\n**Classification:** {label}\n**Confidence:** {confidence:.2f}\n"
429
+ if hate_tokens:
430
+ detection_result += f"**Identified Expressions:** {hate_tokens}"
431
+
432
  label = debug_info.get('label', 'normal')
433
  hate_tokens = debug_info.get('hate_tokens', [])
434
  # Step 1: Initial mitigation