alohaboy committed · Commit e5009ef · Parent(s): 6e6e1e7

Fix normal bypass: pass debug_info to mitigation methods
app.py CHANGED
@@ -183,9 +183,9 @@ class HateSpeechDetectorService:
         if strategy == "Detection Only":
             return result_msg, mitigation
         elif strategy == "Guided":
-            return self._guided_mitigation(text)
+            return self._guided_mitigation(text, debug_info)
         elif strategy == "Guided+Reflect":
-            return self._guided_reflect_mitigation(text)
+            return self._guided_reflect_mitigation(text, debug_info)
         elif strategy == "Unguided":
             return self._unguided_mitigation(text)
         else:
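The hunk above is the call-site half of the fix: the dispatcher already holds the detection output in debug_info and now forwards it, instead of letting each mitigation method detect the input again. Below is a minimal, self-contained sketch of that pattern with stubbed bodies and invented sample values; only the strategy string, the method names, and the debug_info fields (label, confidence, hate_tokens) come from app.py.

# Sketch of the dispatch pattern in this commit; the detector is stubbed out.
class DispatchSketch:
    def _detect(self, text):
        # Stand-in for the real detector; returns the debug_info shape used in the diff.
        return {"label": "normal", "confidence": 0.93, "hate_tokens": []}

    def _guided_mitigation(self, text, debug_info=None):
        if debug_info is None:              # fallback kept by the new default argument
            debug_info = self._detect(text)
        return f"label={debug_info['label']}", "no mitigation needed"

    def route(self, text, strategy):
        debug_info = self._detect(text)     # detection happens once, up front
        if strategy == "Guided":
            # the fix: pass debug_info instead of calling _guided_mitigation(text)
            return self._guided_mitigation(text, debug_info)
        return "unsupported strategy", ""

print(DispatchSketch().route("example input", "Guided"))
# -> ('label=normal', 'no mitigation needed')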
@@ -348,11 +348,21 @@ Mitigated sentence:"""
             error_msg = f"❌ **Blossom LLM Error**\n\nError occurred: {str(e)}"
             return error_msg, "An error occurred during LLM processing."

-    def _guided_mitigation(self, text):
+    def _guided_mitigation(self, text, debug_info=None):
         """Guided Mode: Mitigate based on detection result using LLM"""
         try:
-            #
-            detection_result, _, debug_info = self._detection_only(text)
+            # Use provided debug_info or perform detection
+            if debug_info is None:
+                detection_result, _, debug_info = self._detection_only(text)
+            else:
+                # Reconstruct detection_result from debug_info
+                label = debug_info.get('label', 'normal')
+                confidence = debug_info.get('confidence', 0.0)
+                hate_tokens = debug_info.get('hate_tokens', [])
+                detection_result = f"🔍 **Detection Result**\n\n**Classification:** {label}\n**Confidence:** {confidence:.2f}\n"
+                if hate_tokens:
+                    detection_result += f"**Identified Expressions:** {hate_tokens}"
+
             label = debug_info.get('label', 'normal')
             hate_tokens = debug_info.get('hate_tokens', [])

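The hunk above changes _guided_mitigation itself: when the caller supplies debug_info, the method rebuilds the markdown detection summary from that dict instead of calling _detection_only again, and the debug_info=None default keeps the old behaviour for callers that pass only the text. Below is a standalone sketch of just the reconstruction branch with invented sample values; the helper name is not in app.py, and the leading emoji is garbled in this extract, so 🔍 is an assumption.

# Mirrors the reconstruction branch added in the hunk above (sample values invented).
def rebuild_detection_result(debug_info):
    label = debug_info.get('label', 'normal')
    confidence = debug_info.get('confidence', 0.0)
    hate_tokens = debug_info.get('hate_tokens', [])
    detection_result = (
        f"🔍 **Detection Result**\n\n"
        f"**Classification:** {label}\n**Confidence:** {confidence:.2f}\n"
    )
    if hate_tokens:
        detection_result += f"**Identified Expressions:** {hate_tokens}"
    return detection_result

print(rebuild_detection_result({"label": "normal", "confidence": 0.93, "hate_tokens": []}))
# 🔍 **Detection Result**
#
# **Classification:** normal
# **Confidence:** 0.93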
@@ -404,10 +414,21 @@ Mitigated sentence:"""
             error_msg = f"❌ **Guided Mitigation Error**\n\nError occurred: {str(e)}"
             return error_msg, "An error occurred during guided mitigation processing."

-    def _guided_reflect_mitigation(self, text):
+    def _guided_reflect_mitigation(self, text, debug_info=None):
         """Guided+Reflect Mode: iterative refinement + critic evaluation"""
         try:
-            detection_result, _, debug_info = self._detection_only(text)
+            # Use provided debug_info or perform detection
+            if debug_info is None:
+                detection_result, _, debug_info = self._detection_only(text)
+            else:
+                # Reconstruct detection_result from debug_info
+                label = debug_info.get('label', 'normal')
+                confidence = debug_info.get('confidence', 0.0)
+                hate_tokens = debug_info.get('hate_tokens', [])
+                detection_result = f"🔍 **Detection Result**\n\n**Classification:** {label}\n**Confidence:** {confidence:.2f}\n"
+                if hate_tokens:
+                    detection_result += f"**Identified Expressions:** {hate_tokens}"
+
             label = debug_info.get('label', 'normal')
             hate_tokens = debug_info.get('hate_tokens', [])
             # Step 1: Initial mitigation