Spaces:

alohaboy
/

hate-speech-mitigation-demo

Running

App Files Community

alohaboy committed on Jul 25

Commit

e5009ef

1 Parent(s): 6e6e1e7

Fix normal bypass: pass debug_info to mitigation methods

Browse files

Files changed (1) hide show

app.py +28 -7

app.py CHANGED Viewed

@@ -183,9 +183,9 @@ class HateSpeechDetectorService:
         if strategy == "Detection Only":
             return result_msg, mitigation
         elif strategy == "Guided":
-            return self._guided_mitigation(text)
         elif strategy == "Guided+Reflect":
-            return self._guided_reflect_mitigation(text)
         elif strategy == "Unguided":
             return self._unguided_mitigation(text)
         else:
@@ -348,11 +348,21 @@ Mitigated sentence:"""
             error_msg = f"❌ **Blossom LLM Error**\n\nError occurred: {str(e)}"
             return error_msg, "An error occurred during LLM processing."
-    def _guided_mitigation(self, text):
         """Guided Mode: Mitigate based on detection result using LLM"""
         try:
-            # First, perform detection with KcELECTRA
-            detection_result, _, debug_info = self._detection_only(text)
             label = debug_info.get('label', 'normal')
             hate_tokens = debug_info.get('hate_tokens', [])
@@ -404,10 +414,21 @@ Mitigated sentence:"""
             error_msg = f"❌ **Guided Mitigation Error**\n\nError occurred: {str(e)}"
             return error_msg, "An error occurred during guided mitigation processing."
-    def _guided_reflect_mitigation(self, text):
         """Guided+Reflect Mode: iterative refinement + critic evaluation"""
         try:
-            detection_result, _, debug_info = self._detection_only(text)
             label = debug_info.get('label', 'normal')
             hate_tokens = debug_info.get('hate_tokens', [])
             # Step 1: Initial mitigation

         if strategy == "Detection Only":
             return result_msg, mitigation
         elif strategy == "Guided":
+            return self._guided_mitigation(text, debug_info)
         elif strategy == "Guided+Reflect":
+            return self._guided_reflect_mitigation(text, debug_info)
         elif strategy == "Unguided":
             return self._unguided_mitigation(text)
         else:
             error_msg = f"❌ **Blossom LLM Error**\n\nError occurred: {str(e)}"
             return error_msg, "An error occurred during LLM processing."
+    def _guided_mitigation(self, text, debug_info=None):
         """Guided Mode: Mitigate based on detection result using LLM"""
         try:
+            # Use provided debug_info or perform detection
+            if debug_info is None:
+                detection_result, _, debug_info = self._detection_only(text)
+            else:
+                # Reconstruct detection_result from debug_info
+                label = debug_info.get('label', 'normal')
+                confidence = debug_info.get('confidence', 0.0)
+                hate_tokens = debug_info.get('hate_tokens', [])
+                detection_result = f"🔍 **Detection Result**\n\n**Classification:** {label}\n**Confidence:** {confidence:.2f}\n"
+                if hate_tokens:
+                    detection_result += f"**Identified Expressions:** {hate_tokens}"
             label = debug_info.get('label', 'normal')
             hate_tokens = debug_info.get('hate_tokens', [])
             error_msg = f"❌ **Guided Mitigation Error**\n\nError occurred: {str(e)}"
             return error_msg, "An error occurred during guided mitigation processing."
+    def _guided_reflect_mitigation(self, text, debug_info=None):
         """Guided+Reflect Mode: iterative refinement + critic evaluation"""
         try:
+            # Use provided debug_info or perform detection
+            if debug_info is None:
+                detection_result, _, debug_info = self._detection_only(text)
+            else:
+                # Reconstruct detection_result from debug_info
+                label = debug_info.get('label', 'normal')
+                confidence = debug_info.get('confidence', 0.0)
+                hate_tokens = debug_info.get('hate_tokens', [])
+                detection_result = f"🔍 **Detection Result**\n\n**Classification:** {label}\n**Confidence:** {confidence:.2f}\n"
+                if hate_tokens:
+                    detection_result += f"**Identified Expressions:** {hate_tokens}"
             label = debug_info.get('label', 'normal')
             hate_tokens = debug_info.get('hate_tokens', [])
             # Step 1: Initial mitigation