Spaces:

SamanthaStorm
/

Tether

Running on Zero

App Files Community

SamanthaStorm committed on 30 days ago

Commit

0db7070

verified ·

1 Parent(s): d457a0a

Update app.py

Browse files

Files changed (1) hide show

app.py +37 -19

app.py CHANGED Viewed

@@ -67,28 +67,45 @@ model      = AutoModelForSequenceClassification.from_pretrained(model_name)
 tokenizer  = AutoTokenizer.from_pretrained(model_name, use_fast=False)
 LABELS = [
-    "blame shifting", "contradictory statements", "control", "dismissiveness",
-    "gaslighting", "guilt tripping", "insults", "obscure language",
-    "projection", "recovery phase", "threat"
 ]
-THRESHOLDS = {
-    "blame shifting": 0.35, "contradictory statements": 0.27, "control": 0.08, "dismissiveness": 0.32,
-    "gaslighting": 0.27, "guilt tripping": 0.31, "insults": 0.10, "obscure language": 0.55,
-    "projection": 0.09, "recovery phase": 0.33, "threat": 0.15
 }
 PATTERN_WEIGHTS = {
-    "gaslighting": 1.5,
-    "control": 1.2,
-    "dismissiveness": 0.7,
-    "blame shifting": 0.5,
-    "guilt tripping": 1.2,
-    "insults": 1.4,
-    "projection": 1.2,
-    "recovery phase": 1.1,
-    "contradictory statements": 0.75,
-    "threat": 1.6  # 🔧 New: raise weight for threat
 }
 RISK_STAGE_LABELS = {
     1: "🌀 Risk Stage: Tension-Building\nThis message reflects rising emotional pressure or subtle control attempts.",
@@ -293,9 +310,10 @@ def generate_risk_snippet(abuse_score, top_label, escalation_score, stage):
         "guilt tripping": "This message may induce guilt in order to control or manipulate behavior.",
         "recovery phase": "This message may be part of a tension-reset cycle, appearing kind but avoiding change.",
         "projection": "This message may involve attributing the abuser’s own behaviors to the victim.",
         "default": "This message contains language patterns that may affect safety, clarity, or emotional autonomy."
-    }
     explanation = WHY_FLAGGED.get(pattern_label.lower(), WHY_FLAGGED["default"])
     base = f"\n\n🛑 Risk Level: {risk_level.capitalize()}\n"

 tokenizer  = AutoTokenizer.from_pretrained(model_name, use_fast=False)
 LABELS = [
+    "blame shifting",
+    "contradictory statements",
+    "control",
+    "dismissiveness",
+    "gaslighting",
+    "guilt tripping",
+    "insults",
+    "obscure language",
+    "projection",
+    "recovery phase",
+    "threat"
 ]
+BEST_THRESHOLDS = {
+    "recovery": 0.622,
+    "control": 0.100,
+    "gaslighting": 0.410,
+    "dismissiveness": 0.867,
+    "blameshifting": 0.116,
+    "coercion": 0.100,
+    "aggression": 0.100,
+    "nonabusive": 0.100,
+    "deflection": 0.100,
+    "projection": 0.100,
+    "insults": 0.100
 }
 PATTERN_WEIGHTS = {
+    "gaslighting": 1.4,       # Slightly reduced: threshold tuned to 0.41 (high precision)
+    "control": 1.3,           # Increased: very low threshold, deserves slightly higher weight
+    "dismissiveness": 0.9,    # Increased from 0.7 due to high threshold needed
+    "blame shifting": 0.6,    # Slight increase for stronger detection
+    "guilt tripping": 1.2,    # Leave as is (not in current threshold set, assumed stable)
+    "insults": 1.5,           # Raised slightly: F1 of 1.0 at low threshold, so still high signal
+    "projection": 1.2,        # Leave as is: still valid at current detection levels
+    "recovery phase": 1.0,    # Slightly lowered to reduce false sense of safety
+    "contradictory statements": 0.75,  # Leave as is unless retrained
+    "threat": 1.6             # Keep high due to high severity, even if threshold wasn't tuned here
 }
 RISK_STAGE_LABELS = {
     1: "🌀 Risk Stage: Tension-Building\nThis message reflects rising emotional pressure or subtle control attempts.",
         "guilt tripping": "This message may induce guilt in order to control or manipulate behavior.",
         "recovery phase": "This message may be part of a tension-reset cycle, appearing kind but avoiding change.",
         "projection": "This message may involve attributing the abuser’s own behaviors to the victim.",
+        "contradictory statements": "This message may contain internal contradictions used to confuse, destabilize, or deflect responsibility.",
+        "obscure language": "This message may use overly formal, vague, or complex language to obscure meaning or avoid accountability.",
         "default": "This message contains language patterns that may affect safety, clarity, or emotional autonomy."
+}
     explanation = WHY_FLAGGED.get(pattern_label.lower(), WHY_FLAGGED["default"])
     base = f"\n\n🛑 Risk Level: {risk_level.capitalize()}\n"