Spaces:
Running
on
Zero
Running
on
Zero
Update app.py
Browse files
app.py
CHANGED
@@ -67,28 +67,45 @@ model = AutoModelForSequenceClassification.from_pretrained(model_name)
|
|
67 |
tokenizer = AutoTokenizer.from_pretrained(model_name, use_fast=False)
|
68 |
|
69 |
LABELS = [
|
70 |
-
"blame shifting",
|
71 |
-
"
|
72 |
-
"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
73 |
]
|
74 |
|
75 |
-
|
76 |
-
"
|
77 |
-
"
|
78 |
-
"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
79 |
}
|
80 |
|
81 |
PATTERN_WEIGHTS = {
|
82 |
-
"gaslighting": 1.
|
83 |
-
"control": 1.
|
84 |
-
"dismissiveness": 0.7
|
85 |
-
"blame shifting": 0.
|
86 |
-
"guilt tripping": 1.2,
|
87 |
-
"insults": 1.
|
88 |
-
"projection": 1.2,
|
89 |
-
"recovery phase": 1.
|
90 |
-
"contradictory statements": 0.75,
|
91 |
-
"threat": 1.6
|
92 |
}
|
93 |
RISK_STAGE_LABELS = {
|
94 |
1: "🌀 Risk Stage: Tension-Building\nThis message reflects rising emotional pressure or subtle control attempts.",
|
@@ -293,9 +310,10 @@ def generate_risk_snippet(abuse_score, top_label, escalation_score, stage):
|
|
293 |
"guilt tripping": "This message may induce guilt in order to control or manipulate behavior.",
|
294 |
"recovery phase": "This message may be part of a tension-reset cycle, appearing kind but avoiding change.",
|
295 |
"projection": "This message may involve attributing the abuser’s own behaviors to the victim.",
|
|
|
|
|
296 |
"default": "This message contains language patterns that may affect safety, clarity, or emotional autonomy."
|
297 |
-
|
298 |
-
|
299 |
explanation = WHY_FLAGGED.get(pattern_label.lower(), WHY_FLAGGED["default"])
|
300 |
|
301 |
base = f"\n\n🛑 Risk Level: {risk_level.capitalize()}\n"
|
|
|
67 |
tokenizer = AutoTokenizer.from_pretrained(model_name, use_fast=False)
|
68 |
|
69 |
LABELS = [
|
70 |
+
"blame shifting",
|
71 |
+
"contradictory statements",
|
72 |
+
"control",
|
73 |
+
"dismissiveness",
|
74 |
+
"gaslighting",
|
75 |
+
"guilt tripping",
|
76 |
+
"insults",
|
77 |
+
"obscure language",
|
78 |
+
"projection",
|
79 |
+
"recovery phase",
|
80 |
+
"threat"
|
81 |
+
|
82 |
]
|
83 |
|
84 |
+
BEST_THRESHOLDS = {
|
85 |
+
"recovery": 0.622,
|
86 |
+
"control": 0.100,
|
87 |
+
"gaslighting": 0.410,
|
88 |
+
"dismissiveness": 0.867,
|
89 |
+
"blameshifting": 0.116,
|
90 |
+
"coercion": 0.100,
|
91 |
+
"aggression": 0.100,
|
92 |
+
"nonabusive": 0.100,
|
93 |
+
"deflection": 0.100,
|
94 |
+
"projection": 0.100,
|
95 |
+
"insults": 0.100
|
96 |
}
|
97 |
|
98 |
PATTERN_WEIGHTS = {
|
99 |
+
"gaslighting": 1.4, # Slightly reduced: threshold tuned to 0.41 (high precision)
|
100 |
+
"control": 1.3, # Increased: very low threshold, deserves slightly higher weight
|
101 |
+
"dismissiveness": 0.9, # Increased from 0.7 due to high threshold needed
|
102 |
+
"blame shifting": 0.6, # Slight increase for stronger detection
|
103 |
+
"guilt tripping": 1.2, # Leave as is (not in current threshold set, assumed stable)
|
104 |
+
"insults": 1.5, # Raised slightly: F1 of 1.0 at low threshold, so still high signal
|
105 |
+
"projection": 1.2, # Leave as is: still valid at current detection levels
|
106 |
+
"recovery phase": 1.0, # Slightly lowered to reduce false sense of safety
|
107 |
+
"contradictory statements": 0.75, # Leave as is unless retrained
|
108 |
+
"threat": 1.6 # Keep high due to high severity, even if threshold wasn't tuned here
|
109 |
}
|
110 |
RISK_STAGE_LABELS = {
|
111 |
1: "🌀 Risk Stage: Tension-Building\nThis message reflects rising emotional pressure or subtle control attempts.",
|
|
|
310 |
"guilt tripping": "This message may induce guilt in order to control or manipulate behavior.",
|
311 |
"recovery phase": "This message may be part of a tension-reset cycle, appearing kind but avoiding change.",
|
312 |
"projection": "This message may involve attributing the abuser’s own behaviors to the victim.",
|
313 |
+
"contradictory statements": "This message may contain internal contradictions used to confuse, destabilize, or deflect responsibility.",
|
314 |
+
"obscure language": "This message may use overly formal, vague, or complex language to obscure meaning or avoid accountability.",
|
315 |
"default": "This message contains language patterns that may affect safety, clarity, or emotional autonomy."
|
316 |
+
}
|
|
|
317 |
explanation = WHY_FLAGGED.get(pattern_label.lower(), WHY_FLAGGED["default"])
|
318 |
|
319 |
base = f"\n\n🛑 Risk Level: {risk_level.capitalize()}\n"
|