SamanthaStorm committed on
Commit
0db7070
·
verified ·
1 Parent(s): d457a0a

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +37 -19
app.py CHANGED
@@ -67,28 +67,45 @@ model = AutoModelForSequenceClassification.from_pretrained(model_name)
67
  tokenizer = AutoTokenizer.from_pretrained(model_name, use_fast=False)
68
 
69
  LABELS = [
70
- "blame shifting", "contradictory statements", "control", "dismissiveness",
71
- "gaslighting", "guilt tripping", "insults", "obscure language",
72
- "projection", "recovery phase", "threat"
 
 
 
 
 
 
 
 
 
73
  ]
74
 
75
- THRESHOLDS = {
76
- "blame shifting": 0.35, "contradictory statements": 0.27, "control": 0.08, "dismissiveness": 0.32,
77
- "gaslighting": 0.27, "guilt tripping": 0.31, "insults": 0.10, "obscure language": 0.55,
78
- "projection": 0.09, "recovery phase": 0.33, "threat": 0.15
 
 
 
 
 
 
 
 
79
  }
80
 
81
  PATTERN_WEIGHTS = {
82
- "gaslighting": 1.5,
83
- "control": 1.2,
84
- "dismissiveness": 0.7,
85
- "blame shifting": 0.5,
86
- "guilt tripping": 1.2,
87
- "insults": 1.4,
88
- "projection": 1.2,
89
- "recovery phase": 1.1,
90
- "contradictory statements": 0.75,
91
- "threat": 1.6 # 🔧 New: raise weight for threat
92
  }
93
  RISK_STAGE_LABELS = {
94
  1: "🌀 Risk Stage: Tension-Building\nThis message reflects rising emotional pressure or subtle control attempts.",
@@ -293,9 +310,10 @@ def generate_risk_snippet(abuse_score, top_label, escalation_score, stage):
293
  "guilt tripping": "This message may induce guilt in order to control or manipulate behavior.",
294
  "recovery phase": "This message may be part of a tension-reset cycle, appearing kind but avoiding change.",
295
  "projection": "This message may involve attributing the abuser’s own behaviors to the victim.",
 
 
296
  "default": "This message contains language patterns that may affect safety, clarity, or emotional autonomy."
297
- }
298
-
299
  explanation = WHY_FLAGGED.get(pattern_label.lower(), WHY_FLAGGED["default"])
300
 
301
  base = f"\n\n🛑 Risk Level: {risk_level.capitalize()}\n"
 
67
  tokenizer = AutoTokenizer.from_pretrained(model_name, use_fast=False)
68
 
69
  LABELS = [
70
+ "blame shifting",
71
+ "contradictory statements",
72
+ "control",
73
+ "dismissiveness",
74
+ "gaslighting",
75
+ "guilt tripping",
76
+ "insults",
77
+ "obscure language",
78
+ "projection",
79
+ "recovery phase",
80
+ "threat"
81
+
82
  ]
83
 
84
+ BEST_THRESHOLDS = {
85
+ "recovery": 0.622,
86
+ "control": 0.100,
87
+ "gaslighting": 0.410,
88
+ "dismissiveness": 0.867,
89
+ "blameshifting": 0.116,
90
+ "coercion": 0.100,
91
+ "aggression": 0.100,
92
+ "nonabusive": 0.100,
93
+ "deflection": 0.100,
94
+ "projection": 0.100,
95
+ "insults": 0.100
96
  }
97
 
98
  PATTERN_WEIGHTS = {
99
+ "gaslighting": 1.4, # Slightly reduced: threshold tuned to 0.41 (high precision)
100
+ "control": 1.3, # Increased: very low threshold, deserves slightly higher weight
101
+ "dismissiveness": 0.9, # Increased from 0.7 due to high threshold needed
102
+ "blame shifting": 0.6, # Slight increase for stronger detection
103
+ "guilt tripping": 1.2, # Leave as is (not in current threshold set, assumed stable)
104
+ "insults": 1.5, # Raised slightly: F1 of 1.0 at low threshold, so still high signal
105
+ "projection": 1.2, # Leave as is: still valid at current detection levels
106
+ "recovery phase": 1.0, # Slightly lowered to reduce false sense of safety
107
+ "contradictory statements": 0.75, # Leave as is unless retrained
108
+ "threat": 1.6 # Keep high due to high severity, even if threshold wasn't tuned here
109
  }
110
  RISK_STAGE_LABELS = {
111
  1: "🌀 Risk Stage: Tension-Building\nThis message reflects rising emotional pressure or subtle control attempts.",
 
310
  "guilt tripping": "This message may induce guilt in order to control or manipulate behavior.",
311
  "recovery phase": "This message may be part of a tension-reset cycle, appearing kind but avoiding change.",
312
  "projection": "This message may involve attributing the abuser’s own behaviors to the victim.",
313
+ "contradictory statements": "This message may contain internal contradictions used to confuse, destabilize, or deflect responsibility.",
314
+ "obscure language": "This message may use overly formal, vague, or complex language to obscure meaning or avoid accountability.",
315
  "default": "This message contains language patterns that may affect safety, clarity, or emotional autonomy."
316
+ }
 
317
  explanation = WHY_FLAGGED.get(pattern_label.lower(), WHY_FLAGGED["default"])
318
 
319
  base = f"\n\n🛑 Risk Level: {risk_level.capitalize()}\n"