SamanthaStorm committed on
Commit
9af038c
Β·
verified Β·
1 Parent(s): a3ecfe2

Upload app (22).py

Browse files
Files changed (1) hide show
  1. app (22).py +795 -0
app (22).py ADDED
@@ -0,0 +1,795 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ import spaces
3
+ import torch
4
+ import numpy as np
5
+ from transformers import pipeline, RobertaForSequenceClassification, RobertaTokenizer
6
+ from motif_tagging import detect_motifs
7
+ import re
8
+ import matplotlib.pyplot as plt
9
+ import io
10
+ from PIL import Image
11
+ from datetime import datetime
12
+ from transformers import pipeline as hf_pipeline # prevent name collision with gradio pipeline
13
+
14
def get_emotion_profile(text):
    """Run the emotion classifier on *text* and return {emotion_name: score}.

    Scores are rounded to 3 decimal places; emotion names are lowercased.
    """
    raw = emotion_pipeline(text)
    # Pipelines configured with top_k may wrap results in an extra list layer.
    if isinstance(raw, list) and isinstance(raw[0], list):
        raw = raw[0]
    profile = {}
    for entry in raw:
        profile[entry['label'].lower()] = round(entry['score'], 3)
    return profile
19
# Emotion model (no retraining needed)
# Off-the-shelf DistilRoBERTa emotion classifier; top_k=6 returns the six
# highest-scoring emotion labels per input, consumed by get_emotion_profile.
emotion_pipeline = hf_pipeline(
    "text-classification",
    model="j-hartmann/emotion-english-distilroberta-base",
    top_k=6,
    truncation=True
)
26
+
27
# --- Timeline Visualization Function ---
def generate_abuse_score_chart(dates, scores, labels):
    """Render a line chart of abuse scores over the supplied messages.

    Args:
        dates: list of strings; when every entry matches YYYY-MM-DD the x-axis
            uses real dates, otherwise messages are numbered 1..N.
        scores: abuse scores (0-100), one per entry in *dates*.
        labels: accepted for interface compatibility; currently unused.

    Returns:
        A PIL.Image.Image containing the rendered chart.
    """
    # Use real dates only when all entries parse as ISO dates.
    if all(re.match(r"\d{4}-\d{2}-\d{2}", d) for d in dates):
        parsed_x = [datetime.strptime(d, "%Y-%m-%d") for d in dates]
        x_labels = [d.strftime("%Y-%m-%d") for d in parsed_x]
    else:
        parsed_x = list(range(1, len(dates) + 1))
        x_labels = [f"Message {i+1}" for i in range(len(dates))]

    fig, ax = plt.subplots(figsize=(8, 3))
    ax.plot(parsed_x, scores, marker='o', linestyle='-', color='darkred', linewidth=2)

    # Annotate each point with its percentage value.
    for x, y in zip(parsed_x, scores):
        ax.text(x, y + 2, f"{int(y)}%", ha='center', fontsize=8, color='black')

    ax.set_xticks(parsed_x)
    ax.set_xticklabels(x_labels)
    ax.set_xlabel("")  # No axis label
    ax.set_ylabel("Abuse Score (%)")
    ax.set_ylim(0, 105)
    ax.grid(True)
    plt.tight_layout()

    buf = io.BytesIO()
    plt.savefig(buf, format='png')
    buf.seek(0)
    img = Image.open(buf)
    img.load()  # materialize pixels so the buffer can be released safely
    # BUGFIX: figures were never closed, leaking memory on every call.
    plt.close(fig)
    return img
61
+
62
+
63
# --- Abuse Model ---
from transformers import AutoModelForSequenceClassification, AutoTokenizer

# Multilabel abuse-pattern classifier; analyze_single_message applies a
# sigmoid to its logits to get one score per entry in LABELS.
model_name = "SamanthaStorm/tether-multilabel-v4"
model = AutoModelForSequenceClassification.from_pretrained(model_name)
# use_fast=False: load the slow (pure-Python) tokenizer implementation.
tokenizer = AutoTokenizer.from_pretrained(model_name, use_fast=False)
69
+
70
# Labels produced by the multilabel abuse model, in model output order.
LABELS = [
    "recovery", "control", "gaslighting", "guilt tripping", "dismissiveness", "blame shifting",
    "nonabusive", "projection", "insults", "contradictory statements", "obscure language"
]

# Minimum sigmoid score for a label to count as detected.
# "nonabusive" is pinned to 1.0 so it can never fire as a detected pattern.
THRESHOLDS = {
    "recovery": 0.27,
    "control": 0.47,
    "gaslighting": 0.48,
    "guilt tripping": 0.56,  # was written ".56"; value unchanged
    "dismissiveness": 0.25,
    "blame shifting": 0.55,
    "projection": 0.59,
    "insults": 0.33,
    "contradictory statements": 0.27,
    "obscure language": 0.65,
    "nonabusive": 1.0
}

# Relative severity weight per pattern, used when building matched_scores.
PATTERN_WEIGHTS = {
    "recovery": 0.7,
    "control": 1.4,
    "gaslighting": 1.50,
    "guilt tripping": 0.9,
    "dismissiveness": 0.9,
    "blame shifting": 0.8,
    "projection": 0.5,
    "insults": 1.2,
    "contradictory statements": 1.0,
    "obscure language": 0.9,
    "nonabusive": 0.0
}

# NOTE(review): keyed with "recovery phase", but the model label (see LABELS)
# is "recovery"; this table is not referenced by the visible code — confirm
# the intended key before relying on it.
ESCALATION_RISKS = {
    "blame shifting": "low",
    "contradictory statements": "moderate",
    "control": "high",
    "dismissiveness": "moderate",
    "gaslighting": "moderate",
    "guilt tripping": "moderate",
    "insults": "moderate",
    "obscure language": "low",
    "projection": "low",
    "recovery phase": "low"
}

# Stage number -> user-facing description of the abuse-cycle stage.
RISK_STAGE_LABELS = {
    1: "πŸŒ€ Risk Stage: Tension-Building\nThis message reflects rising emotional pressure or subtle control attempts.",
    2: "πŸ”₯ Risk Stage: Escalation\nThis message includes direct or aggressive patterns, suggesting active harm.",
    3: "🌧️ Risk Stage: Reconciliation\nThis message reflects a reset attemptβ€”apologies or emotional repair without accountability.",
    4: "🌸 Risk Stage: Calm / Honeymoon\nThis message appears supportive but may follow prior harm, minimizing it."
}

# Safety checklist shown in the UI: (question, weight). Weights sum to 29,
# matching the "/29" denominator in the composite report.
ESCALATION_QUESTIONS = [
    ("Partner has access to firearms or weapons", 4),
    ("Partner threatened to kill you", 3),
    ("Partner threatened you with a weapon", 3),
    ("Partner has ever choked you, even if you considered it consensual at the time", 4),
    ("Partner injured or threatened your pet(s)", 3),
    ("Partner has broken your things, punched or kicked walls, or thrown things ", 2),
    ("Partner forced or coerced you into unwanted sexual acts", 3),
    ("Partner threatened to take away your children", 2),
    ("Violence has increased in frequency or severity", 3),
    ("Partner monitors your calls/GPS/social media", 2)
]
134
def get_emotional_tone_tag(emotions, sentiment, patterns, abuse_score):
    """Classify the emotional tone of a message.

    Args:
        emotions: dict of {emotion_name: score} from get_emotion_profile.
        sentiment: "supportive" or "undermining".
        patterns: list of detected abuse-pattern labels.
        abuse_score: numeric abuse score (0-100).

    Returns:
        A tone-tag string, or None when no rule matches.

    Rules are evaluated in priority order; the first match wins.
    (Bugfix: a duplicate `disgust = emotions.get(...)` assignment was removed;
    behavior is unchanged.)
    """
    sadness = emotions.get("sadness", 0)
    joy = emotions.get("joy", 0)
    neutral = emotions.get("neutral", 0)
    disgust = emotions.get("disgust", 0)
    anger = emotions.get("anger", 0)
    fear = emotions.get("fear", 0)

    # 1. Performative Regret
    if (
        sadness > 0.4 and
        any(p in patterns for p in ["blame shifting", "guilt tripping", "recovery phase"]) and
        (sentiment == "undermining" or abuse_score > 40)
    ):
        return "performative regret"

    # 2. Coercive Warmth
    if (
        (joy > 0.3 or sadness > 0.4) and
        any(p in patterns for p in ["control", "gaslighting"]) and
        sentiment == "undermining"
    ):
        return "coercive warmth"

    # 3. Cold Invalidation
    if (
        (neutral + disgust) > 0.5 and
        any(p in patterns for p in ["dismissiveness", "projection", "obscure language"]) and
        sentiment == "undermining"
    ):
        return "cold invalidation"

    # 4. Genuine Vulnerability
    if (
        (sadness + fear) > 0.5 and
        sentiment == "supportive" and
        all(p in ["recovery phase"] for p in patterns)
    ):
        return "genuine vulnerability"

    # 5. Emotional Threat
    if (
        (anger + disgust) > 0.5 and
        any(p in patterns for p in ["control", "insults", "dismissiveness"]) and
        sentiment == "undermining"
    ):
        return "emotional threat"

    # 6. Weaponized Sadness
    if (
        sadness > 0.6 and
        any(p in patterns for p in ["guilt tripping", "projection"]) and
        sentiment == "undermining"
    ):
        return "weaponized sadness"

    # 7. Toxic Resignation
    if (
        neutral > 0.5 and
        any(p in patterns for p in ["dismissiveness", "obscure language"]) and
        sentiment == "undermining"
    ):
        return "toxic resignation"

    # 8. Aggressive Dismissal
    if (
        anger > 0.5 and
        any(p in patterns for p in ["aggression", "insults", "control"]) and
        sentiment == "undermining"
    ):
        return "aggressive dismissal"

    # 9. Deflective Hostility
    if (
        (0.2 < anger < 0.7 or 0.2 < disgust < 0.7) and
        any(p in patterns for p in ["deflection", "projection"]) and
        sentiment == "undermining"
    ):
        return "deflective hostility"

    # 10. Mocking Detachment
    if (
        (neutral + joy) > 0.5 and
        any(p in patterns for p in ["mockery", "insults", "projection"]) and
        sentiment == "undermining"
    ):
        return "mocking detachment"

    # 11. Contradictory Gaslight
    if (
        (joy + anger + sadness) > 0.5 and
        any(p in patterns for p in ["gaslighting", "contradictory statements"]) and
        sentiment == "undermining"
    ):
        return "contradictory gaslight"

    # 12. Calculated Neutrality
    if (
        neutral > 0.6 and
        any(p in patterns for p in ["obscure language", "deflection", "dismissiveness"]) and
        sentiment == "undermining"
    ):
        return "calculated neutrality"

    # 13. Forced Accountability Flip
    if (
        (anger + disgust) > 0.5 and
        any(p in patterns for p in ["blame shifting", "manipulation", "projection"]) and
        sentiment == "undermining"
    ):
        return "forced accountability flip"

    # 14. Conditional Affection
    if (
        joy > 0.4 and
        any(p in patterns for p in ["apology baiting", "control", "recovery phase"]) and
        sentiment == "undermining"
    ):
        return "conditional affection"

    # Secondary forced-accountability-flip rule (wider pattern set with
    # "deflection"); kept to preserve original matching behavior.
    if (
        (anger + disgust) > 0.5 and
        any(p in patterns for p in ["blame shifting", "projection", "deflection"]) and
        sentiment == "undermining"
    ):
        return "forced accountability flip"

    # Emotional Instability Fallback
    if (
        (anger + sadness + disgust) > 0.6 and
        sentiment == "undermining"
    ):
        return "emotional instability"

    return None
263
# New DARVO score model (regression-based)
from torch.nn.functional import sigmoid
import torch

# Load the trained DARVO regressor from the Hugging Face Hub; a sigmoid over
# its single logit yields a 0-1 DARVO likelihood (see predict_darvo_score).
darvo_model = AutoModelForSequenceClassification.from_pretrained("SamanthaStorm/tether-darvo-regressor-v1")
darvo_tokenizer = AutoTokenizer.from_pretrained("SamanthaStorm/tether-darvo-regressor-v1", use_fast=False)
darvo_model.eval()  # inference mode: disables dropout etc.
271
+
272
def predict_darvo_score(text):
    """Score *text* for DARVO likelihood in [0, 1], rounded to 4 places."""
    encoded = darvo_tokenizer(text, return_tensors="pt", truncation=True, padding=True)
    with torch.no_grad():
        raw_logits = darvo_model(**encoded).logits
    # Single-logit regressor: squash to [0, 1] and round for display/output.
    return round(sigmoid(raw_logits).item(), 4)
278
def detect_weapon_language(text):
    """Return True when *text* contains weapon/violence keywords.

    Each keyword is anchored at a leading word boundary, so e.g. "begun"
    no longer triggers on "gun" and "harmed" no longer triggers on "armed"
    (the previous plain-substring check produced such false positives).
    Suffix matches like "gunfire" or "shooting"->"shoot"... are NOT required
    to start a new word at the end, preserving recall on inflected forms.
    """
    weapon_keywords = [
        "knife", "knives", "stab", "cut you", "cutting",
        "gun", "shoot", "rifle", "firearm", "pistol",
        "bomb", "blow up", "grenade", "explode",
        "weapon", "armed", "loaded", "kill you", "take you out"
    ]
    text_lower = text.lower()
    return any(
        re.search(r"\b" + re.escape(word), text_lower)
        for word in weapon_keywords
    )
287
def get_risk_stage(patterns, sentiment):
    """Map detected patterns and sentiment to a cycle-of-abuse stage (1-4).

    Stages: 1 tension-building, 2 escalation, 3 reconciliation,
    4 calm/honeymoon (see RISK_STAGE_LABELS).
    """
    if "insults" in patterns:
        return 2
    # BUGFIX: the model emits "recovery" (see LABELS) but this branch only
    # checked "recovery phase", so it could never fire; accept both spellings.
    elif "recovery" in patterns or "recovery phase" in patterns:
        return 3
    elif "control" in patterns or "guilt tripping" in patterns:
        return 1
    elif sentiment == "supportive" and any(p in patterns for p in ["projection", "dismissiveness"]):
        return 4
    return 1
297
+
298
def generate_risk_snippet(abuse_score, top_label, escalation_score, stage):
    """Build the human-readable risk section of the report.

    Args:
        abuse_score: composite abuse intensity (0-100).
        top_label: dominant pattern, optionally "label – score%".
        escalation_score: hybrid escalation score (0 when unknown).
        stage: cycle-of-abuse stage (1-4).

    Returns:
        Markdown-formatted string describing risk level, the dominant
        pattern, and why it was flagged.
    """
    # Extract aggression score if aggression is detected in the label text.
    if isinstance(top_label, str) and "aggression" in top_label.lower():
        try:
            match = re.search(r"\(?(\d+)\%?\)?", top_label)
            aggression_score = int(match.group(1)) / 100 if match else 0
        # BUGFIX: was a bare `except:` which also swallowed SystemExit /
        # KeyboardInterrupt; narrow to the failures this parse can raise.
        except (AttributeError, ValueError, TypeError):
            aggression_score = 0
    else:
        aggression_score = 0

    # Revised risk logic
    if abuse_score >= 85 or escalation_score >= 16:
        risk_level = "high"
    elif abuse_score >= 60 or escalation_score >= 8 or aggression_score >= 0.25:
        risk_level = "moderate"
    elif stage == 2 and abuse_score >= 40:
        risk_level = "moderate"
    else:
        risk_level = "low"

    # Split "label – score" form when present.
    if isinstance(top_label, str) and " – " in top_label:
        pattern_label, pattern_score = top_label.split(" – ")
    else:
        pattern_label = str(top_label) if top_label is not None else "Unknown"
        pattern_score = ""

    WHY_FLAGGED = {
        "control": "This message may reflect efforts to restrict someone’s autonomy, even if it's framed as concern or care.",
        "gaslighting": "This message could be manipulating someone into questioning their perception or feelings.",
        "dismissiveness": "This message may include belittling, invalidating, or ignoring the other person’s experience.",
        "insults": "Direct insults often appear in escalating abusive dynamics and can erode emotional safety.",
        "blame shifting": "This message may redirect responsibility to avoid accountability, especially during conflict.",
        "guilt tripping": "This message may induce guilt in order to control or manipulate behavior.",
        "recovery phase": "This message may be part of a tension-reset cycle, appearing kind but avoiding change.",
        "projection": "This message may involve attributing the abuser’s own behaviors to the victim.",
        "contradictory statements": "This message may contain internal contradictions used to confuse, destabilize, or deflect responsibility.",
        "obscure language": "This message may use overly formal, vague, or complex language to obscure meaning or avoid accountability.",
        "default": "This message contains language patterns that may affect safety, clarity, or emotional autonomy."
    }

    explanation = WHY_FLAGGED.get(pattern_label.lower(), WHY_FLAGGED["default"])

    base = f"\n\nπŸ›‘ Risk Level: {risk_level.capitalize()}\n"
    base += f"This message shows strong indicators of **{pattern_label}**. "

    if risk_level == "high":
        base += "The language may reflect patterns of emotional control, even when expressed in soft or caring terms.\n"
    elif risk_level == "moderate":
        base += "There are signs of emotional pressure or verbal aggression that may escalate if repeated.\n"
    else:
        base += "The message does not strongly indicate abuse, but it's important to monitor for patterns.\n"

    base += f"\nπŸ’‘ *Why this might be flagged:*\n{explanation}\n"
    base += f"\nDetected Pattern: **{pattern_label} ({pattern_score})**\n"
    base += "🧠 You can review the pattern in context. This tool highlights possible dynamicsβ€”not judgments."
    return base
357
+
358
+
359
# --- Step X: Detect Immediate Danger Threats ---
# Literal threat phrases checked against each message. analyze_composite
# normalizes both sides (lowercase, NFKD, punctuation stripped) before doing
# substring matching, so the mixed straight/curly apostrophes here are fine.
THREAT_MOTIFS = [
    "i'll kill you", "i’m going to hurt you", "you’re dead", "you won't survive this",
    "i’ll break your face", "i'll bash your head in", "i’ll snap your neck",
    "i’ll come over there and make you shut up", "i'll knock your teeth out",
    "you’re going to bleed", "you want me to hit you?", "i won’t hold back next time",
    "i swear to god i’ll beat you", "next time, i won’t miss", "i’ll make you scream",
    "i know where you live", "i'm outside", "i’ll be waiting", "i saw you with him",
    "you can’t hide from me", "i’m coming to get you", "i'll find you", "i know your schedule",
    "i watched you leave", "i followed you home", "you'll regret this", "you’ll be sorry",
    "you’re going to wish you hadn’t", "you brought this on yourself", "don’t push me",
    "you have no idea what i’m capable of", "you better watch yourself",
    "i don’t care what happens to you anymore", "i’ll make you suffer", "you’ll pay for this",
    "i’ll never let you go", "you’re nothing without me", "if you leave me, i’ll kill myself",
    "i'll ruin you", "i'll tell everyone what you did", "i’ll make sure everyone knows",
    "i’m going to destroy your name", "you’ll lose everyone", "i’ll expose you",
    "your friends will hate you", "i’ll post everything", "you’ll be cancelled",
    "you’ll lose everything", "i’ll take the house", "i’ll drain your account",
    "you’ll never see a dime", "you’ll be broke when i’m done", "i’ll make sure you lose your job",
    "i’ll take your kids", "i’ll make sure you have nothing", "you can’t afford to leave me",
    "don't make me do this", "you know what happens when i’m mad", "you’re forcing my hand",
    "if you just behaved, this wouldn’t happen", "this is your fault",
    "you’re making me hurt you", "i warned you", "you should have listened"
]
383
+
384
+
385
def compute_abuse_score(matched_scores, sentiment):
    """
    Compute abuse score with more conservative adjustments.

    Args:
        matched_scores: list of (label, score, weight) tuples for every
            pattern that passed its detection threshold.
        sentiment: "supportive" or "undermining".

    Returns:
        float in [0, 100], rounded to one decimal place.

    Note: the original definition carried a duplicated @spaces.GPU decorator;
    both were removed because this function is pure Python with no GPU work
    (analyze_composite, the entry point, keeps its own @spaces.GPU).
    """
    if not matched_scores:
        return 0.0

    # Start from the single strongest pattern...
    sorted_scores = sorted(matched_scores, key=lambda x: x[1], reverse=True)
    highest_score = sorted_scores[0][1]
    num_patterns = len(matched_scores)

    # ...scaled down 20% for each additional detected pattern.
    if num_patterns > 1:
        highest_score *= (1 - (num_patterns - 1) * 0.2)

    base_score = highest_score * 100

    # Multipliers for especially harmful patterns with confident (>0.5) scores.
    critical_patterns = {
        'gaslighting': 1.4,
        'guilt tripping': 1.3,
        'blame shifting': 1.2,
        'control': 1.3,
        'insults': 1.1,
        'manipulation': 1.2,
        'love bombing': 1.2,
        'emotional blackmail': 1.4,
        'dismissiveness': 1.1,
        'contradictory statements': 1.1
    }

    for label, score, _ in matched_scores:
        if label in critical_patterns and score > 0.5:
            base_score *= critical_patterns[label]

    # Mild boosts when several patterns co-occur.
    if len(matched_scores) >= 2:
        base_score *= 1.1
    if len(matched_scores) >= 3:
        base_score *= 1.05

    # Small boost for any very confident prediction.
    if any(score > 0.8 for _, score, _ in matched_scores):
        base_score *= 1.05

    # Sentiment modifier: "support" carrying manipulative patterns is
    # discounted less than genuine support.
    if sentiment == "supportive":
        manipulative_patterns = {'guilt tripping', 'gaslighting', 'blame shifting', 'love bombing'}
        if any(label in manipulative_patterns for label, score, _ in matched_scores if score > 0.6):
            base_score *= 0.95  # strongly manipulative "support"
        elif any(label in manipulative_patterns for label, score, _ in matched_scores if score > 0.4):
            base_score *= 0.9   # moderately manipulative "support"
        else:
            base_score *= 0.8   # genuine support

    elif sentiment == "undermining":
        base_score *= 1.15

    # Score floors for highly confident detections.
    if any(score > 0.9 for _, score, _ in matched_scores):
        base_score = max(base_score, 75.0)
    elif any(score > 0.7 for _, score, _ in matched_scores):
        base_score = max(base_score, 60.0)

    return min(round(base_score, 1), 100.0)
451
+
452
def analyze_single_message(text, thresholds):
    """Analyze one message for abuse patterns, sentiment, and risk stage.

    Args:
        text: the message to analyze.
        thresholds: per-label detection thresholds (see THRESHOLDS).

    Returns:
        Tuple of (abuse_score, threshold_labels, top_patterns,
        {"label": sentiment}, stage, darvo_score, tone_tag).
    """
    print("⚑ ENTERED analyze_single_message")
    stage = 1
    motif_hits, matched_phrases = detect_motifs(text)

    # Emotion profile drives the sentiment heuristic below.
    emotion_profile = get_emotion_profile(text)
    sentiment_score = emotion_profile.get("anger", 0) + emotion_profile.get("disgust", 0)

    # Multilabel abuse-pattern scores (sigmoid per label in LABELS).
    inputs = tokenizer(text, return_tensors="pt", truncation=True, padding=True)
    with torch.no_grad():
        outputs = model(**inputs)
        scores = torch.sigmoid(outputs.logits.squeeze(0)).numpy()

    # Sentiment override: a flat/neutral tone combined with control-type
    # patterns is treated as undermining even when anger/disgust are low.
    if emotion_profile.get("neutral", 0) > 0.85 and any(
        scores[LABELS.index(l)] > thresholds[l]
        for l in ["control", "blame shifting"]
    ):
        sentiment = "undermining"
    else:
        sentiment = "undermining" if sentiment_score > 0.25 else "supportive"

    weapon_flag = detect_weapon_language(text)

    # Raise every threshold slightly for supportive-sounding messages.
    adjusted_thresholds = {
        k: v + 0.05 if sentiment == "supportive" else v
        for k, v in thresholds.items()
    }

    darvo_score = predict_darvo_score(text)

    threshold_labels = [
        label for label, score in zip(LABELS, scores)
        if score > adjusted_thresholds[label]
    ]

    # Early exit if nothing passed its threshold.
    if not threshold_labels:
        return 0.0, [], [], {"label": sentiment}, 1, 0.0, "supportive"

    top_patterns = sorted(
        [(label, score) for label, score in zip(LABELS, scores)],
        key=lambda x: x[1],
        reverse=True
    )[:2]

    matched_scores = [
        (label, score, PATTERN_WEIGHTS.get(label, 1.0))
        for label, score in zip(LABELS, scores)
        if score > adjusted_thresholds[label]
    ]

    # Abuse score — computed BEFORE the insult cap below.
    abuse_score_raw = compute_abuse_score(matched_scores, sentiment)

    # Cap subtle insults to avoid excessive abuse score.
    # BUGFIX: this cap previously ran before abuse_score_raw was assigned
    # (UnboundLocalError when triggered) and its result was then overwritten
    # by the score computation anyway; it now applies after scoring.
    if (
        len(threshold_labels) == 1 and "insults" in threshold_labels
        and emotion_profile.get("neutral", 0) > 0.85
    ):
        abuse_score_raw = min(abuse_score_raw, 40)

    # Weapon language forces a score bump and at least the escalation stage.
    if weapon_flag:
        abuse_score_raw = min(abuse_score_raw + 25, 100)
        if stage < 2:
            stage = 2

    abuse_score = min(abuse_score_raw, 100 if "control" in threshold_labels else 95)

    # Tone tag
    tone_tag = get_emotional_tone_tag(emotion_profile, sentiment, threshold_labels, abuse_score)

    # Remove recovery tag if the tone suggests the repair is not genuine.
    if "recovery" in threshold_labels and tone_tag == "forced accountability flip":
        threshold_labels.remove("recovery")
        top_patterns = [p for p in top_patterns if p[0] != "recovery"]
        print("⚠️ Removing 'recovery' due to undermining sentiment (not genuine repair)")

    # Short, angry, profane messages count as insults regardless of model output.
    profane_words = {"fuck", "fucking", "bitch", "shit", "cunt", "ho", "asshole", "dick", "whore", "slut"}
    tokens = set(text.lower().split())
    has_profane = any(word in tokens for word in profane_words)
    short_text = len(tokens) <= 10
    anger_score = emotion_profile.get("anger", 0)
    if has_profane and anger_score > 0.75 and short_text:
        print("⚠️ Profanity + Anger Override Triggered")
        insult_score = next((s for l, s in top_patterns if l == "insults"), 0)
        if ("insults", insult_score) not in top_patterns:
            top_patterns = [("insults", insult_score)] + top_patterns
        if "insults" not in threshold_labels:
            threshold_labels.append("insults")

    # Debug output
    print(f"Emotional Tone Tag: {tone_tag}")
    print("Emotion Profile:")
    for emotion, score in emotion_profile.items():
        print(f" {emotion.capitalize():10}: {score}")
    print("\n--- Debug Info ---")
    print(f"Text: {text}")
    print(f"Sentiment (via emotion): {sentiment} (score: {round(sentiment_score, 3)})")
    print("Abuse Pattern Scores:")
    for label, score in zip(LABELS, scores):
        passed = "βœ…" if score > adjusted_thresholds[label] else "❌"
        print(f" {label:25} β†’ {score:.3f} {passed}")
    print(f"Matched for score: {[(l, round(s, 3)) for l, s, _ in matched_scores]}")
    print(f"Abuse Score Raw: {round(abuse_score_raw, 1)}")
    print("------------------\n")

    return abuse_score, threshold_labels, top_patterns, {"label": sentiment}, stage, darvo_score, tone_tag
565
+
566
+ import spaces
567
+
568
@spaces.GPU
def analyze_composite(msg1, msg2, msg3, *answers_and_none):
    """Analyze up to three messages plus the escalation checklist.

    Args:
        msg1, msg2, msg3: free-text messages (empty entries are skipped).
        *answers_and_none: checklist booleans in ESCALATION_QUESTIONS order,
            followed by the "None of the above" checkbox as the last element.

    Returns:
        (report_text, timeline_image) — a markdown report and a PIL chart,
        or an error string and None when no message was entered.

    Cleanup vs. the original: the checklist score was computed twice with
    identical logic (duplicate removed), and the unused Counter import and
    pattern_labels list were dropped. Behavior is unchanged.
    """
    none_selected_checked = answers_and_none[-1]
    responses_checked = any(answers_and_none[:-1])
    # "None of the above" only counts when no individual item is checked.
    none_selected = not responses_checked and none_selected_checked

    if none_selected:
        escalation_score = 0
        escalation_note = "Checklist completed: no danger items reported."
        escalation_completed = True
    elif responses_checked:
        escalation_score = sum(w for (_, w), a in zip(ESCALATION_QUESTIONS, answers_and_none[:-1]) if a)
        escalation_note = "Checklist completed."
        escalation_completed = True
    else:
        escalation_score = None
        escalation_note = "Checklist not completed."
        escalation_completed = False

    messages = [msg1, msg2, msg3]
    # Guard against None inputs as well as blank strings.
    active = [(m, f"Message {i+1}") for i, m in enumerate(messages) if m and m.strip()]
    if not active:
        return "Please enter at least one message.", None

    # --- Immediate danger threat detection ---
    def normalize(text):
        import unicodedata
        text = text.lower().strip()
        text = unicodedata.normalize("NFKD", text)  # handles curly quotes
        text = text.replace("’", "'")  # smart to straight
        return re.sub(r"[^a-z0-9 ]", "", text)

    def detect_threat_motifs(message, motif_list):
        norm_msg = normalize(message)
        return [
            motif for motif in motif_list
            if normalize(motif) in norm_msg
        ]

    # Collect matches per message.
    immediate_threats = [detect_threat_motifs(m, THREAT_MOTIFS) for m, _ in active]
    flat_threats = [t for sublist in immediate_threats for t in sublist]
    threat_risk = "Yes" if flat_threats else "No"
    results = [(analyze_single_message(m, THRESHOLDS.copy()), d) for m, d in active]

    abuse_scores = [r[0][0] for r in results]
    stages = [r[0][4] for r in results]
    darvo_scores = [r[0][5] for r in results]
    tone_tags = [r[0][6] for r in results]
    dates_used = [r[1] for r in results]

    # Bucket every predicted pattern by severity.
    predicted_labels = [label for r in results for label, _ in r[0][2]]
    high = {'control'}
    moderate = {'gaslighting', 'dismissiveness', 'obscure language', 'insults', 'contradictory statements', 'guilt tripping'}
    low = {'blame shifting', 'projection', 'recovery phase'}
    counts = {'high': 0, 'moderate': 0, 'low': 0}
    for label in predicted_labels:
        if label in high:
            counts['high'] += 1
        elif label in moderate:
            counts['moderate'] += 1
        elif label in low:
            counts['low'] += 1

    # Pattern escalation logic
    pattern_escalation_risk = "Low"
    if counts['high'] >= 2 and counts['moderate'] >= 2:
        pattern_escalation_risk = "Critical"
    elif (counts['high'] >= 2 and counts['moderate'] >= 1) or (counts['moderate'] >= 3) or (counts['high'] >= 1 and counts['moderate'] >= 2):
        pattern_escalation_risk = "High"
    elif (counts['moderate'] == 2) or (counts['high'] == 1 and counts['moderate'] == 1) or (counts['moderate'] == 1 and counts['low'] >= 2) or (counts['high'] == 1 and sum(counts.values()) == 1):
        pattern_escalation_risk = "Moderate"

    checklist_escalation_risk = "Unknown" if escalation_score is None else (
        "Critical" if escalation_score >= 20 else
        "Moderate" if escalation_score >= 10 else
        "Low"
    )

    # Per-message bumps from DARVO, tone, intensity, and stage.
    escalation_bump = 0
    for result, _ in results:
        abuse_score, _, _, sentiment, stage, darvo_score, tone_tag = result
        if darvo_score > 0.65:
            escalation_bump += 3
        if tone_tag in ["forced accountability flip", "emotional threat"]:
            escalation_bump += 2
        if abuse_score > 80:
            escalation_bump += 2
        if stage == 2:
            escalation_bump += 3

    def rank(label):
        return {"Low": 0, "Moderate": 1, "High": 2, "Critical": 3, "Unknown": 0}.get(label, 0)

    combined_score = rank(pattern_escalation_risk) + rank(checklist_escalation_risk) + escalation_bump
    escalation_risk = (
        "Critical" if combined_score >= 6 else
        "High" if combined_score >= 4 else
        "Moderate" if combined_score >= 2 else
        "Low"
    )

    # Build escalation_text and hybrid_score
    if escalation_score is None:
        escalation_text = (
            "🚫 **Escalation Potential: Unknown** (Checklist not completed)\n"
            "⚠️ This section was not completed. Escalation potential is estimated using message data only.\n"
        )
        hybrid_score = 0
    elif escalation_score == 0:
        escalation_text = (
            "βœ… **Escalation Checklist Completed:** No danger items reported.\n"
            "🧭 **Escalation potential estimated from detected message patterns only.**\n"
            f"β€’ Pattern Risk: {pattern_escalation_risk}\n"
            f"β€’ Checklist Risk: None reported\n"
            f"β€’ Escalation Bump: +{escalation_bump} (from DARVO, tone, intensity, etc.)"
        )
        hybrid_score = escalation_bump
    else:
        hybrid_score = escalation_score + escalation_bump
        escalation_text = (
            f"πŸ“ˆ **Escalation Potential: {escalation_risk} ({hybrid_score}/29)**\n"
            "πŸ“‹ This score combines your safety checklist answers *and* detected high-risk behavior.\n"
            f"β€’ Pattern Risk: {pattern_escalation_risk}\n"
            f"β€’ Checklist Risk: {checklist_escalation_risk}\n"
            f"β€’ Escalation Bump: +{escalation_bump} (from DARVO, tone, intensity, etc.)"
        )

    # Composite Abuse Score: mean of per-message recomputed scores.
    composite_abuse_scores = []
    for result, _ in results:
        _, _, top_patterns, sentiment, _, _, _ = result
        matched_scores = [(label, score, PATTERN_WEIGHTS.get(label, 1.0)) for label, score in top_patterns]
        final_score = compute_abuse_score(matched_scores, sentiment["label"])
        composite_abuse_scores.append(final_score)
    composite_abuse = int(round(sum(composite_abuse_scores) / len(composite_abuse_scores)))

    most_common_stage = max(set(stages), key=stages.count)
    stage_text = RISK_STAGE_LABELS[most_common_stage]

    # Derive the top label for each message, with safe fallbacks.
    top_labels = []
    for result, _ in results:
        threshold_labels = result[1]
        top_patterns = result[2]
        if threshold_labels:
            top_labels.append(threshold_labels[0])
        elif top_patterns:
            top_labels.append(top_patterns[0][0])
        else:
            top_labels.append("none")

    avg_darvo = round(sum(darvo_scores) / len(darvo_scores), 3)
    darvo_blurb = ""
    if avg_darvo > 0.25:
        level = "moderate" if avg_darvo < 0.65 else "high"
        darvo_blurb = f"\n\n🎭 **DARVO Score: {avg_darvo}** β†’ This indicates a **{level} likelihood** of narrative reversal (DARVO), where the speaker may be denying, attacking, or reversing blame."

    out = f"Abuse Intensity: {composite_abuse}%\n"
    out += "πŸ“Š This reflects the strength and severity of detected abuse patterns in the message(s).\n\n"
    out += generate_risk_snippet(composite_abuse, top_labels[0], hybrid_score, most_common_stage)
    out += f"\n\n{stage_text}"
    out += darvo_blurb
    out += "\n\n🎭 **Emotional Tones Detected:**\n"
    for i, tone in enumerate(tone_tags):
        out += f"β€’ Message {i+1}: *{tone or 'none'}*\n"

    # --- Immediate Danger Threats section ---
    if flat_threats:
        out += "\n\n🚨 **Immediate Danger Threats Detected:**\n"
        for t in set(flat_threats):
            out += f"β€’ \"{t}\"\n"
        out += "\n⚠️ These phrases may indicate an imminent risk to physical safety."
    else:
        out += "\n\n🧩 **Immediate Danger Threats:** None explicitly detected.\n"
        out += "This does *not* rule out risk, but no direct threat phrases were matched."

    timeline_image = generate_abuse_score_chart(dates_used, abuse_scores, top_labels)
    out += "\n\n" + escalation_text
    return out, timeline_image
769
+
770
# Three free-text message inputs, shown above the escalation checklist.
textbox_inputs = [gr.Textbox(label=f"Message {i+1}") for i in range(3)]
# One checkbox per weighted escalation question (weights live in the tuples).
quiz_boxes = [gr.Checkbox(label=q) for q, _ in ESCALATION_QUESTIONS]
none_box = gr.Checkbox(label="None of the above")


# --- Final launch (no guards) ---

demo = gr.Interface(
    fn=analyze_composite,
    inputs=textbox_inputs + quiz_boxes + [none_box],
    outputs=[
        gr.Textbox(label="Results"),
        gr.Image(label="Abuse Score Timeline", type="pil")
    ],
    title="Abuse Pattern Detector + Escalation Quiz",
    description=(
        "Enter up to three messages that concern you. "
        "For the most accurate results, include messages from a recent emotionally intense period."
    ),
    flagging_mode="manual"
)
# This single call will start the server and block,
# keeping the container alive on Spaces.
demo.launch()
794
+
795
+