SamanthaStorm committed on
Commit
68a049e
Β·
verified Β·
1 Parent(s): 50c26ca

Upload app (9).py

Browse files
Files changed (1) hide show
  1. app (9).py +716 -0
app (9).py ADDED
@@ -0,0 +1,716 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ import torch
3
+ import numpy as np
4
+ from transformers import pipeline, RobertaForSequenceClassification, RobertaTokenizer
5
+ from motif_tagging import detect_motifs
6
+ import re
7
+ import matplotlib.pyplot as plt
8
+ import io
9
+ from PIL import Image
10
+ from datetime import datetime
11
+ from transformers import pipeline as hf_pipeline # prevent name collision with gradio pipeline
12
+
13
def get_emotion_profile(text):
    """Run the emotion classifier on *text* and return {emotion: score}.

    Label names are lower-cased and scores rounded to 3 decimals. Handles the
    pipeline returning either a flat result list or a nested single-item batch.
    """
    raw = emotion_pipeline(text)
    if isinstance(raw, list) and isinstance(raw[0], list):
        raw = raw[0]
    profile = {}
    for entry in raw:
        profile[entry['label'].lower()] = round(entry['score'], 3)
    return profile
18
# Emotion model (no retraining needed)
# Off-the-shelf emotion classifier used by get_emotion_profile above.
# top_k=None keeps the score for every emotion label instead of only the best one;
# truncation=True guards against inputs longer than the model's max length.
emotion_pipeline = hf_pipeline(
    "text-classification",
    model="j-hartmann/emotion-english-distilroberta-base",
    top_k=None,
    truncation=True
)
25
+
26
# --- Timeline Visualization Function ---
def generate_abuse_score_chart(dates, scores, labels):
    """Render a line chart of abuse scores over time and return it as a PIL image.

    Args:
        dates: list of strings; when every entry matches YYYY-MM-DD they are
            plotted as real dates, otherwise messages are numbered sequentially.
        scores: abuse percentages (0-100), one per message.
        labels: accepted for interface compatibility; currently unused.

    Returns:
        PIL.Image.Image with the rendered chart.
    """
    # NOTE: the redundant function-local imports of matplotlib/io/PIL/datetime/re
    # were removed; all of these are already imported at module level.

    # Use a real date axis only when every entry parses; otherwise fall back
    # to simple 1-based message indices.
    if all(re.match(r"\d{4}-\d{2}-\d{2}", d) for d in dates):
        parsed_x = [datetime.strptime(d, "%Y-%m-%d") for d in dates]
        x_labels = [d.strftime("%Y-%m-%d") for d in parsed_x]
    else:
        parsed_x = list(range(1, len(dates) + 1))
        x_labels = [f"Message {i+1}" for i in range(len(dates))]

    fig, ax = plt.subplots(figsize=(8, 3))
    ax.plot(parsed_x, scores, marker='o', linestyle='-', color='darkred', linewidth=2)

    # Annotate each point with its percentage, slightly above the marker.
    for x, y in zip(parsed_x, scores):
        ax.text(x, y + 2, f"{int(y)}%", ha='center', fontsize=8, color='black')

    ax.set_xticks(parsed_x)
    ax.set_xticklabels(x_labels)
    ax.set_xlabel("")  # No axis label
    ax.set_ylabel("Abuse Score (%)")
    ax.set_ylim(0, 105)  # headroom so the text annotations are not clipped
    ax.grid(True)
    plt.tight_layout()

    buf = io.BytesIO()
    plt.savefig(buf, format='png')
    buf.seek(0)
    # Close the figure so repeated requests do not accumulate open figures
    # (matplotlib keeps figures alive until explicitly closed).
    plt.close(fig)
    return Image.open(buf)
60
+
61
+
62
# --- Abuse Model ---
from transformers import AutoModelForSequenceClassification, AutoTokenizer

# Multi-label abuse-pattern classifier: one independent sigmoid score per
# entry in LABELS (see analyze_single_message).
model_name = "SamanthaStorm/tether-multilabel-v3"
model = AutoModelForSequenceClassification.from_pretrained(model_name)
# use_fast=False forces the slow (Python) tokenizer — presumably for
# compatibility with how the model was trained; TODO confirm.
tokenizer = AutoTokenizer.from_pretrained(model_name, use_fast=False)
68
+
69
# Output labels of the multi-label abuse model, in the exact order of the
# model's logits (indexed via LABELS.index(...) elsewhere). Do not reorder.
LABELS = [
    "recovery", "control", "gaslighting", "dismissiveness", "blame shifting",
    "coercion", "aggression", "nonabusive", "deflection", "projection", "insults"
]
73
+
74
# Per-label sigmoid cutoffs: a label counts as "detected" only when its score
# exceeds its threshold. Values are highly asymmetric (e.g. recovery 0.999 vs
# aggression 0.02) — presumably tuned empirically per label; TODO confirm.
THRESHOLDS = {
    "recovery": 0.999,
    "control": 0.100,
    "gaslighting": 0.410,
    "dismissiveness": 0.867,
    "blame shifting": 0.116,
    "coercion": 0.100,
    "aggression": 0.02,
    "nonabusive": 0.100,
    "deflection": 0.100,
    "projection": 0.100,
    "insults": 0.100
}
87
+
88
# Relative severity weights used by compute_abuse_score when averaging the
# detected patterns (higher weight -> pattern contributes more to the score).
PATTERN_WEIGHTS = {
    "gaslighting": 1.5,
    "control": 1.2,
    "dismissiveness": 0.7,
    "blame shifting": 0.5,
    "insults": 1.4,
    "projection": 1.2,
    "recovery": 1.1,
    "coercion": 1.3,
    "aggression": 2.2,
    "nonabusive": 0.1,
    "deflection": 0.4
}
101
# Human-readable descriptions for the four stages of the abuse cycle returned
# by get_risk_stage (1=tension building, 2=escalation, 3=reconciliation, 4=calm).
RISK_STAGE_LABELS = {
    1: "πŸŒ€ Risk Stage: Tension-Building\nThis message reflects rising emotional pressure or subtle control attempts.",
    2: "πŸ”₯ Risk Stage: Escalation\nThis message includes direct or aggressive patterns, suggesting active harm.",
    3: "🌧️ Risk Stage: Reconciliation\nThis message reflects a reset attemptβ€”apologies or emotional repair without accountability.",
    4: "🌸 Risk Stage: Calm / Honeymoon\nThis message appears supportive but may follow prior harm, minimizing it."
}
107
+
108
# Safety-checklist questions shown in the UI as checkboxes, each paired with an
# escalation weight. The weighted sum of checked items is the baseline
# escalation score in analyze_composite (max = sum of all weights = 29).
ESCALATION_QUESTIONS = [
    ("Partner has access to firearms or weapons", 4),
    ("Partner threatened to kill you", 3),
    ("Partner threatened you with a weapon", 3),
    ("Partner has ever choked you, even if you considered it consensual at the time", 4),
    ("Partner injured or threatened your pet(s)", 3),
    ("Partner has broken your things, punched or kicked walls, or thrown things ", 2),
    ("Partner forced or coerced you into unwanted sexual acts", 3),
    ("Partner threatened to take away your children", 2),
    ("Violence has increased in frequency or severity", 3),
    ("Partner monitors your calls/GPS/social media", 2)
]
120
# Pattern labels treated as DARVO indicators (Deny, Attack, Reverse Victim &
# Offender) by calculate_darvo_score.
# NOTE(review): "recovery phase" and "contradictory statements" are not in
# LABELS, so they can never appear in the detected-pattern lists passed in and
# never contribute to the count — confirm whether that is intended.
DARVO_PATTERNS = [
    "blame shifting",  # "You're the reason this happens"
    "projection",  # "You're the abusive one"
    "deflection",  # "This isn't about that"
    "dismissiveness",  # "You're overreacting"
    "insults",  # Personal attacks that redirect attention
    "aggression",  # Escalates tone to destabilize
    "recovery phase",  # Sudden affection following aggression
    "contradictory statements"  # “I never said that” immediately followed by a version of what they said
]
130
# Canonical DARVO phrases. calculate_darvo_score does a loose bidirectional
# substring match between these and the motifs detected in a message.
# Grouped roughly as: denial/minimization, attack/reversal, victim-posturing.
DARVO_MOTIFS = [
    "I never said that.", "You’re imagining things.", "That never happened.",
    "You’re making a big deal out of nothing.", "It was just a joke.", "You’re too sensitive.",
    "I don’t know what you’re talking about.", "You’re overreacting.", "I didn’t mean it that way.",
    "You’re twisting my words.", "You’re remembering it wrong.", "You’re always looking for something to complain about.",
    "You’re just trying to start a fight.", "I was only trying to help.", "You’re making things up.",
    "You’re blowing this out of proportion.", "You’re being paranoid.", "You’re too emotional.",
    "You’re always so dramatic.", "You’re just trying to make me look bad.",

    "You’re crazy.", "You’re the one with the problem.", "You’re always so negative.",
    "You’re just trying to control me.", "You’re the abusive one.", "You’re trying to ruin my life.",
    "You’re just jealous.", "You’re the one who needs help.", "You’re always playing the victim.",
    "You’re the one causing all the problems.", "You’re just trying to make me feel guilty.",
    "You’re the one who can’t let go of the past.", "You’re the one who’s always angry.",
    "You’re the one who’s always complaining.", "You’re the one who’s always starting arguments.",
    "You’re the one who’s always making things worse.", "You’re the one who’s always making me feel bad.",
    "You’re the one who’s always making me look like the bad guy.",
    "You’re the one who’s always making me feel like a failure.",
    "You’re the one who’s always making me feel like I’m not good enough.",

    "I can’t believe you’re doing this to me.", "You’re hurting me.",
    "You’re making me feel like a terrible person.", "You’re always blaming me for everything.",
    "You’re the one who’s abusive.", "You’re the one who’s controlling.", "You’re the one who’s manipulative.",
    "You’re the one who’s toxic.", "You’re the one who’s gaslighting me.",
    "You’re the one who’s always putting me down.", "You’re the one who’s always making me feel bad.",
    "You’re the one who’s always making me feel like I’m not good enough.",
    "You’re the one who’s always making me feel like I’m the problem.",
    "You’re the one who’s always making me feel like I’m the bad guy.",
    "You’re the one who’s always making me feel like I’m the villain.",
    "You’re the one who’s always making me feel like I’m the one who needs to change.",
    "You’re the one who’s always making me feel like I’m the one who’s wrong.",
    "You’re the one who’s always making me feel like I’m the one who’s crazy.",
    "You’re the one who’s always making me feel like I’m the one who’s abusive.",
    "You’re the one who’s always making me feel like I’m the one who’s toxic."
]
165
def get_emotional_tone_tag(emotions, sentiment, patterns, abuse_score):
    """Classify the emotional tone of a message.

    Walks an ordered list of heuristics combining the emotion profile, the
    detected abuse patterns, and the sentiment; the first rule that matches
    wins, so rule order is significant.

    Args:
        emotions: dict of emotion name -> score (e.g. from get_emotion_profile).
        sentiment: "undermining" or "supportive".
        patterns: list of detected pattern labels.
        abuse_score: overall abuse score (0-100).

    Returns:
        The tone-tag string, or None when no rule matches.
    """
    sadness = emotions.get("sadness", 0)
    joy = emotions.get("joy", 0)
    neutral = emotions.get("neutral", 0)
    disgust = emotions.get("disgust", 0)
    anger = emotions.get("anger", 0)
    fear = emotions.get("fear", 0)
    # (the original re-read "disgust" a second time; redundant, removed)

    # 1. Performative Regret
    if (
        sadness > 0.4 and
        any(p in patterns for p in ["blame shifting", "guilt tripping", "recovery phase"]) and
        (sentiment == "undermining" or abuse_score > 40)
    ):
        return "performative regret"

    # 2. Coercive Warmth
    if (
        (joy > 0.3 or sadness > 0.4) and
        any(p in patterns for p in ["control", "gaslighting"]) and
        sentiment == "undermining"
    ):
        return "coercive warmth"

    # 3. Cold Invalidation
    if (
        (neutral + disgust) > 0.5 and
        any(p in patterns for p in ["dismissiveness", "projection", "obscure language"]) and
        sentiment == "undermining"
    ):
        return "cold invalidation"

    # 4. Genuine Vulnerability
    # NOTE(review): the all(...) check is vacuously true for an empty pattern list.
    if (
        (sadness + fear) > 0.5 and
        sentiment == "supportive" and
        all(p in ["recovery phase"] for p in patterns)
    ):
        return "genuine vulnerability"

    # 5. Emotional Threat
    if (
        (anger + disgust) > 0.5 and
        any(p in patterns for p in ["control", "threat", "insults", "dismissiveness"]) and
        sentiment == "undermining"
    ):
        return "emotional threat"

    # 6. Weaponized Sadness
    if (
        sadness > 0.6 and
        any(p in patterns for p in ["guilt tripping", "projection"]) and
        sentiment == "undermining"
    ):
        return "weaponized sadness"

    # 7. Toxic Resignation
    if (
        neutral > 0.5 and
        any(p in patterns for p in ["dismissiveness", "obscure language"]) and
        sentiment == "undermining"
    ):
        return "toxic resignation"

    # 8. Aggressive Dismissal
    if (
        anger > 0.5 and
        any(p in patterns for p in ["aggression", "insults", "control"]) and
        sentiment == "undermining"
    ):
        return "aggressive dismissal"

    # 9. Deflective Hostility
    if (
        (0.2 < anger < 0.7 or 0.2 < disgust < 0.7) and
        any(p in patterns for p in ["deflection", "projection"]) and
        sentiment == "undermining"
    ):
        return "deflective hostility"

    # 10. Mocking Detachment
    if (
        (neutral + joy) > 0.5 and
        any(p in patterns for p in ["mockery", "insults", "projection"]) and
        sentiment == "undermining"
    ):
        return "mocking detachment"

    # 11. Contradictory Gaslight
    if (
        (joy + anger + sadness) > 0.5 and
        any(p in patterns for p in ["gaslighting", "contradictory statements"]) and
        sentiment == "undermining"
    ):
        return "contradictory gaslight"

    # 12. Calculated Neutrality
    if (
        neutral > 0.6 and
        any(p in patterns for p in ["obscure language", "deflection", "dismissiveness"]) and
        sentiment == "undermining"
    ):
        return "calculated neutrality"

    # 13. Forced Accountability Flip
    if (
        (anger + disgust) > 0.5 and
        any(p in patterns for p in ["blame shifting", "manipulation", "projection"]) and
        sentiment == "undermining"
    ):
        return "forced accountability flip"

    # 14. Conditional Affection
    if (
        joy > 0.4 and
        any(p in patterns for p in ["apology baiting", "control", "recovery phase"]) and
        sentiment == "undermining"
    ):
        return "conditional affection"

    # Second accountability-flip rule: unlike rule 13 it also matches "deflection".
    if (
        (anger + disgust) > 0.5 and
        any(p in patterns for p in ["blame shifting", "projection", "deflection"]) and
        sentiment == "undermining"
    ):
        return "forced accountability flip"

    # Emotional Instability Fallback
    if (
        (anger + sadness + disgust) > 0.6 and
        sentiment == "undermining"
    ):
        return "emotional instability"

    return None
294
def detect_contradiction(message):
    """Return True when *message* pairs an apparent concession or affection
    with an attack within a short span (a contradiction signal for DARVO)."""
    checks = (
        (r"\b(i love you).{0,15}(i hate you|you ruin everything)", re.IGNORECASE),
        (r"\b(i’m sorry).{0,15}(but you|if you hadn’t)", re.IGNORECASE),
        (r"\b(i’m trying).{0,15}(you never|why do you)", re.IGNORECASE),
        (r"\b(do what you want).{0,15}(you’ll regret it|i always give everything)", re.IGNORECASE),
        (r"\b(i don’t care).{0,15}(you never think of me)", re.IGNORECASE),
        (r"\b(i guess i’m just).{0,15}(the bad guy|worthless|never enough)", re.IGNORECASE),
    )
    for pattern, flags in checks:
        if re.search(pattern, message, flags):
            return True
    return False
304
+
305
def calculate_darvo_score(patterns, sentiment_before, sentiment_after, motifs_found, contradiction_flag=False):
    """Estimate a DARVO (Deny, Attack, Reverse Victim & Offender) score in [0, 1].

    Combines four weighted components: count of DARVO-related patterns (0.3),
    positive sentiment shift (0.3), fraction of DARVO motifs matched (0.25),
    and a binary contradiction flag (0.15); the sum is capped at 1.0 and
    rounded to 3 decimals.
    """
    # How many of the detected patterns are DARVO-related.
    darvo_pattern_count = 0
    for p in patterns:
        if p.lower() in DARVO_PATTERNS:
            darvo_pattern_count += 1

    # Only a positive shift (sentiment worsening after) contributes.
    shift_component = max(0.0, sentiment_after - sentiment_before)

    # Loose bidirectional substring match between detected motifs and the
    # canonical DARVO phrase list.
    matched_motifs = 0
    for motif in motifs_found:
        if any(phrase.lower() in motif.lower() or motif.lower() in phrase.lower()
               for phrase in DARVO_MOTIFS):
            matched_motifs += 1
    motif_component = matched_motifs / max(len(DARVO_MOTIFS), 1)

    contradiction_component = 1.0 if contradiction_flag else 0.0

    combined = (
        0.3 * darvo_pattern_count
        + 0.3 * shift_component
        + 0.25 * motif_component
        + 0.15 * contradiction_component
    )
    return round(min(combined, 1.0), 3)
330
def detect_weapon_language(text):
    """Return True when the message contains weapon/violence vocabulary
    (case-insensitive substring match)."""
    lowered = text.lower()
    threat_terms = (
        "knife", "knives", "stab", "cut you", "cutting",
        "gun", "shoot", "rifle", "firearm", "pistol",
        "bomb", "blow up", "grenade", "explode",
        "weapon", "armed", "loaded", "kill you", "take you out",
    )
    for term in threat_terms:
        if term in lowered:
            return True
    return False
339
def get_risk_stage(patterns, sentiment):
    """Map detected patterns + sentiment to an abuse-cycle stage (1-4).

    2 = escalation, 3 = reconciliation, 4 = calm/honeymoon; defaults to
    1 (tension-building). Checks are ordered, so escalation wins over
    reconciliation, etc.
    """
    if "threat" in patterns or "insults" in patterns:
        return 2
    if "recovery phase" in patterns:
        return 3
    if "control" in patterns or "guilt tripping" in patterns:
        return 1
    if sentiment == "supportive" and ("projection" in patterns or "dismissiveness" in patterns):
        return 4
    return 1
349
+
350
def generate_risk_snippet(abuse_score, top_label, escalation_score, stage):
    """Build the human-readable risk summary for the result panel.

    Args:
        abuse_score: composite abuse intensity (0-100).
        top_label: strongest pattern, e.g. "control – 80%" (label, en dash, score).
        escalation_score: hybrid escalation score from the checklist + bumps.
        stage: abuse-cycle stage (1-4) from get_risk_stage.

    Returns:
        Markdown-formatted risk summary string.

    BUGFIX: the original had an unreachable duplicate of the WHY_FLAGGED dict
    and the snippet-building code after the return statement; it has been
    removed. The redundant local `import re` was also dropped (re is imported
    at module level).
    """
    # Extract an aggression percentage from labels like "aggression – 37%".
    if isinstance(top_label, str) and "aggression" in top_label.lower():
        try:
            match = re.search(r"\(?(\d+)\%?\)?", top_label)
            aggression_score = int(match.group(1)) / 100 if match else 0
        except Exception:  # narrow from bare except; parsing failure -> 0
            aggression_score = 0
    else:
        aggression_score = 0

    # Revised risk logic
    if abuse_score >= 85 or escalation_score >= 16:
        risk_level = "high"
    elif abuse_score >= 60 or escalation_score >= 8 or aggression_score >= 0.25:
        risk_level = "moderate"
    elif stage == 2 and abuse_score >= 40:
        risk_level = "moderate"
    else:
        risk_level = "low"

    # Split "label – score%" into its parts; tolerate a bare label.
    if isinstance(top_label, str) and " – " in top_label:
        pattern_label, pattern_score = top_label.split(" – ")
    else:
        pattern_label = str(top_label) if top_label is not None else "Unknown"
        pattern_score = ""

    # Per-pattern explanations of why a message may have been flagged.
    WHY_FLAGGED = {
        "control": "This message may reflect efforts to restrict someone’s autonomy, even if it's framed as concern or care.",
        "gaslighting": "This message could be manipulating someone into questioning their perception or feelings.",
        "dismissiveness": "This message may include belittling, invalidating, or ignoring the other person’s experience.",
        "insults": "Direct insults often appear in escalating abusive dynamics and can erode emotional safety.",
        "threat": "This message includes threatening language, which is a strong predictor of harm.",
        "blame shifting": "This message may redirect responsibility to avoid accountability, especially during conflict.",
        "guilt tripping": "This message may induce guilt in order to control or manipulate behavior.",
        "recovery phase": "This message may be part of a tension-reset cycle, appearing kind but avoiding change.",
        "projection": "This message may involve attributing the abuser’s own behaviors to the victim.",
        "contradictory statements": "This message may contain internal contradictions used to confuse, destabilize, or deflect responsibility.",
        "obscure language": "This message may use overly formal, vague, or complex language to obscure meaning or avoid accountability.",
        "default": "This message contains language patterns that may affect safety, clarity, or emotional autonomy."
    }

    explanation = WHY_FLAGGED.get(pattern_label.lower(), WHY_FLAGGED["default"])

    base = f"\n\nπŸ›‘ Risk Level: {risk_level.capitalize()}\n"
    base += f"This message shows strong indicators of **{pattern_label}**. "

    if risk_level == "high":
        base += "The language may reflect patterns of emotional control, even when expressed in soft or caring terms.\n"
    elif risk_level == "moderate":
        base += "There are signs of emotional pressure or verbal aggression that may escalate if repeated.\n"
    else:
        base += "The message does not strongly indicate abuse, but it's important to monitor for patterns.\n"

    base += f"\nπŸ’‘ *Why this might be flagged:*\n{explanation}\n"
    base += f"\nDetected Pattern: **{pattern_label} ({pattern_score})**\n"
    base += "🧠 You can review the pattern in context. This tool highlights possible dynamicsβ€”not judgments."
    return base
441
def compute_abuse_score(matched_scores, sentiment):
    """Combine detected patterns into a 0-100 abuse score.

    Args:
        matched_scores: list of (label, score, weight) for patterns over threshold.
        sentiment: "undermining" or "supportive".

    Returns:
        0 for no matches; otherwise a weighted average scaled up for multiple
        patterns, floored per severe label, nudged +10 for undermining
        sentiment below 50, and capped at 100.
    """
    if not matched_scores:
        return 0

    # Minimum score enforced when certain severe patterns are present.
    FLOORS = {
        "threat": 70,
        "control": 40,
        "gaslighting": 30,
        "insults": 25,
        "aggression": 40
    }

    # Weighted average over the matched patterns, tracking the highest floor.
    numerator = 0
    denominator = 0
    highest_floor = 0
    for label, score, weight in matched_scores:
        numerator += score * weight
        denominator += weight
        highest_floor = max(highest_floor, FLOORS.get(label, 0))

    score_pct = (numerator / denominator) * 100

    # Multiple co-occurring patterns amplify the score (1.25x for 2, 1.5x for 3+).
    boosted = score_pct * (1.0 + 0.25 * max(0, len(matched_scores) - 1))

    final = max(boosted, highest_floor)

    # Undermining sentiment nudges borderline scores upward.
    if sentiment == "undermining" and final < 50:
        final += 10

    return min(final, 100)
471
+
472
+
473
def analyze_single_message(text, thresholds):
    """Run the full single-message analysis pipeline.

    Args:
        text: the message to analyze.
        thresholds: per-label score cutoffs (a copy of THRESHOLDS).

    Returns:
        7-tuple: (abuse_score, threshold_labels, top_patterns,
        {"label": sentiment}, stage, darvo_score, tone_tag).
    """
    motif_hits, matched_phrases = detect_motifs(text)

    # Emotion profile; "sentiment" is derived from anger + disgust mass.
    emotion_profile = get_emotion_profile(text)
    sentiment_score = emotion_profile.get("anger", 0) + emotion_profile.get("disgust", 0)

    # Multi-label abuse model: independent sigmoid score per label in LABELS.
    inputs = tokenizer(text, return_tensors="pt", truncation=True, padding=True)
    with torch.no_grad():
        outputs = model(**inputs)
        scores = torch.sigmoid(outputs.logits.squeeze(0)).numpy()

    # Sentiment override if neutral is high while critical thresholds are passed.
    # BUGFIX: the original iterated ["control", "threat", "blame shifting"], but
    # "threat" is in neither LABELS nor THRESHOLDS, so LABELS.index("threat")
    # raised ValueError whenever the generator reached it. Filter to labels the
    # model actually produces.
    critical_labels = [l for l in ["control", "threat", "blame shifting"]
                       if l in LABELS and l in thresholds]
    if emotion_profile.get("neutral", 0) > 0.85 and any(
        scores[LABELS.index(l)] > thresholds[l]
        for l in critical_labels
    ):
        sentiment = "undermining"
    else:
        sentiment = "undermining" if sentiment_score > 0.25 else "supportive"

    weapon_flag = detect_weapon_language(text)

    # Raise thresholds slightly for supportive messages (fewer false positives).
    adjusted_thresholds = {
        k: v + 0.05 if sentiment == "supportive" else v
        for k, v in thresholds.items()
    }

    contradiction_flag = detect_contradiction(text)

    threshold_labels = [
        label for label, score in zip(LABELS, scores)
        if score > adjusted_thresholds[label]
    ]
    # Preliminary tone tag (abuse score not known yet); used by the recovery check below.
    tone_tag = get_emotional_tone_tag(emotion_profile, sentiment, threshold_labels, 0)
    motifs = [phrase for _, phrase in matched_phrases]

    darvo_score = calculate_darvo_score(
        threshold_labels,
        sentiment_before=0.0,
        sentiment_after=sentiment_score,
        motifs_found=motifs,
        contradiction_flag=contradiction_flag
    )

    top_patterns = sorted(
        [(label, score) for label, score in zip(LABELS, scores)],
        key=lambda x: x[1],
        reverse=True
    )[:2]
    # Post-threshold validation: strip recovery if it occurs with undermining sentiment
    if "recovery" in threshold_labels and tone_tag == "forced accountability flip":
        threshold_labels.remove("recovery")
        top_patterns = [p for p in top_patterns if p[0] != "recovery"]
        print("⚠️ Removing 'recovery' due to undermining sentiment (not genuine repair)")

    matched_scores = [
        (label, score, PATTERN_WEIGHTS.get(label, 1.0))
        for label, score in zip(LABELS, scores)
        if score > adjusted_thresholds[label]
    ]

    abuse_score_raw = compute_abuse_score(matched_scores, sentiment)
    abuse_score = abuse_score_raw

    # Risk stage logic; weapon language forces at least the escalation stage
    # and adds a flat +25 to the raw score.
    stage = get_risk_stage(threshold_labels, sentiment) if threshold_labels else 1
    if weapon_flag and stage < 2:
        stage = 2
    if weapon_flag:
        abuse_score_raw = min(abuse_score_raw + 25, 100)

    # Cap at 95 unless threat/control is present.
    abuse_score = min(
        abuse_score_raw,
        100 if "threat" in threshold_labels or "control" in threshold_labels else 95
    )

    # Tone tag must happen after abuse_score is finalized
    tone_tag = get_emotional_tone_tag(emotion_profile, sentiment, threshold_labels, abuse_score)

    # Debug
    print(f"Emotional Tone Tag: {tone_tag}")
    print("Emotion Profile:")
    for emotion, score in emotion_profile.items():
        print(f"  {emotion.capitalize():10}: {score}")
    print("\n--- Debug Info ---")
    print(f"Text: {text}")
    print(f"Sentiment (via emotion): {sentiment} (score: {round(sentiment_score, 3)})")
    print("Abuse Pattern Scores:")
    for label, score in zip(LABELS, scores):
        passed = "βœ…" if score > adjusted_thresholds[label] else "❌"
        print(f"  {label:25} β†’ {score:.3f} {passed}")
    print(f"Matched for score: {[(l, round(s, 3)) for l, s, _ in matched_scores]}")
    print(f"Abuse Score Raw: {round(abuse_score_raw, 1)}")
    print(f"Motifs: {motifs}")
    print(f"Contradiction: {contradiction_flag}")
    print("------------------\n")

    return abuse_score, threshold_labels, top_patterns, {"label": sentiment}, stage, darvo_score, tone_tag
573
+
574
def analyze_composite(msg1, date1, msg2, date2, msg3, date3, *answers_and_none):
    """Gradio entry point: analyze up to three messages plus the safety checklist.

    *answers_and_none holds one bool per ESCALATION_QUESTIONS checkbox followed
    by the "None of the above" checkbox as the final element.

    Returns (markdown_summary, timeline_PIL_image).
    NOTE(review): the empty-input branch returns a single string while the
    Interface declares two outputs — confirm Gradio handles that gracefully.
    """
    # "None of the above" only counts if no actual question was checked.
    none_selected_checked = answers_and_none[-1]
    responses_checked = any(answers_and_none[:-1])
    none_selected = not responses_checked and none_selected_checked

    if none_selected:
        escalation_score = None
        risk_level = "unknown"
    else:
        # Weighted sum of checked checklist items.
        escalation_score = sum(w for (_, w), a in zip(ESCALATION_QUESTIONS, answers_and_none[:-1]) if a)

    messages = [msg1, msg2, msg3]
    dates = [date1, date2, date3]
    # Keep only non-blank messages, paired with their (optional) dates.
    active = [(m, d) for m, d in zip(messages, dates) if m.strip()]
    if not active:
        return "Please enter at least one message."

    # Run model on messages
    results = [(analyze_single_message(m, THRESHOLDS.copy()), d) for m, d in active]
    # Unpack the 7-tuple returned per message into parallel lists.
    abuse_scores = [r[0][0] for r in results]
    top_labels = [r[0][1][0] if r[0][1] else r[0][2][0][0] for r in results]
    top_scores = [r[0][2][0][1] for r in results]
    sentiments = [r[0][3]['label'] for r in results]
    stages = [r[0][4] for r in results]
    darvo_scores = [r[0][5] for r in results]
    tone_tags= [r[0][6] for r in results]
    dates_used = [r[1] or "Undated" for r in results]  # Store dates for future mapping
    # Calculate escalation bump *after* model results exist: high DARVO,
    # threatening tone tags, very high abuse, or stage-2 messages add points.
    escalation_bump = 0
    for result, _ in results:
        abuse_score, threshold_labels, top_patterns, sentiment, stage, darvo_score, tone_tag = result
        if darvo_score > 0.65:
            escalation_bump += 3
        if tone_tag in ["forced accountability flip", "emotional threat"]:
            escalation_bump += 2
        if abuse_score > 80:
            escalation_bump += 2
        if stage == 2:
            escalation_bump += 3

    # Now we can safely calculate hybrid_score.
    # NOTE(review): this unconditionally recomputes risk_level, overwriting the
    # "unknown" set above when the checklist was skipped — confirm intended.
    hybrid_score = escalation_score + escalation_bump if escalation_score is not None else 0
    risk_level = (
        "High" if hybrid_score >= 16 else
        "Moderate" if hybrid_score >= 8 else
        "Low"
    )

    # Now compute scores and allow override (re-derived from results).
    abuse_scores = [r[0][0] for r in results]
    stages = [r[0][4] for r in results]

    # Post-check override (e.g. stage 2 or high abuse score forces Moderate risk)
    if any(score > 70 for score in abuse_scores) or any(stage == 2 for stage in stages):
        if risk_level == "Low":
            risk_level = "Moderate"

    # Sanity check on analyze_single_message's contract.
    # NOTE(review): assert is stripped under `python -O`; consider raising instead.
    for result, date in results:
        assert len(result) == 7, "Unexpected output from analyze_single_message"

    # --- Composite Abuse Score using compute_abuse_score ---
    # Recomputed from each message's top-2 patterns (not the thresholded set).
    composite_abuse_scores = []

    for result, _ in results:
        _, _, top_patterns, sentiment, _, _, _ = result
        matched_scores = [(label, score, PATTERN_WEIGHTS.get(label, 1.0)) for label, score in top_patterns]
        final_score = compute_abuse_score(matched_scores, sentiment["label"])
        composite_abuse_scores.append(final_score)

    composite_abuse = int(round(sum(composite_abuse_scores) / len(composite_abuse_scores)))

    # Most frequent abuse-cycle stage across the messages.
    most_common_stage = max(set(stages), key=stages.count)
    stage_text = RISK_STAGE_LABELS[most_common_stage]

    avg_darvo = round(sum(darvo_scores) / len(darvo_scores), 3)
    darvo_blurb = ""
    if avg_darvo > 0.25:
        level = "moderate" if avg_darvo < 0.65 else "high"
        darvo_blurb = f"\n\n🎭 **DARVO Score: {avg_darvo}** β†’ This indicates a **{level} likelihood** of narrative reversal (DARVO), where the speaker may be denying, attacking, or reversing blame."

    out = f"Abuse Intensity: {composite_abuse}%\n"
    out += "πŸ“Š This reflects the strength and severity of detected abuse patterns in the message(s).\n\n"

    # Escalation text is built here but appended at the very end of the summary.
    if escalation_score is None:
        escalation_text = "πŸ“‰ Escalation Potential: Unknown (Checklist not completed)\n"
        escalation_text += "⚠️ *This section was not completed. Escalation potential is unknown.*\n"
        hybrid_score = 0  # fallback so it's defined for generate_risk_snippet
    else:
        escalation_text = f"🧨 **Escalation Potential: {risk_level} ({escalation_score}/{sum(w for _, w in ESCALATION_QUESTIONS)})**\n"
        escalation_text += "This score comes directly from the safety checklist and functions as a standalone escalation risk score.\n"
        escalation_text += "It indicates how many serious risk factors are present based on your answers to the safety checklist.\n"
    # Derive top_label from the strongest top_patterns across all messages
    top_label = None
    if results:
        sorted_patterns = sorted(
            [(label, score) for r in results for label, score in r[0][2]],
            key=lambda x: x[1],
            reverse=True
        )
        if sorted_patterns:
            top_label = f"{sorted_patterns[0][0]} – {int(round(sorted_patterns[0][1] * 100))}%"
    if top_label is None:
        top_label = "Unknown – 0%"
    out += generate_risk_snippet(composite_abuse, top_label, hybrid_score if escalation_score is not None else 0, most_common_stage)
    out += f"\n\n{stage_text}"
    out += darvo_blurb
    out += "\n\n🎭 **Emotional Tones Detected:**\n"
    for i, tone in enumerate(tone_tags):
        label = tone if tone else "none"
        out += f"β€’ Message {i+1}: *{label}*\n"
    print(f"DEBUG: avg_darvo = {avg_darvo}")
    pattern_labels = [r[0][2][0][0] for r in results]  # top label for each message
    timeline_image = generate_abuse_score_chart(dates_used, abuse_scores, pattern_labels)
    out += "\n\n" + escalation_text
    return out, timeline_image
692
+
693
# --- Gradio UI wiring ---
# Three (message, optional date) textbox pairs.
message_date_pairs = [
    (
        gr.Textbox(label=f"Message {i+1}"),
        gr.Textbox(label=f"Date {i+1} (optional)", placeholder="YYYY-MM-DD")
    )
    for i in range(3)
]
# Flatten to [msg1, date1, msg2, date2, msg3, date3] so the positional order
# matches analyze_composite's signature.
textbox_inputs = [item for pair in message_date_pairs for item in pair]
# One checkbox per safety-checklist question, plus a "none" opt-out.
quiz_boxes = [gr.Checkbox(label=q) for q, _ in ESCALATION_QUESTIONS]
none_box = gr.Checkbox(label="None of the above")

iface = gr.Interface(
    fn=analyze_composite,
    inputs=textbox_inputs + quiz_boxes + [none_box],
    outputs=[
        gr.Textbox(label="Results"),
        gr.Image(label="Abuse Score Timeline", type="pil")
    ],
    title="Abuse Pattern Detector + Escalation Quiz",
    # NOTE(review): allow_flagging is deprecated in Gradio 4.x (flagging_mode) —
    # confirm the installed Gradio version.
    allow_flagging="manual"
)
714
+
715
# Launch the Gradio app only when run as a script (not when imported).
if __name__ == "__main__":
    iface.launch()