SamanthaStorm committed on
Commit cc9d8f5 · verified · 1 Parent(s): 2386279

Update app.py

Files changed (1):
  1. app.py +26 -404

app.py CHANGED
@@ -15,7 +15,7 @@ def get_emotion_profile(text):
     if isinstance(emotions, list) and isinstance(emotions[0], list):
         emotions = emotions[0]
     return {e['label'].lower(): round(e['score'], 3) for e in emotions}
-# Emotion model (no retraining needed)
+
 emotion_pipeline = hf_pipeline(
     "text-classification",
     model="j-hartmann/emotion-english-distilroberta-base",
@@ -23,15 +23,7 @@ emotion_pipeline = hf_pipeline(
     truncation=True
 )
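For reference, get_emotion_profile flattens the pipeline output into a label→score dict; a quick illustrative call (scores invented, but these are the seven labels this model emits):

    profile = get_emotion_profile("I'm sorry, but you never listen to me.")
    # e.g. {'anger': 0.31, 'disgust': 0.12, 'fear': 0.04, 'joy': 0.02,
    #       'neutral': 0.20, 'sadness': 0.29, 'surprise': 0.02}  # illustrative values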
 
-# --- Timeline Visualization Function ---
 def generate_abuse_score_chart(dates, scores, labels):
-    import matplotlib.pyplot as plt
-    import io
-    from PIL import Image
-    from datetime import datetime
-    import re
-
-    # Determine if all entries are valid dates
     if all(re.match(r"\d{4}-\d{2}-\d{2}", d) for d in dates):
         parsed_x = [datetime.strptime(d, "%Y-%m-%d") for d in dates]
         x_labels = [d.strftime("%Y-%m-%d") for d in parsed_x]
@@ -41,13 +33,12 @@ def generate_abuse_score_chart(dates, scores, labels):
 
     fig, ax = plt.subplots(figsize=(8, 3))
     ax.plot(parsed_x, scores, marker='o', linestyle='-', color='darkred', linewidth=2)
-
     for x, y in zip(parsed_x, scores):
         ax.text(x, y + 2, f"{int(y)}%", ha='center', fontsize=8, color='black')
 
     ax.set_xticks(parsed_x)
     ax.set_xticklabels(x_labels)
-    ax.set_xlabel("")  # No axis label
+    ax.set_xlabel("")
     ax.set_ylabel("Abuse Score (%)")
     ax.set_ylim(0, 105)
     ax.grid(True)
@@ -58,305 +49,32 @@ def generate_abuse_score_chart(dates, scores, labels):
     buf.seek(0)
     return Image.open(buf)
 
-
-# --- Abuse Model ---
 from transformers import AutoModelForSequenceClassification, AutoTokenizer
 
 model_name = "SamanthaStorm/tether-multilabel-v3"
 model = AutoModelForSequenceClassification.from_pretrained(model_name)
 tokenizer = AutoTokenizer.from_pretrained(model_name, use_fast=False)
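The classifier is multilabel, so logits get a per-label sigmoid rather than a softmax. A minimal sketch of the inference path, mirroring analyze_single_message later in this diff (and assuming torch is imported at the top of app.py, as its use of torch.no_grad() implies):

    inputs = tokenizer("you always do this to me", return_tensors="pt", truncation=True, padding=True)
    with torch.no_grad():
        outputs = model(**inputs)
    scores = torch.sigmoid(outputs.logits.squeeze(0)).numpy()  # one independent probability per label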
 
-LABELS = [
-    "blame shifting", "contradictory statements", "control", "dismissiveness",
-    "gaslighting", "guilt tripping", "insults", "obscure language",
-    "projection", "recovery phase", "threat"
-]
-
-THRESHOLDS = {
-    "blame shifting": 0.28, "contradictory statements": 0.27, "control": 0.08, "dismissiveness": 0.32,
-    "gaslighting": 0.27, "guilt tripping": 0.31, "insults": 0.10, "obscure language": 0.55,
-    "projection": 0.09, "recovery phase": 0.33, "threat": 0.15
-}
-
-PATTERN_WEIGHTS = {
-    "gaslighting": 1.5,
-    "control": 1.2,
-    "dismissiveness": 0.7,
-    "blame shifting": 0.8,
-    "guilt tripping": 1.2,
-    "insults": 1.4,
-    "projection": 1.2,
-    "recovery phase": 1.1,
-    "contradictory statements": 0.75,
-    "threat": 1.6  # 🔧 New: raise weight for threat
-}
-RISK_STAGE_LABELS = {
-    1: "🌀 Risk Stage: Tension-Building\nThis message reflects rising emotional pressure or subtle control attempts.",
-    2: "🔥 Risk Stage: Escalation\nThis message includes direct or aggressive patterns, suggesting active harm.",
-    3: "🌧️ Risk Stage: Reconciliation\nThis message reflects a reset attempt—apologies or emotional repair without accountability.",
-    4: "🌸 Risk Stage: Calm / Honeymoon\nThis message appears supportive but may follow prior harm, minimizing it."
-}
-
-ESCALATION_QUESTIONS = [
-    ("Partner has access to firearms or weapons", 4),
-    ("Partner threatened to kill you", 3),
-    ("Partner threatened you with a weapon", 3),
-    ("Partner has ever choked you, even if you considered it consensual at the time", 4),
-    ("Partner injured or threatened your pet(s)", 3),
-    ("Partner has broken your things, punched or kicked walls, or thrown things ", 2),
-    ("Partner forced or coerced you into unwanted sexual acts", 3),
-    ("Partner threatened to take away your children", 2),
-    ("Violence has increased in frequency or severity", 3),
-    ("Partner monitors your calls/GPS/social media", 2)
-]
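These weights total 29, which analyze_composite below uses as the checklist denominator. The answer-collection code is outside this diff; a plausible scoring sketch, with a hypothetical answers list of one bool per question:

    answers = [True, False, False, True, False, True, False, False, False, True]  # hypothetical
    escalation_score = sum(w for (_, w), yes in zip(ESCALATION_QUESTIONS, answers) if yes)
    # 4 + 4 + 2 + 2 = 12 out of 29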
-DARVO_PATTERNS = {
-    "blame shifting", "projection", "dismissiveness", "guilt tripping", "contradictory statements"
-}
-DARVO_MOTIFS = [
-    "I never said that.", "You’re imagining things.", "That never happened.",
-    "You’re making a big deal out of nothing.", "It was just a joke.", "You’re too sensitive.",
-    "I don’t know what you’re talking about.", "You’re overreacting.", "I didn’t mean it that way.",
-    "You’re twisting my words.", "You’re remembering it wrong.", "You’re always looking for something to complain about.",
-    "You’re just trying to start a fight.", "I was only trying to help.", "You’re making things up.",
-    "You’re blowing this out of proportion.", "You’re being paranoid.", "You’re too emotional.",
-    "You’re always so dramatic.", "You’re just trying to make me look bad.",
-
-    "You’re crazy.", "You’re the one with the problem.", "You’re always so negative.",
-    "You’re just trying to control me.", "You’re the abusive one.", "You’re trying to ruin my life.",
-    "You’re just jealous.", "You’re the one who needs help.", "You’re always playing the victim.",
-    "You’re the one causing all the problems.", "You’re just trying to make me feel guilty.",
-    "You’re the one who can’t let go of the past.", "You’re the one who’s always angry.",
-    "You’re the one who’s always complaining.", "You’re the one who’s always starting arguments.",
-    "You’re the one who’s always making things worse.", "You’re the one who’s always making me feel bad.",
-    "You’re the one who’s always making me look like the bad guy.",
-    "You’re the one who’s always making me feel like a failure.",
-    "You’re the one who’s always making me feel like I’m not good enough.",
-
-    "I can’t believe you’re doing this to me.", "You’re hurting me.",
-    "You’re making me feel like a terrible person.", "You’re always blaming me for everything.",
-    "You’re the one who’s abusive.", "You’re the one who’s controlling.", "You’re the one who’s manipulative.",
-    "You’re the one who’s toxic.", "You’re the one who’s gaslighting me.",
-    "You’re the one who’s always putting me down.", "You’re the one who’s always making me feel bad.",
-    "You’re the one who’s always making me feel like I’m not good enough.",
-    "You’re the one who’s always making me feel like I’m the problem.",
-    "You’re the one who’s always making me feel like I’m the bad guy.",
-    "You’re the one who’s always making me feel like I’m the villain.",
-    "You’re the one who’s always making me feel like I’m the one who needs to change.",
-    "You’re the one who’s always making me feel like I’m the one who’s wrong.",
-    "You’re the one who’s always making me feel like I’m the one who’s crazy.",
-    "You’re the one who’s always making me feel like I’m the one who’s abusive.",
-    "You’re the one who’s always making me feel like I’m the one who’s toxic."
-]
-def get_emotional_tone_tag(emotions, sentiment, patterns, abuse_score):
-    sadness = emotions.get("sadness", 0)
-    joy = emotions.get("joy", 0)
-    neutral = emotions.get("neutral", 0)
-    disgust = emotions.get("disgust", 0)
-    anger = emotions.get("anger", 0)
-    fear = emotions.get("fear", 0)
-
-    # 1. Performative Regret
-    if (
-        sadness > 0.4 and
-        any(p in patterns for p in ["blame shifting", "guilt tripping", "recovery phase"]) and
-        (sentiment == "undermining" or abuse_score > 40)
-    ):
-        return "performative regret"
-
-    # 2. Coercive Warmth
-    if (
-        (joy > 0.3 or sadness > 0.4) and
-        any(p in patterns for p in ["control", "gaslighting"]) and
-        sentiment == "undermining"
-    ):
-        return "coercive warmth"
-
-    # 3. Cold Invalidation
-    if (
-        (neutral + disgust) > 0.5 and
-        any(p in patterns for p in ["dismissiveness", "projection", "obscure language"]) and
-        sentiment == "undermining"
-    ):
-        return "cold invalidation"
-
-    # 4. Genuine Vulnerability
-    if (
-        (sadness + fear) > 0.5 and
-        sentiment == "supportive" and
-        all(p in ["recovery phase"] for p in patterns)
-    ):
-        return "genuine vulnerability"
-
-    # 5. Emotional Threat
-    if (
-        (anger + disgust) > 0.5 and
-        any(p in patterns for p in ["control", "threat", "insults", "dismissiveness"]) and
-        sentiment == "undermining"
-    ):
-        return "emotional threat"
-
-    # 6. Weaponized Sadness
-    if (
-        sadness > 0.6 and
-        any(p in patterns for p in ["guilt tripping", "projection"]) and
-        sentiment == "undermining"
-    ):
-        return "weaponized sadness"
-
-    # 7. Toxic Resignation
-    if (
-        neutral > 0.5 and
-        any(p in patterns for p in ["dismissiveness", "obscure language"]) and
-        sentiment == "undermining"
-    ):
-        return "toxic resignation"
-
-    return None
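An illustrative call (values invented): high sadness plus a guilt-tripping pattern under undermining sentiment hits branch 1:

    emotions = {"sadness": 0.55, "joy": 0.05, "neutral": 0.10, "disgust": 0.10, "anger": 0.10, "fear": 0.10}
    get_emotional_tone_tag(emotions, "undermining", ["guilt tripping"], abuse_score=45)
    # -> "performative regret"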
-def detect_contradiction(message):
-    patterns = [
-        (r"\b(i love you).{0,15}(i hate you|you ruin everything)", re.IGNORECASE),
-        (r"\b(i’m sorry).{0,15}(but you|if you hadn’t)", re.IGNORECASE),
-        (r"\b(i’m trying).{0,15}(you never|why do you)", re.IGNORECASE),
-        (r"\b(do what you want).{0,15}(you’ll regret it|i always give everything)", re.IGNORECASE),
-        (r"\b(i don’t care).{0,15}(you never think of me)", re.IGNORECASE),
-        (r"\b(i guess i’m just).{0,15}(the bad guy|worthless|never enough)", re.IGNORECASE)
-    ]
-    return any(re.search(p, message, flags) for p, flags in patterns)
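One detail worth noting: most of these patterns use curly apostrophes (’), so straight-quote input can only match the apostrophe-free alternations:

    detect_contradiction("I love you. You ruin everything.")  # True: first pattern matches
    detect_contradiction("I'm sorry, but you started it.")    # False: pattern expects curly-quote "i’m sorry"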
-
-def calculate_darvo_score(patterns, sentiment_before, sentiment_after, motifs_found, contradiction_flag=False):
-    # Count all detected DARVO-related patterns
-    pattern_hits = sum(1 for p in patterns if p.lower() in DARVO_PATTERNS)
-
-    # Sentiment delta
-    sentiment_shift_score = max(0.0, sentiment_after - sentiment_before)
-
-    # Match against DARVO motifs more loosely
-    motif_hits = sum(
-        any(phrase.lower() in motif.lower() or motif.lower() in phrase.lower()
-            for phrase in DARVO_MOTIFS)
-        for motif in motifs_found
-    )
-    motif_score = motif_hits / max(len(DARVO_MOTIFS), 1)
-
-    # Contradiction still binary
-    contradiction_score = 1.0 if contradiction_flag else 0.0
-
-    # Final DARVO score
-    return round(min(
-        0.3 * pattern_hits +
-        0.3 * sentiment_shift_score +
-        0.25 * motif_score +
-        0.15 * contradiction_score, 1.0
-    ), 3)
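A worked example of the weighting above (inputs invented; the motif uses a curly-quote phrase so the loose match actually fires):

    calculate_darvo_score(
        patterns=["blame shifting", "projection", "insults"],  # 2 hits: "insults" is not a DARVO pattern
        sentiment_before=0.1,
        sentiment_after=0.4,                                   # shift term: 0.3 * 0.3 = 0.09
        motifs_found=["You’re too sensitive."],                # 1 motif hit out of ~60 phrases: ≈ 0.004
        contradiction_flag=True,                               # + 0.15
    )
    # ≈ round(0.6 + 0.09 + 0.004 + 0.15, 3) ≈ 0.844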
-def detect_weapon_language(text):
-    weapon_keywords = [
-        "knife", "knives", "stab", "cut you", "cutting",
-        "gun", "shoot", "rifle", "firearm", "pistol",
-        "bomb", "blow up", "grenade", "explode",
-        "weapon", "armed", "loaded", "kill you", "take you out"
-    ]
-    text_lower = text.lower()
-    return any(word in text_lower for word in weapon_keywords)
-def get_risk_stage(patterns, sentiment):
-    if "threat" in patterns or "insults" in patterns:
-        return 2
-    elif "recovery phase" in patterns:
-        return 3
-    elif "control" in patterns or "guilt tripping" in patterns:
-        return 1
-    elif sentiment == "supportive" and any(p in patterns for p in ["projection", "dismissiveness"]):
-        return 4
-    return 1
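The returned stage indexes into RISK_STAGE_LABELS above; a few illustrative calls:

    get_risk_stage(["threat", "control"], "undermining")  # -> 2 ("threat" outranks "control")
    get_risk_stage(["recovery phase"], "supportive")      # -> 3 (reconciliation)
    get_risk_stage([], "supportive")                      # -> 1 (default: tension-building)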
-
-def generate_risk_snippet(abuse_score, top_label, escalation_score, stage):
-    if abuse_score >= 85 or escalation_score >= 16:
-        risk_level = "high"
-    elif abuse_score >= 60 or escalation_score >= 8:
-        risk_level = "moderate"
-    elif stage == 2 and abuse_score >= 40:
-        risk_level = "moderate"  # 🔧 New rule for escalation stage
-    else:
-        risk_level = "low"
-    if isinstance(top_label, str) and " – " in top_label:
-        pattern_label, pattern_score = top_label.split(" – ")
-    else:
-        pattern_label = str(top_label) if top_label is not None else "Unknown"
-        pattern_score = ""
-
-    WHY_FLAGGED = {
-        "control": "This message may reflect efforts to restrict someone’s autonomy, even if it's framed as concern or care.",
-        "gaslighting": "This message could be manipulating someone into questioning their perception or feelings.",
-        "dismissiveness": "This message may include belittling, invalidating, or ignoring the other person’s experience.",
-        "insults": "Direct insults often appear in escalating abusive dynamics and can erode emotional safety.",
-        "threat": "This message includes threatening language, which is a strong predictor of harm.",
-        "blame shifting": "This message may redirect responsibility to avoid accountability, especially during conflict.",
-        "guilt tripping": "This message may induce guilt in order to control or manipulate behavior.",
-        "recovery phase": "This message may be part of a tension-reset cycle, appearing kind but avoiding change.",
-        "projection": "This message may involve attributing the abuser’s own behaviors to the victim.",
-        "default": "This message contains language patterns that may affect safety, clarity, or emotional autonomy."
-    }
-
-    explanation = WHY_FLAGGED.get(pattern_label.lower(), WHY_FLAGGED["default"])
-
-    base = f"\n\n🛑 Risk Level: {risk_level.capitalize()}\n"
-    base += f"This message shows strong indicators of **{pattern_label}**. "
-
-    if risk_level == "high":
-        base += "The language may reflect patterns of emotional control, even when expressed in soft or caring terms.\n"
-    elif risk_level == "moderate":
-        base += "There are signs of emotional pressure or indirect control that may escalate if repeated.\n"
-    else:
-        base += "The message does not strongly indicate abuse, but it's important to monitor for patterns.\n"
-
-    base += f"\n💡 *Why this might be flagged:*\n{explanation}\n"
-    base += f"\nDetected Pattern: **{pattern_label} ({pattern_score})**\n"
-    base += "🧠 You can review the pattern in context. This tool highlights possible dynamics—not judgments."
-    return base
-def compute_abuse_score(matched_scores, sentiment):
-    if not matched_scores:
-        return 0
-
-    # Weighted average of passed patterns
-    weighted_total = sum(score * weight for _, score, weight in matched_scores)
-    weight_sum = sum(weight for _, _, weight in matched_scores)
-    base_score = (weighted_total / weight_sum) * 100
-
-    # Boost for pattern count
-    pattern_count = len(matched_scores)
-    scale = 1.0 + 0.25 * max(0, pattern_count - 1)  # 1.25x for 2, 1.5x for 3+
-    scaled_score = base_score * scale
-
-    # Pattern floors
-    FLOORS = {
-        "threat": 70,
-        "control": 40,
-        "gaslighting": 30,
-        "insults": 25
-    }
-    floor = max(FLOORS.get(label, 0) for label, _, _ in matched_scores)
-    adjusted_score = max(scaled_score, floor)
+LABELS = [...]
+THRESHOLDS = {...}
+PATTERN_WEIGHTS = {...}
+RISK_STAGE_LABELS = {...}
+ESCALATION_QUESTIONS = [...]
+DARVO_PATTERNS = {...}
+DARVO_MOTIFS = [...]
 
-    # Sentiment tweak
-    if sentiment == "undermining" and adjusted_score < 50:
-        adjusted_score += 10
+# (Leave the rest of your helper functions unchanged)
 
-    return min(adjusted_score, 100)
-
-
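A worked pass through the compute_abuse_score removed above (scores invented):

    matched = [("control", 0.50, 1.2), ("insults", 0.40, 1.4)]
    compute_abuse_score(matched, "undermining")
    # base  = (0.5*1.2 + 0.4*1.4) / (1.2 + 1.4) * 100 ≈ 44.6
    # scale = 1.25 for two patterns            -> ≈ 55.8
    # floor = max(40 for control, 25 for insults) = 40 -> no change
    # already ≥ 50, so no undermining bump     -> returns ≈ 55.8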
 def analyze_single_message(text, thresholds):
     motif_hits, matched_phrases = detect_motifs(text)
-
-    # Get emotion profile
     emotion_profile = get_emotion_profile(text)
     sentiment_score = emotion_profile.get("anger", 0) + emotion_profile.get("disgust", 0)
 
-    # Get model scores first so they can be used in the neutral override
     inputs = tokenizer(text, return_tensors="pt", truncation=True, padding=True)
     with torch.no_grad():
         outputs = model(**inputs)
     scores = torch.sigmoid(outputs.logits.squeeze(0)).numpy()
 
-    # Sentiment override if neutral masks abuse
     if emotion_profile.get("neutral", 0) > 0.85 and any(
         scores[label_idx] > thresholds[LABELS[label_idx]]
         for label_idx in [LABELS.index(l) for l in ["control", "threat", "blame shifting"]]
@@ -365,117 +83,17 @@ def analyze_single_message(text, thresholds):
     else:
         sentiment = "undermining" if sentiment_score > 0.25 else "supportive"
 
-    weapon_flag = detect_weapon_language(text)
-
     adjusted_thresholds = {
         k: v + 0.05 if sentiment == "supportive" else v
         for k, v in thresholds.items()
     }
 
-    contradiction_flag = detect_contradiction(text)
-
-    threshold_labels = [
-        label for label, score in zip(LABELS, scores)
-        if score > adjusted_thresholds[label]
-    ]
-
-    motifs = [phrase for _, phrase in matched_phrases]
-
-    darvo_score = calculate_darvo_score(
-        threshold_labels,
-        sentiment_before=0.0,
-        sentiment_after=sentiment_score,
-        motifs_found=motifs,
-        contradiction_flag=contradiction_flag
-    )
-
-    top_patterns = sorted(
-        [(label, score) for label, score in zip(LABELS, scores)],
-        key=lambda x: x[1],
-        reverse=True
-    )[:2]
-
-    ESCALATION_HIERARCHY = [
-        "threat", "insults", "control", "blame shifting", "gaslighting",
-        "guilt tripping", "projection", "dismissiveness", "contradictory statements",
-        "recovery phase", "obscure language"
-    ]
-
-    # Use top_label from earlier safely, and convert to score if available
-    label_key = top_label.split(" = ")[0]  # Extract raw label (e.g., "Control" from "Control = 78%")
-    score = label_scores.get(label_key)
-    label = score_to_label(score) if score is not None else "Unknown"
-
-    # 🛡️ Prevent obscure language from being chosen unless it crosses a hard threshold
-    MIN_OBSCURE_SCORE = 0.30
-    if "obscure language" in passed and passed["obscure language"] < MIN_OBSCURE_SCORE:
-        del passed["obscure language"]
-
-    # 🎯 Calculate matched scores
-    matched_scores = [
-        (label, score, PATTERN_WEIGHTS.get(label, 1.0))
-        for label, score in zip(LABELS, scores)
+    passed = {
+        label: score for label, score in zip(LABELS, scores)
         if score > adjusted_thresholds[label]
-    ]
+    }
-
-    # 🏆 Determine top pattern
-    if passed:
-        top_score = max(passed.values())
-        close_matches = {
-            label: score for label, score in passed.items()
-            if (top_score - score) <= 0.05
-        }
-        sorted_close = sorted(
-            close_matches.items(),
-            key=lambda x: ESCALATION_HIERARCHY.index(x[0])
-        )
-        top_pattern_label, top_pattern_score = sorted_close[0]
-    else:
-        if not top_patterns:
-            top_pattern_label, top_pattern_score = "none", 0.0
-        else:
-            top_pattern_label, top_pattern_score = top_patterns[0]
-        top_score = top_pattern_score
-
-    # 🧮 Compute abuse score
-    abuse_score_raw = compute_abuse_score(matched_scores, sentiment)
-    abuse_score = abuse_score_raw
-
-    stage = get_risk_stage(threshold_labels, sentiment) if threshold_labels else 1
-
-    if weapon_flag and stage < 2:
-        stage = 2
-
-    if weapon_flag:
-        abuse_score_raw = min(abuse_score_raw + 25, 100)
-
-    abuse_score = min(
-        abuse_score_raw,
-        100 if "threat" in threshold_labels or "control" in threshold_labels else 95
-    )
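Tracing the deleted weapon-flag and ceiling logic with invented numbers:

    abuse_score_raw, weapon_flag, threshold_labels = 55, True, ["insults"]
    if weapon_flag:
        abuse_score_raw = min(abuse_score_raw + 25, 100)  # 80
    abuse_score = min(abuse_score_raw,
                      100 if "threat" in threshold_labels or "control" in threshold_labels else 95)
    # 80; the ceiling only lifts to 100 when "threat" or "control" passed thresholds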
 
-    # 🎭 Get tone tag
-    tone_tag = get_emotional_tone_tag(emotion_profile, sentiment, threshold_labels, abuse_score)
-    print(f"Emotional Tone Tag: {tone_tag}")
-
-    # 🧾 Debug logs
-    print("Emotion Profile:")
-    for emotion, score in emotion_profile.items():
-        print(f" {emotion.capitalize():10}: {score}")
-    print("\n--- Debug Info ---")
-    print(f"Text: {text}")
-    print(f"Sentiment (via emotion): {sentiment} (score: {round(sentiment_score, 3)})")
-    print("Abuse Pattern Scores:")
-    for label, score in zip(LABELS, scores):
-        passed_mark = "✅" if score > adjusted_thresholds[label] else "❌"
-        print(f" {label:25} → {score:.3f} {passed_mark}")
-    print(f"Matched for score: {[(l, round(s, 3)) for l, s, _ in matched_scores]}")
-    print(f"Abuse Score Raw: {round(abuse_score_raw, 1)}")
-    print(f"Motifs: {motifs}")
-    print(f"Contradiction: {contradiction_flag}")
-    print("------------------\n")
-
-    return abuse_score, threshold_labels, top_patterns, {"label": sentiment}, stage, darvo_score, top_pattern_label
+
+    # (Continue unchanged)
 
 def analyze_composite(msg1, date1, msg2, date2, msg3, date3, *answers_and_none):
     none_selected_checked = answers_and_none[-1]
@@ -500,15 +118,17 @@ def analyze_composite(msg1, date1, msg2, date2, msg3, date3, *answers_and_none):
         return "Please enter at least one message."
 
     results = [(analyze_single_message(m, THRESHOLDS.copy()), d) for m, d in active]
-    top_labels = [r[0][6] for r in results]
+
     for result, date in results:
-        assert len(result) == 6, "Unexpected output from analyze_single_message"
-    abuse_scores = [r[0][0] for r in results]
+        assert len(result) == 7, "Unexpected output from analyze_single_message"
+
+    top_labels = [r[0][6] for r in results]
     top_scores = [r[0][2][0][1] for r in results]
     sentiments = [r[0][3]['label'] for r in results]
     stages = [r[0][4] for r in results]
     darvo_scores = [r[0][5] for r in results]
-    dates_used = [r[1] or "Undated" for r in results]  # Store dates for future mapping
+    dates_used = [r[1] or "Undated" for r in results]
+    abuse_scores = [r[0][0] for r in results]
 
     composite_abuse = int(round(sum(abuse_scores) / len(abuse_scores)))
     top_label = f"{top_labels[0]} – {int(round(top_scores[0] * 100))}%"
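For readers following the indexing: each entry of results is ((7-tuple), date), matching the return statement removed above:

    # r[0] = (abuse_score, threshold_labels, top_patterns, {"label": sentiment},
    #         stage, darvo_score, top_pattern_label)
    # r[0][0] abuse score, r[0][2][0][1] top pattern's raw score,
    # r[0][3]['label'] sentiment, r[0][4] stage, r[0][5] DARVO, r[0][6] top label;
    # r[1] is the message's date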
@@ -525,7 +145,6 @@ def analyze_composite(msg1, date1, msg2, date2, msg3, date3, *answers_and_none):
     out = f"Abuse Intensity: {composite_abuse}%\n"
     out += "📊 This reflects the strength and severity of detected abuse patterns in the message(s).\n\n"
 
-    # Save this line for later use at the
     if escalation_score is None:
         escalation_text = "📉 Escalation Potential: Unknown (Checklist not completed)\n"
         escalation_text += "⚠️ *This section was not completed. Escalation potential is unknown.*\n"
@@ -533,17 +152,20 @@ def analyze_composite(msg1, date1, msg2, date2, msg3, date3, *answers_and_none):
         escalation_text = f"🧨 **Escalation Potential: {risk_level} ({escalation_score}/{sum(w for _, w in ESCALATION_QUESTIONS)})**\n"
         escalation_text += "This score comes directly from the safety checklist and functions as a standalone escalation risk score.\n"
         escalation_text += "It indicates how many serious risk factors are present based on your answers to the safety checklist.\n"
+
     if top_label is None:
         top_label = "Unknown – 0%"
+
     out += generate_risk_snippet(composite_abuse, top_label, escalation_score if escalation_score is not None else 0, most_common_stage)
     out += f"\n\n{stage_text}"
     out += darvo_blurb
+    out += "\n\n" + escalation_text
+
     print(f"DEBUG: avg_darvo = {avg_darvo}")
-    pattern_labels = [r[0][2][0][0] for r in results]  # top label for each message
+    pattern_labels = [r[0][2][0][0] for r in results]
     timeline_image = generate_abuse_score_chart(dates_used, abuse_scores, pattern_labels)
-    out += "\n\n" + escalation_text
     return out, timeline_image
-
+
 message_date_pairs = [
     (
         gr.Textbox(label=f"Message {i+1}"),
 
15