Spaces:
Running
Running
Update app.py
Browse files
app.py
CHANGED
@@ -15,7 +15,7 @@ def get_emotion_profile(text):
|
|
15 |
if isinstance(emotions, list) and isinstance(emotions[0], list):
|
16 |
emotions = emotions[0]
|
17 |
return {e['label'].lower(): round(e['score'], 3) for e in emotions}
|
18 |
-
|
19 |
emotion_pipeline = hf_pipeline(
|
20 |
"text-classification",
|
21 |
model="j-hartmann/emotion-english-distilroberta-base",
|
@@ -23,15 +23,7 @@ emotion_pipeline = hf_pipeline(
|
|
23 |
truncation=True
|
24 |
)
|
25 |
|
26 |
-
# --- Timeline Visualization Function ---
|
27 |
def generate_abuse_score_chart(dates, scores, labels):
|
28 |
-
import matplotlib.pyplot as plt
|
29 |
-
import io
|
30 |
-
from PIL import Image
|
31 |
-
from datetime import datetime
|
32 |
-
import re
|
33 |
-
|
34 |
-
# Determine if all entries are valid dates
|
35 |
if all(re.match(r"\d{4}-\d{2}-\d{2}", d) for d in dates):
|
36 |
parsed_x = [datetime.strptime(d, "%Y-%m-%d") for d in dates]
|
37 |
x_labels = [d.strftime("%Y-%m-%d") for d in parsed_x]
|
@@ -41,13 +33,12 @@ def generate_abuse_score_chart(dates, scores, labels):
|
|
41 |
|
42 |
fig, ax = plt.subplots(figsize=(8, 3))
|
43 |
ax.plot(parsed_x, scores, marker='o', linestyle='-', color='darkred', linewidth=2)
|
44 |
-
|
45 |
for x, y in zip(parsed_x, scores):
|
46 |
ax.text(x, y + 2, f"{int(y)}%", ha='center', fontsize=8, color='black')
|
47 |
|
48 |
ax.set_xticks(parsed_x)
|
49 |
ax.set_xticklabels(x_labels)
|
50 |
-
ax.set_xlabel("")
|
51 |
ax.set_ylabel("Abuse Score (%)")
|
52 |
ax.set_ylim(0, 105)
|
53 |
ax.grid(True)
|
@@ -58,305 +49,32 @@ def generate_abuse_score_chart(dates, scores, labels):
|
|
58 |
buf.seek(0)
|
59 |
return Image.open(buf)
|
60 |
|
61 |
-
|
62 |
-
# --- Abuse Model ---
|
63 |
from transformers import AutoModelForSequenceClassification, AutoTokenizer
|
64 |
|
65 |
model_name = "SamanthaStorm/tether-multilabel-v3"
|
66 |
model = AutoModelForSequenceClassification.from_pretrained(model_name)
|
67 |
tokenizer = AutoTokenizer.from_pretrained(model_name, use_fast=False)
|
68 |
|
69 |
-
LABELS = [
|
70 |
-
|
71 |
-
|
72 |
-
|
73 |
-
]
|
74 |
-
|
75 |
-
|
76 |
-
"blame shifting": 0.28, "contradictory statements": 0.27, "control": 0.08, "dismissiveness": 0.32,
|
77 |
-
"gaslighting": 0.27, "guilt tripping": 0.31, "insults": 0.10, "obscure language": 0.55,
|
78 |
-
"projection": 0.09, "recovery phase": 0.33, "threat": 0.15
|
79 |
-
}
|
80 |
-
|
81 |
-
# Relative weight of each abuse pattern when scores are combined into a
# weighted average. Labels without an entry (e.g. "obscure language") are
# expected to be looked up with a default by the caller.
PATTERN_WEIGHTS = {
    "gaslighting": 1.5,
    "control": 1.2,
    "dismissiveness": 0.7,
    "blame shifting": 0.8,
    "guilt tripping": 1.2,
    "insults": 1.4,
    "projection": 1.2,
    "recovery phase": 1.1,
    "contradictory statements": 0.75,
    "threat": 1.6,  # deliberately the heaviest weight in the table
}
|
93 |
-
# User-facing description for each numeric risk stage (1-4). Each value is
# a headline line followed by a one-sentence explanation.
RISK_STAGE_LABELS = {
    1: (
        "🌀 Risk Stage: Tension-Building\n"
        "This message reflects rising emotional pressure or subtle control attempts."
    ),
    2: (
        "🔥 Risk Stage: Escalation\n"
        "This message includes direct or aggressive patterns, suggesting active harm."
    ),
    3: (
        "🌧️ Risk Stage: Reconciliation\n"
        "This message reflects a reset attempt—apologies or emotional repair without accountability."
    ),
    4: (
        "🌸 Risk Stage: Calm / Honeymoon\n"
        "This message appears supportive but may follow prior harm, minimizing it."
    ),
}
|
99 |
-
|
100 |
-
# Safety-checklist items as (question text, weight) pairs. The weights are
# summed elsewhere to form the maximum possible escalation score.
ESCALATION_QUESTIONS = [
    ("Partner has access to firearms or weapons", 4),
    ("Partner threatened to kill you", 3),
    ("Partner threatened you with a weapon", 3),
    (
        "Partner has ever choked you, even if you considered it consensual at the time",
        4,
    ),
    ("Partner injured or threatened your pet(s)", 3),
    # NOTE: the trailing space in the next label is part of the original text.
    ("Partner has broken your things, punched or kicked walls, or thrown things ", 2),
    ("Partner forced or coerced you into unwanted sexual acts", 3),
    ("Partner threatened to take away your children", 2),
    ("Violence has increased in frequency or severity", 3),
    ("Partner monitors your calls/GPS/social media", 2),
]
|
112 |
-
# Pattern labels that count as DARVO-related when tallying pattern hits.
DARVO_PATTERNS = {
    "blame shifting",
    "projection",
    "dismissiveness",
    "guilt tripping",
    "contradictory statements",
}
|
115 |
-
# Reference phrases used for loose DARVO motif matching.
#
# NOTE(review): two phrases occur twice ("...always making me feel bad." and
# "...feel like I’m not good enough."). They are kept as-is because the list
# length is used as a normaliser when the motif score is computed, so
# de-duplicating would change scores.
DARVO_MOTIFS = [
    # First group -- appears to cover denial / minimising phrases.
    "I never said that.",
    "You’re imagining things.",
    "That never happened.",
    "You’re making a big deal out of nothing.",
    "It was just a joke.",
    "You’re too sensitive.",
    "I don’t know what you’re talking about.",
    "You’re overreacting.",
    "I didn’t mean it that way.",
    "You’re twisting my words.",
    "You’re remembering it wrong.",
    "You’re always looking for something to complain about.",
    "You’re just trying to start a fight.",
    "I was only trying to help.",
    "You’re making things up.",
    "You’re blowing this out of proportion.",
    "You’re being paranoid.",
    "You’re too emotional.",
    "You’re always so dramatic.",
    "You’re just trying to make me look bad.",
    # Second group -- appears to cover attacking / accusing phrases.
    "You’re crazy.",
    "You’re the one with the problem.",
    "You’re always so negative.",
    "You’re just trying to control me.",
    "You’re the abusive one.",
    "You’re trying to ruin my life.",
    "You’re just jealous.",
    "You’re the one who needs help.",
    "You’re always playing the victim.",
    "You’re the one causing all the problems.",
    "You’re just trying to make me feel guilty.",
    "You’re the one who can’t let go of the past.",
    "You’re the one who’s always angry.",
    "You’re the one who’s always complaining.",
    "You’re the one who’s always starting arguments.",
    "You’re the one who’s always making things worse.",
    "You’re the one who’s always making me feel bad.",
    "You’re the one who’s always making me look like the bad guy.",
    "You’re the one who’s always making me feel like a failure.",
    "You’re the one who’s always making me feel like I’m not good enough.",
    # Third group -- appears to cover victim/offender reversal phrases.
    "I can’t believe you’re doing this to me.",
    "You’re hurting me.",
    "You’re making me feel like a terrible person.",
    "You’re always blaming me for everything.",
    "You’re the one who’s abusive.",
    "You’re the one who’s controlling.",
    "You’re the one who’s manipulative.",
    "You’re the one who’s toxic.",
    "You’re the one who’s gaslighting me.",
    "You’re the one who’s always putting me down.",
    "You’re the one who’s always making me feel bad.",
    "You’re the one who’s always making me feel like I’m not good enough.",
    "You’re the one who’s always making me feel like I’m the problem.",
    "You’re the one who’s always making me feel like I’m the bad guy.",
    "You’re the one who’s always making me feel like I’m the villain.",
    "You’re the one who’s always making me feel like I’m the one who needs to change.",
    "You’re the one who’s always making me feel like I’m the one who’s wrong.",
    "You’re the one who’s always making me feel like I’m the one who’s crazy.",
    "You’re the one who’s always making me feel like I’m the one who’s abusive.",
    "You’re the one who’s always making me feel like I’m the one who’s toxic.",
]
|
150 |
-
def get_emotional_tone_tag(emotions, sentiment, patterns, abuse_score):
    """Map an emotion profile plus detected abuse patterns to a tone tag.

    Rules are evaluated from top to bottom and the first match wins.

    Args:
        emotions: Mapping of lower-case emotion name to score. Missing
            emotions are treated as 0.
        sentiment: Sentiment label; the rules compare it against
            ``"undermining"`` and ``"supportive"``.
        patterns: Container of detected pattern labels (tested with ``in``).
        abuse_score: Abuse score, compared against 40 by the first rule
            (presumably on a 0-100 scale -- confirm against the caller).

    Returns:
        One of ``"performative regret"``, ``"coercive warmth"``,
        ``"toxic resignation"``, ``"cold invalidation"``,
        ``"genuine vulnerability"``, ``"emotional threat"``,
        ``"weaponized sadness"``, or ``None`` when no rule matches.
    """
    sadness = emotions.get("sadness", 0)
    joy = emotions.get("joy", 0)
    neutral = emotions.get("neutral", 0)
    disgust = emotions.get("disgust", 0)
    anger = emotions.get("anger", 0)
    fear = emotions.get("fear", 0)

    undermining = sentiment == "undermining"

    def has_any(*labels):
        """Return True when at least one of *labels* was detected."""
        return any(label in patterns for label in labels)

    # 1. Performative regret
    if (
        sadness > 0.4
        and has_any("blame shifting", "guilt tripping", "recovery phase")
        and (undermining or abuse_score > 40)
    ):
        return "performative regret"

    # 2. Coercive warmth
    if (joy > 0.3 or sadness > 0.4) and has_any("control", "gaslighting") and undermining:
        return "coercive warmth"

    # 3. Toxic resignation
    # This rule is a strict subset of "cold invalidation" below (neutral > 0.5
    # implies neutral + disgust > 0.5 for non-negative scores, and its pattern
    # list is contained in the other one). It therefore has to be tested
    # first; placed after it, this branch could never be reached.
    if neutral > 0.5 and has_any("dismissiveness", "obscure language") and undermining:
        return "toxic resignation"

    # 4. Cold invalidation
    if (
        (neutral + disgust) > 0.5
        and has_any("dismissiveness", "projection", "obscure language")
        and undermining
    ):
        return "cold invalidation"

    # 5. Genuine vulnerability
    # Holds only when every detected pattern is "recovery phase"; note that
    # this is also true when no pattern was detected at all.
    if (
        (sadness + fear) > 0.5
        and sentiment == "supportive"
        and all(p in ["recovery phase"] for p in patterns)
    ):
        return "genuine vulnerability"

    # 6. Emotional threat
    if (
        (anger + disgust) > 0.5
        and has_any("control", "threat", "insults", "dismissiveness")
        and undermining
    ):
        return "emotional threat"

    # 7. Weaponized sadness
    if sadness > 0.6 and has_any("guilt tripping", "projection") and undermining:
        return "weaponized sadness"

    return None
|
215 |
-
# Regexes for a statement that is contradicted within the next 15 characters
# (e.g. an apology immediately followed by blame). Every apostrophe is
# written as [’'] so that both the typographic and the plain ASCII form
# match; typed messages normally contain the ASCII one.
_CONTRADICTION_PATTERNS = (
    r"\b(i love you).{0,15}(i hate you|you ruin everything)",
    r"\b(i[’']m sorry).{0,15}(but you|if you hadn[’']t)",
    r"\b(i[’']m trying).{0,15}(you never|why do you)",
    r"\b(do what you want).{0,15}(you[’']ll regret it|i always give everything)",
    r"\b(i don[’']t care).{0,15}(you never think of me)",
    r"\b(i guess i[’']m just).{0,15}(the bad guy|worthless|never enough)",
)


def detect_contradiction(message):
    """Return True if *message* contains a known self-contradicting phrase pair.

    Matching is case-insensitive. The two halves of a pair must be at most
    15 characters apart and on the same line (``.`` does not match newlines).

    Args:
        message: Text to scan.

    Returns:
        bool: True when any contradiction pattern matches, otherwise False.
    """
    return any(
        re.search(pattern, message, re.IGNORECASE)
        for pattern in _CONTRADICTION_PATTERNS
    )
|
225 |
-
|
226 |
-
def calculate_darvo_score(patterns, sentiment_before, sentiment_after, motifs_found, contradiction_flag=False):
    """Combine four signals into a DARVO score between 0 and 1.

    The score is a weighted sum, capped at 1.0 and rounded to 3 decimals:
    0.3 per DARVO-related pattern, 0.3 times the positive sentiment shift,
    0.25 times the motif ratio and 0.15 if a contradiction was flagged.

    Args:
        patterns: Detected pattern labels; compared case-insensitively
            against ``DARVO_PATTERNS``.
        sentiment_before: Sentiment score before the message.
        sentiment_after: Sentiment score after the message; only an
            increase over ``sentiment_before`` contributes.
        motifs_found: Phrases found in the message; one counts as a hit
            when it contains, or is contained in, any ``DARVO_MOTIFS`` entry.
        contradiction_flag: Whether a contradiction was detected.

    Returns:
        float: The capped, rounded score.
    """
    # How many of the detected patterns are DARVO-related.
    pattern_hits = 0
    for name in patterns:
        if name.lower() in DARVO_PATTERNS:
            pattern_hits += 1

    # Only a rise in the sentiment score counts; a drop contributes nothing.
    sentiment_shift_score = max(0.0, sentiment_after - sentiment_before)

    # Loose, case-insensitive substring match in either direction.
    motif_hits = 0
    for found in motifs_found:
        needle = found.lower()
        for reference in DARVO_MOTIFS:
            candidate = reference.lower()
            if candidate in needle or needle in candidate:
                motif_hits += 1
                break
    # Normalised by the size of the reference list (guarded against empty).
    motif_score = motif_hits / max(len(DARVO_MOTIFS), 1)

    contradiction_score = 1.0 if contradiction_flag else 0.0

    total = (
        0.3 * pattern_hits +
        0.3 * sentiment_shift_score +
        0.25 * motif_score +
        0.15 * contradiction_score
    )
    return round(min(total, 1.0), 3)
|
251 |
-
# Words and short phrases treated as weapon or lethal-threat language.
_WEAPON_KEYWORDS = (
    "knife", "knives", "stab", "cut you", "cutting",
    "gun", "shoot", "rifle", "firearm", "pistol",
    "bomb", "blow up", "grenade", "explode",
    "weapon", "armed", "loaded", "kill you", "take you out",
)


def detect_weapon_language(text):
    """Return True if *text* mentions a weapon or lethal-threat keyword.

    Matching is case-insensitive and anchored at the start of a word, so
    "begun", "harmed" or "downloaded" no longer trigger on the embedded
    "gun", "armed" or "loaded". The end of the word is left open on purpose
    so that inflections such as "guns", "stabbed" or "shooting" still match.

    Args:
        text: Message text to scan. Empty or None input yields False.

    Returns:
        bool: True when at least one keyword is found.
    """
    if not text:
        return False

    # Local import keeps this block independent of the file-level imports.
    import re

    alternatives = "|".join(re.escape(keyword) for keyword in _WEAPON_KEYWORDS)
    return re.search(r"\b(?:" + alternatives + r")", text, re.IGNORECASE) is not None
|
260 |
-
def get_risk_stage(patterns, sentiment):
    """Pick the risk stage (1-4) for a set of detected patterns.

    Checks run in priority order: escalation (2), reconciliation (3),
    tension-building (1), then calm/honeymoon (4). Stage 1 is also the
    fallback when nothing else applies.

    Args:
        patterns: Container of detected pattern labels.
        sentiment: Sentiment label; only ``"supportive"`` is significant.

    Returns:
        int: The stage number.
    """
    def detected(*labels):
        return any(label in patterns for label in labels)

    if detected("threat", "insults"):
        return 2
    if detected("recovery phase"):
        return 3
    if detected("control", "guilt tripping"):
        return 1
    if sentiment == "supportive" and detected("projection", "dismissiveness"):
        return 4
    return 1
|
270 |
-
|
271 |
-
# Explanation shown for each pattern label; "default" is used for any label
# without its own entry.
_WHY_FLAGGED = {
    "control": "This message may reflect efforts to restrict someone’s autonomy, even if it's framed as concern or care.",
    "gaslighting": "This message could be manipulating someone into questioning their perception or feelings.",
    "dismissiveness": "This message may include belittling, invalidating, or ignoring the other person’s experience.",
    "insults": "Direct insults often appear in escalating abusive dynamics and can erode emotional safety.",
    "threat": "This message includes threatening language, which is a strong predictor of harm.",
    "blame shifting": "This message may redirect responsibility to avoid accountability, especially during conflict.",
    "guilt tripping": "This message may induce guilt in order to control or manipulate behavior.",
    "recovery phase": "This message may be part of a tension-reset cycle, appearing kind but avoiding change.",
    "projection": "This message may involve attributing the abuser’s own behaviors to the victim.",
    "default": "This message contains language patterns that may affect safety, clarity, or emotional autonomy.",
}


def generate_risk_snippet(abuse_score, top_label, escalation_score, stage):
    """Build the markdown risk summary for an analysed message set.

    Args:
        abuse_score: Abuse score; 85 and 60 are the high / moderate cut-offs.
        top_label: Either ``"<pattern> – <score>"`` (separated by an en dash
            surrounded by spaces), any other value (used as the label via
            ``str``), or None (shown as "Unknown").
        escalation_score: Checklist score; 16 and 8 are the high / moderate
            cut-offs.
        stage: Risk stage number; stage 2 lowers the moderate cut-off for
            ``abuse_score`` to 40.

    Returns:
        str: The formatted snippet, starting with two newlines.
    """
    if abuse_score >= 85 or escalation_score >= 16:
        risk_level = "high"
    elif abuse_score >= 60 or escalation_score >= 8:
        risk_level = "moderate"
    elif stage == 2 and abuse_score >= 40:
        # Escalation-stage messages are rated moderate from a lower score.
        risk_level = "moderate"
    else:
        risk_level = "low"

    if isinstance(top_label, str) and " – " in top_label:
        # Split on the LAST separator only: the score is always the final
        # part, and a plain split() would fail to unpack when the label
        # itself contains " – ".
        pattern_label, pattern_score = top_label.rsplit(" – ", 1)
    else:
        pattern_label = str(top_label) if top_label is not None else "Unknown"
        pattern_score = ""

    explanation = _WHY_FLAGGED.get(pattern_label.lower(), _WHY_FLAGGED["default"])

    if risk_level == "high":
        assessment = "The language may reflect patterns of emotional control, even when expressed in soft or caring terms.\n"
    elif risk_level == "moderate":
        assessment = "There are signs of emotional pressure or indirect control that may escalate if repeated.\n"
    else:
        assessment = "The message does not strongly indicate abuse, but it's important to monitor for patterns.\n"

    return "".join([
        f"\n\n🛑 Risk Level: {risk_level.capitalize()}\n",
        f"This message shows strong indicators of **{pattern_label}**. ",
        assessment,
        f"\n💡 *Why this might be flagged:*\n{explanation}\n",
        f"\nDetected Pattern: **{pattern_label} ({pattern_score})**\n",
        "🧠 You can review the pattern in context. This tool highlights possible dynamics—not judgments.",
    ])
|
315 |
-
def compute_abuse_score(matched_scores, sentiment):
    """Turn the matched pattern scores into a single 0-100 abuse score.

    Steps: weighted average of the scores (as a percentage), a multiplier
    for the number of matched patterns, a per-pattern minimum ("floor"),
    a +10 bump for undermining sentiment below 50, and a cap at 100.

    Args:
        matched_scores: Sequence of ``(label, score, weight)`` tuples for the
            patterns that passed their thresholds.
        sentiment: Sentiment label; ``"undermining"`` triggers the bump.

    Returns:
        0 when nothing matched, otherwise the score capped at 100.
    """
    if not matched_scores:
        return 0

    # Weighted average of the matched patterns, as a percentage.
    weighted_total = sum(score * weight for _, score, weight in matched_scores)
    weight_sum = sum(weight for _, _, weight in matched_scores)
    # Guard against a zero weight sum (e.g. every weight is 0) instead of
    # raising ZeroDivisionError; floors and the sentiment bump still apply.
    base_score = (weighted_total / weight_sum) * 100 if weight_sum else 0.0

    # Each additional pattern adds 25%: 1.25x for 2, 1.5x for 3, 1.75x for 4
    # and so on. The multiplier itself is not capped; only the final score is.
    pattern_count = len(matched_scores)
    scale = 1.0 + 0.25 * max(0, pattern_count - 1)
    scaled_score = base_score * scale

    # Minimum score guaranteed by the presence of certain patterns.
    floors = {
        "threat": 70,
        "control": 40,
        "gaslighting": 30,
        "insults": 25,
    }
    floor = max(floors.get(label, 0) for label, _, _ in matched_scores)
    adjusted_score = max(scaled_score, floor)

    # Undermining sentiment nudges lower scores upwards.
    if sentiment == "undermining" and adjusted_score < 50:
        adjusted_score += 10

    return min(adjusted_score, 100)
|
344 |
-
|
345 |
-
|
346 |
def analyze_single_message(text, thresholds):
|
347 |
motif_hits, matched_phrases = detect_motifs(text)
|
348 |
-
|
349 |
-
# Get emotion profile
|
350 |
emotion_profile = get_emotion_profile(text)
|
351 |
sentiment_score = emotion_profile.get("anger", 0) + emotion_profile.get("disgust", 0)
|
352 |
|
353 |
-
# Get model scores first so they can be used in the neutral override
|
354 |
inputs = tokenizer(text, return_tensors="pt", truncation=True, padding=True)
|
355 |
with torch.no_grad():
|
356 |
outputs = model(**inputs)
|
357 |
scores = torch.sigmoid(outputs.logits.squeeze(0)).numpy()
|
358 |
|
359 |
-
# Sentiment override if neutral masks abuse
|
360 |
if emotion_profile.get("neutral", 0) > 0.85 and any(
|
361 |
scores[label_idx] > thresholds[LABELS[label_idx]]
|
362 |
for label_idx in [LABELS.index(l) for l in ["control", "threat", "blame shifting"]]
|
@@ -365,117 +83,17 @@ def analyze_single_message(text, thresholds):
|
|
365 |
else:
|
366 |
sentiment = "undermining" if sentiment_score > 0.25 else "supportive"
|
367 |
|
368 |
-
weapon_flag = detect_weapon_language(text)
|
369 |
-
|
370 |
adjusted_thresholds = {
|
371 |
k: v + 0.05 if sentiment == "supportive" else v
|
372 |
for k, v in thresholds.items()
|
373 |
}
|
374 |
|
375 |
-
|
376 |
-
|
377 |
-
threshold_labels = [
|
378 |
-
label for label, score in zip(LABELS, scores)
|
379 |
-
if score > adjusted_thresholds[label]
|
380 |
-
]
|
381 |
-
|
382 |
-
motifs = [phrase for _, phrase in matched_phrases]
|
383 |
-
|
384 |
-
darvo_score = calculate_darvo_score(
|
385 |
-
threshold_labels,
|
386 |
-
sentiment_before=0.0,
|
387 |
-
sentiment_after=sentiment_score,
|
388 |
-
motifs_found=motifs,
|
389 |
-
contradiction_flag=contradiction_flag
|
390 |
-
)
|
391 |
-
|
392 |
-
top_patterns = sorted(
|
393 |
-
[(label, score) for label, score in zip(LABELS, scores)],
|
394 |
-
key=lambda x: x[1],
|
395 |
-
reverse=True
|
396 |
-
)[:2]
|
397 |
-
|
398 |
-
ESCALATION_HIERARCHY = [
|
399 |
-
"threat", "insults", "control", "blame shifting", "gaslighting",
|
400 |
-
"guilt tripping", "projection", "dismissiveness", "contradictory statements",
|
401 |
-
"recovery phase", "obscure language"
|
402 |
-
]
|
403 |
-
|
404 |
-
# Use top_label from earlier safely, and convert to score if available
|
405 |
-
label_key = top_label.split(" = ")[0] # Extract raw label (e.g., "Control" from "Control = 78%")
|
406 |
-
score = label_scores.get(label_key)
|
407 |
-
label = score_to_label(score) if score is not None else "Unknown"
|
408 |
-
|
409 |
-
# 🛡️ Prevent obscure language from being chosen unless it crosses a hard threshold
|
410 |
-
MIN_OBSCURE_SCORE = 0.30
|
411 |
-
if "obscure language" in passed and passed["obscure language"] < MIN_OBSCURE_SCORE:
|
412 |
-
del passed["obscure language"]
|
413 |
-
|
414 |
-
# 🎯 Calculate matched scores
|
415 |
-
matched_scores = [
|
416 |
-
(label, score, PATTERN_WEIGHTS.get(label, 1.0))
|
417 |
-
for label, score in zip(LABELS, scores)
|
418 |
if score > adjusted_thresholds[label]
|
419 |
-
|
420 |
-
|
421 |
-
# 🏆 Determine top pattern
|
422 |
-
if passed:
|
423 |
-
top_score = max(passed.values())
|
424 |
-
close_matches = {
|
425 |
-
label: score for label, score in passed.items()
|
426 |
-
if (top_score - score) <= 0.05
|
427 |
-
}
|
428 |
-
sorted_close = sorted(
|
429 |
-
close_matches.items(),
|
430 |
-
key=lambda x: ESCALATION_HIERARCHY.index(x[0])
|
431 |
-
)
|
432 |
-
top_pattern_label, top_pattern_score = sorted_close[0]
|
433 |
-
else:
|
434 |
-
if not top_patterns:
|
435 |
-
top_pattern_label, top_pattern_score = "none", 0.0
|
436 |
-
else:
|
437 |
-
top_pattern_label, top_pattern_score = top_patterns[0]
|
438 |
-
top_score = top_pattern_score
|
439 |
-
|
440 |
-
# 🧮 Compute abuse score
|
441 |
-
abuse_score_raw = compute_abuse_score(matched_scores, sentiment)
|
442 |
-
abuse_score = abuse_score_raw
|
443 |
-
|
444 |
-
stage = get_risk_stage(threshold_labels, sentiment) if threshold_labels else 1
|
445 |
-
|
446 |
-
if weapon_flag and stage < 2:
|
447 |
-
stage = 2
|
448 |
-
|
449 |
-
if weapon_flag:
|
450 |
-
abuse_score_raw = min(abuse_score_raw + 25, 100)
|
451 |
-
|
452 |
-
abuse_score = min(
|
453 |
-
abuse_score_raw,
|
454 |
-
100 if "threat" in threshold_labels or "control" in threshold_labels else 95
|
455 |
-
)
|
456 |
|
457 |
-
#
|
458 |
-
tone_tag = get_emotional_tone_tag(emotion_profile, sentiment, threshold_labels, abuse_score)
|
459 |
-
print(f"Emotional Tone Tag: {tone_tag}")
|
460 |
-
|
461 |
-
# 🧾 Debug logs
|
462 |
-
print("Emotion Profile:")
|
463 |
-
for emotion, score in emotion_profile.items():
|
464 |
-
print(f" {emotion.capitalize():10}: {score}")
|
465 |
-
print("\n--- Debug Info ---")
|
466 |
-
print(f"Text: {text}")
|
467 |
-
print(f"Sentiment (via emotion): {sentiment} (score: {round(sentiment_score, 3)})")
|
468 |
-
print("Abuse Pattern Scores:")
|
469 |
-
for label, score in zip(LABELS, scores):
|
470 |
-
passed_mark = "✅" if score > adjusted_thresholds[label] else "❌"
|
471 |
-
print(f" {label:25} → {score:.3f} {passed_mark}")
|
472 |
-
print(f"Matched for score: {[(l, round(s, 3)) for l, s, _ in matched_scores]}")
|
473 |
-
print(f"Abuse Score Raw: {round(abuse_score_raw, 1)}")
|
474 |
-
print(f"Motifs: {motifs}")
|
475 |
-
print(f"Contradiction: {contradiction_flag}")
|
476 |
-
print("------------------\n")
|
477 |
-
|
478 |
-
return abuse_score, threshold_labels, top_patterns, {"label": sentiment}, stage, darvo_score, top_pattern_label
|
479 |
|
480 |
def analyze_composite(msg1, date1, msg2, date2, msg3, date3, *answers_and_none):
|
481 |
none_selected_checked = answers_and_none[-1]
|
@@ -500,15 +118,17 @@ def analyze_composite(msg1, date1, msg2, date2, msg3, date3, *answers_and_none):
|
|
500 |
return "Please enter at least one message."
|
501 |
|
502 |
results = [(analyze_single_message(m, THRESHOLDS.copy()), d) for m, d in active]
|
503 |
-
|
504 |
for result, date in results:
|
505 |
-
assert len(result) ==
|
506 |
-
|
|
|
507 |
top_scores = [r[0][2][0][1] for r in results]
|
508 |
sentiments = [r[0][3]['label'] for r in results]
|
509 |
stages = [r[0][4] for r in results]
|
510 |
darvo_scores = [r[0][5] for r in results]
|
511 |
-
dates_used = [r[1] or "Undated" for r in results]
|
|
|
512 |
|
513 |
composite_abuse = int(round(sum(abuse_scores) / len(abuse_scores)))
|
514 |
top_label = f"{top_labels[0]} – {int(round(top_scores[0] * 100))}%"
|
@@ -525,7 +145,6 @@ def analyze_composite(msg1, date1, msg2, date2, msg3, date3, *answers_and_none):
|
|
525 |
out = f"Abuse Intensity: {composite_abuse}%\n"
|
526 |
out += "📊 This reflects the strength and severity of detected abuse patterns in the message(s).\n\n"
|
527 |
|
528 |
-
# Save this line for later use at the
|
529 |
if escalation_score is None:
|
530 |
escalation_text = "📉 Escalation Potential: Unknown (Checklist not completed)\n"
|
531 |
escalation_text += "⚠️ *This section was not completed. Escalation potential is unknown.*\n"
|
@@ -533,17 +152,20 @@ def analyze_composite(msg1, date1, msg2, date2, msg3, date3, *answers_and_none):
|
|
533 |
escalation_text = f"🧨 **Escalation Potential: {risk_level} ({escalation_score}/{sum(w for _, w in ESCALATION_QUESTIONS)})**\n"
|
534 |
escalation_text += "This score comes directly from the safety checklist and functions as a standalone escalation risk score.\n"
|
535 |
escalation_text += "It indicates how many serious risk factors are present based on your answers to the safety checklist.\n"
|
|
|
536 |
if top_label is None:
|
537 |
top_label = "Unknown – 0%"
|
|
|
538 |
out += generate_risk_snippet(composite_abuse, top_label, escalation_score if escalation_score is not None else 0, most_common_stage)
|
539 |
out += f"\n\n{stage_text}"
|
540 |
out += darvo_blurb
|
|
|
|
|
541 |
print(f"DEBUG: avg_darvo = {avg_darvo}")
|
542 |
-
pattern_labels = [r[0][2][0][0] for r in results]
|
543 |
timeline_image = generate_abuse_score_chart(dates_used, abuse_scores, pattern_labels)
|
544 |
-
out += "\n\n" + escalation_text
|
545 |
return out, timeline_image
|
546 |
-
|
547 |
message_date_pairs = [
|
548 |
(
|
549 |
gr.Textbox(label=f"Message {i+1}"),
|
|
|
15 |
if isinstance(emotions, list) and isinstance(emotions[0], list):
|
16 |
emotions = emotions[0]
|
17 |
return {e['label'].lower(): round(e['score'], 3) for e in emotions}
|
18 |
+
|
19 |
emotion_pipeline = hf_pipeline(
|
20 |
"text-classification",
|
21 |
model="j-hartmann/emotion-english-distilroberta-base",
|
|
|
23 |
truncation=True
|
24 |
)
|
25 |
|
|
|
26 |
def generate_abuse_score_chart(dates, scores, labels):
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
27 |
if all(re.match(r"\d{4}-\d{2}-\d{2}", d) for d in dates):
|
28 |
parsed_x = [datetime.strptime(d, "%Y-%m-%d") for d in dates]
|
29 |
x_labels = [d.strftime("%Y-%m-%d") for d in parsed_x]
|
|
|
33 |
|
34 |
fig, ax = plt.subplots(figsize=(8, 3))
|
35 |
ax.plot(parsed_x, scores, marker='o', linestyle='-', color='darkred', linewidth=2)
|
|
|
36 |
for x, y in zip(parsed_x, scores):
|
37 |
ax.text(x, y + 2, f"{int(y)}%", ha='center', fontsize=8, color='black')
|
38 |
|
39 |
ax.set_xticks(parsed_x)
|
40 |
ax.set_xticklabels(x_labels)
|
41 |
+
ax.set_xlabel("")
|
42 |
ax.set_ylabel("Abuse Score (%)")
|
43 |
ax.set_ylim(0, 105)
|
44 |
ax.grid(True)
|
|
|
49 |
buf.seek(0)
|
50 |
return Image.open(buf)
|
51 |
|
|
|
|
|
52 |
from transformers import AutoModelForSequenceClassification, AutoTokenizer
|
53 |
|
54 |
model_name = "SamanthaStorm/tether-multilabel-v3"
|
55 |
model = AutoModelForSequenceClassification.from_pretrained(model_name)
|
56 |
tokenizer = AutoTokenizer.from_pretrained(model_name, use_fast=False)
|
57 |
|
58 |
+
LABELS = [...]
|
59 |
+
THRESHOLDS = {...}
|
60 |
+
PATTERN_WEIGHTS = {...}
|
61 |
+
RISK_STAGE_LABELS = {...}
|
62 |
+
ESCALATION_QUESTIONS = [...]
|
63 |
+
DARVO_PATTERNS = {...}
|
64 |
+
DARVO_MOTIFS = [...]
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
65 |
|
66 |
+
# (Leave the rest of your helper functions unchanged)
|
|
|
|
|
67 |
|
|
|
|
|
|
|
68 |
def analyze_single_message(text, thresholds):
|
69 |
motif_hits, matched_phrases = detect_motifs(text)
|
|
|
|
|
70 |
emotion_profile = get_emotion_profile(text)
|
71 |
sentiment_score = emotion_profile.get("anger", 0) + emotion_profile.get("disgust", 0)
|
72 |
|
|
|
73 |
inputs = tokenizer(text, return_tensors="pt", truncation=True, padding=True)
|
74 |
with torch.no_grad():
|
75 |
outputs = model(**inputs)
|
76 |
scores = torch.sigmoid(outputs.logits.squeeze(0)).numpy()
|
77 |
|
|
|
78 |
if emotion_profile.get("neutral", 0) > 0.85 and any(
|
79 |
scores[label_idx] > thresholds[LABELS[label_idx]]
|
80 |
for label_idx in [LABELS.index(l) for l in ["control", "threat", "blame shifting"]]
|
|
|
83 |
else:
|
84 |
sentiment = "undermining" if sentiment_score > 0.25 else "supportive"
|
85 |
|
|
|
|
|
86 |
adjusted_thresholds = {
|
87 |
k: v + 0.05 if sentiment == "supportive" else v
|
88 |
for k, v in thresholds.items()
|
89 |
}
|
90 |
|
91 |
+
passed = {
|
92 |
+
label: score for label, score in zip(LABELS, scores)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
93 |
if score > adjusted_thresholds[label]
|
94 |
+
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
95 |
|
96 |
+
# (Continue unchanged)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
97 |
|
98 |
def analyze_composite(msg1, date1, msg2, date2, msg3, date3, *answers_and_none):
|
99 |
none_selected_checked = answers_and_none[-1]
|
|
|
118 |
return "Please enter at least one message."
|
119 |
|
120 |
results = [(analyze_single_message(m, THRESHOLDS.copy()), d) for m, d in active]
|
121 |
+
|
122 |
for result, date in results:
|
123 |
+
assert len(result) == 7, "Unexpected output from analyze_single_message"
|
124 |
+
|
125 |
+
top_labels = [r[0][6] for r in results]
|
126 |
top_scores = [r[0][2][0][1] for r in results]
|
127 |
sentiments = [r[0][3]['label'] for r in results]
|
128 |
stages = [r[0][4] for r in results]
|
129 |
darvo_scores = [r[0][5] for r in results]
|
130 |
+
dates_used = [r[1] or "Undated" for r in results]
|
131 |
+
abuse_scores = [r[0][0] for r in results]
|
132 |
|
133 |
composite_abuse = int(round(sum(abuse_scores) / len(abuse_scores)))
|
134 |
top_label = f"{top_labels[0]} – {int(round(top_scores[0] * 100))}%"
|
|
|
145 |
out = f"Abuse Intensity: {composite_abuse}%\n"
|
146 |
out += "📊 This reflects the strength and severity of detected abuse patterns in the message(s).\n\n"
|
147 |
|
|
|
148 |
if escalation_score is None:
|
149 |
escalation_text = "📉 Escalation Potential: Unknown (Checklist not completed)\n"
|
150 |
escalation_text += "⚠️ *This section was not completed. Escalation potential is unknown.*\n"
|
|
|
152 |
escalation_text = f"🧨 **Escalation Potential: {risk_level} ({escalation_score}/{sum(w for _, w in ESCALATION_QUESTIONS)})**\n"
|
153 |
escalation_text += "This score comes directly from the safety checklist and functions as a standalone escalation risk score.\n"
|
154 |
escalation_text += "It indicates how many serious risk factors are present based on your answers to the safety checklist.\n"
|
155 |
+
|
156 |
if top_label is None:
|
157 |
top_label = "Unknown – 0%"
|
158 |
+
|
159 |
out += generate_risk_snippet(composite_abuse, top_label, escalation_score if escalation_score is not None else 0, most_common_stage)
|
160 |
out += f"\n\n{stage_text}"
|
161 |
out += darvo_blurb
|
162 |
+
out += "\n\n" + escalation_text
|
163 |
+
|
164 |
print(f"DEBUG: avg_darvo = {avg_darvo}")
|
165 |
+
pattern_labels = [r[0][2][0][0] for r in results]
|
166 |
timeline_image = generate_abuse_score_chart(dates_used, abuse_scores, pattern_labels)
|
|
|
167 |
return out, timeline_image
|
168 |
+
|
169 |
message_date_pairs = [
|
170 |
(
|
171 |
gr.Textbox(label=f"Message {i+1}"),
|