Spaces: Running on Zero
Update app.py

app.py CHANGED
@@ -15,7 +15,7 @@ def get_emotion_profile(text):
    if isinstance(emotions, list) and isinstance(emotions[0], list):
        emotions = emotions[0]
    return {e['label'].lower(): round(e['score'], 3) for e in emotions}
-
emotion_pipeline = hf_pipeline(
    "text-classification",
    model="j-hartmann/emotion-english-distilroberta-base",

@@ -23,7 +23,15 @@ emotion_pipeline = hf_pipeline(
    truncation=True
)

def generate_abuse_score_chart(dates, scores, labels):
    if all(re.match(r"\d{4}-\d{2}-\d{2}", d) for d in dates):
        parsed_x = [datetime.strptime(d, "%Y-%m-%d") for d in dates]
        x_labels = [d.strftime("%Y-%m-%d") for d in parsed_x]

@@ -33,12 +41,13 @@ def generate_abuse_score_chart(dates, scores, labels):

    fig, ax = plt.subplots(figsize=(8, 3))
    ax.plot(parsed_x, scores, marker='o', linestyle='-', color='darkred', linewidth=2)
    for x, y in zip(parsed_x, scores):
        ax.text(x, y + 2, f"{int(y)}%", ha='center', fontsize=8, color='black')

    ax.set_xticks(parsed_x)
    ax.set_xticklabels(x_labels)
-    ax.set_xlabel("")
    ax.set_ylabel("Abuse Score (%)")
    ax.set_ylim(0, 105)
    ax.grid(True)

@@ -49,32 +58,305 @@ def generate_abuse_score_chart(dates, scores, labels):
    buf.seek(0)
    return Image.open(buf)

from transformers import AutoModelForSequenceClassification, AutoTokenizer

model_name = "SamanthaStorm/tether-multilabel-v3"
model = AutoModelForSequenceClassification.from_pretrained(model_name)
tokenizer = AutoTokenizer.from_pretrained(model_name, use_fast=False)

-LABELS = [
-
-
-
-
-
-

-

def analyze_single_message(text, thresholds):
    motif_hits, matched_phrases = detect_motifs(text)
    emotion_profile = get_emotion_profile(text)
    sentiment_score = emotion_profile.get("anger", 0) + emotion_profile.get("disgust", 0)

    inputs = tokenizer(text, return_tensors="pt", truncation=True, padding=True)
    with torch.no_grad():
        outputs = model(**inputs)
    scores = torch.sigmoid(outputs.logits.squeeze(0)).numpy()

    if emotion_profile.get("neutral", 0) > 0.85 and any(
        scores[label_idx] > thresholds[LABELS[label_idx]]
        for label_idx in [LABELS.index(l) for l in ["control", "threat", "blame shifting"]]

@@ -83,17 +365,76 @@ def analyze_single_message(text, thresholds):
    else:
        sentiment = "undermining" if sentiment_score > 0.25 else "supportive"

    adjusted_thresholds = {
        k: v + 0.05 if sentiment == "supportive" else v
        for k, v in thresholds.items()
    }

-
-
        if score > adjusted_thresholds[label]
-

-

def analyze_composite(msg1, date1, msg2, date2, msg3, date3, *answers_and_none):
    none_selected_checked = answers_and_none[-1]

@@ -118,17 +459,15 @@ def analyze_composite(msg1, date1, msg2, date2, msg3, date3, *answers_and_none):
        return "Please enter at least one message."

    results = [(analyze_single_message(m, THRESHOLDS.copy()), d) for m, d in active]
-
    for result, date in results:
-        assert len(result) ==
-
-    top_labels = [r[0][
    top_scores = [r[0][2][0][1] for r in results]
    sentiments = [r[0][3]['label'] for r in results]
    stages = [r[0][4] for r in results]
    darvo_scores = [r[0][5] for r in results]
-    dates_used = [r[1] or "Undated" for r in results]
-    abuse_scores = [r[0][0] for r in results]

    composite_abuse = int(round(sum(abuse_scores) / len(abuse_scores)))
    top_label = f"{top_labels[0]} – {int(round(top_scores[0] * 100))}%"

@@ -145,6 +484,7 @@ def analyze_composite(msg1, date1, msg2, date2, msg3, date3, *answers_and_none):
    out = f"Abuse Intensity: {composite_abuse}%\n"
    out += "📊 This reflects the strength and severity of detected abuse patterns in the message(s).\n\n"

    if escalation_score is None:
        escalation_text = "📉 Escalation Potential: Unknown (Checklist not completed)\n"
        escalation_text += "⚠️ *This section was not completed. Escalation potential is unknown.*\n"

@@ -152,20 +492,17 @@ def analyze_composite(msg1, date1, msg2, date2, msg3, date3, *answers_and_none):
        escalation_text = f"🧨 **Escalation Potential: {risk_level} ({escalation_score}/{sum(w for _, w in ESCALATION_QUESTIONS)})**\n"
        escalation_text += "This score comes directly from the safety checklist and functions as a standalone escalation risk score.\n"
        escalation_text += "It indicates how many serious risk factors are present based on your answers to the safety checklist.\n"
-
    if top_label is None:
        top_label = "Unknown – 0%"
-
    out += generate_risk_snippet(composite_abuse, top_label, escalation_score if escalation_score is not None else 0, most_common_stage)
    out += f"\n\n{stage_text}"
    out += darvo_blurb
-    out += "\n\n" + escalation_text
-
    print(f"DEBUG: avg_darvo = {avg_darvo}")
-    pattern_labels = [r[0][2][0][0] for r in results]
    timeline_image = generate_abuse_score_chart(dates_used, abuse_scores, pattern_labels)
    return out, timeline_image
-
message_date_pairs = [
    (
        gr.Textbox(label=f"Message {i+1}"),
Updated lines (added lines marked with +):

@@ -15,7 +15,7 @@ def get_emotion_profile(text):
    if isinstance(emotions, list) and isinstance(emotions[0], list):
        emotions = emotions[0]
    return {e['label'].lower(): round(e['score'], 3) for e in emotions}
+# Emotion model (no retraining needed)
emotion_pipeline = hf_pipeline(
    "text-classification",
    model="j-hartmann/emotion-english-distilroberta-base",

@@ -23,7 +23,15 @@ emotion_pipeline = hf_pipeline(
    truncation=True
)

+# --- Timeline Visualization Function ---
def generate_abuse_score_chart(dates, scores, labels):
+    import matplotlib.pyplot as plt
+    import io
+    from PIL import Image
+    from datetime import datetime
+    import re
+
+    # Determine if all entries are valid dates
    if all(re.match(r"\d{4}-\d{2}-\d{2}", d) for d in dates):
        parsed_x = [datetime.strptime(d, "%Y-%m-%d") for d in dates]
        x_labels = [d.strftime("%Y-%m-%d") for d in parsed_x]

@@ -33,12 +41,13 @@ def generate_abuse_score_chart(dates, scores, labels):

    fig, ax = plt.subplots(figsize=(8, 3))
    ax.plot(parsed_x, scores, marker='o', linestyle='-', color='darkred', linewidth=2)
+
    for x, y in zip(parsed_x, scores):
        ax.text(x, y + 2, f"{int(y)}%", ha='center', fontsize=8, color='black')

    ax.set_xticks(parsed_x)
    ax.set_xticklabels(x_labels)
+    ax.set_xlabel("")  # No axis label
    ax.set_ylabel("Abuse Score (%)")
    ax.set_ylim(0, 105)
    ax.grid(True)
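A minimal usage sketch for the revised generate_abuse_score_chart (illustrative values, not from the commit; it assumes matplotlib and Pillow are installed and that the labels argument is consumed by plotting code not shown in this hunk):

dates = ["2024-01-05", "2024-01-12", "2024-01-19"]   # must match \d{4}-\d{2}-\d{2}
scores = [35, 60, 80]                                 # abuse scores in percent
labels = ["dismissiveness", "control", "threat"]      # one pattern label per point
img = generate_abuse_score_chart(dates, scores, labels)
img.save("abuse_timeline.png")                        # the helper returns a PIL.Image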
@@ -49,32 +58,305 @@ def generate_abuse_score_chart(dates, scores, labels):
    buf.seek(0)
    return Image.open(buf)

+
+# --- Abuse Model ---
from transformers import AutoModelForSequenceClassification, AutoTokenizer

model_name = "SamanthaStorm/tether-multilabel-v3"
model = AutoModelForSequenceClassification.from_pretrained(model_name)
tokenizer = AutoTokenizer.from_pretrained(model_name, use_fast=False)

+LABELS = [
+    "blame shifting", "contradictory statements", "control", "dismissiveness",
+    "gaslighting", "guilt tripping", "insults", "obscure language",
+    "projection", "recovery phase", "threat"
+]
+
+THRESHOLDS = {
+    "blame shifting": 0.30, "contradictory statements": 0.30, "control": 0.08, "dismissiveness": 0.12,
+    "gaslighting": 0.09, "guilt tripping": 0.4, "insults": 0.10, "obscure language": 0.55,
+    "projection": 0.09, "recovery phase": 0.20, "threat": 0.15
+}
+
+PATTERN_WEIGHTS = {
+    "gaslighting": 1.5,
+    "control": 1.2,
+    "dismissiveness": 0.7,
+    "blame shifting": 0.8,
+    "guilt tripping": 1.2,
+    "insults": 1.4,
+    "projection": 1.2,
+    "recovery phase": 1.1,
+    "contradictory statements": 0.75,
+    "threat": 1.6  # 🔧 New: raise weight for threat
+}
+RISK_STAGE_LABELS = {
+    1: "🌀 Risk Stage: Tension-Building\nThis message reflects rising emotional pressure or subtle control attempts.",
+    2: "🔥 Risk Stage: Escalation\nThis message includes direct or aggressive patterns, suggesting active harm.",
+    3: "🌧️ Risk Stage: Reconciliation\nThis message reflects a reset attempt—apologies or emotional repair without accountability.",
+    4: "🌸 Risk Stage: Calm / Honeymoon\nThis message appears supportive but may follow prior harm, minimizing it."
+}
+
+ESCALATION_QUESTIONS = [
+    ("Partner has access to firearms or weapons", 4),
+    ("Partner threatened to kill you", 3),
+    ("Partner threatened you with a weapon", 3),
+    ("Partner has ever choked you, even if you considered it consensual at the time", 4),
+    ("Partner injured or threatened your pet(s)", 3),
+    ("Partner has broken your things, punched or kicked walls, or thrown things ", 2),
+    ("Partner forced or coerced you into unwanted sexual acts", 3),
+    ("Partner threatened to take away your children", 2),
+    ("Violence has increased in frequency or severity", 3),
+    ("Partner monitors your calls/GPS/social media", 2)
+]
+DARVO_PATTERNS = {
+    "blame shifting", "projection", "dismissiveness", "guilt tripping", "contradictory statements"
+}
+DARVO_MOTIFS = [
+    "I never said that.", "You’re imagining things.", "That never happened.",
+    "You’re making a big deal out of nothing.", "It was just a joke.", "You’re too sensitive.",
+    "I don’t know what you’re talking about.", "You’re overreacting.", "I didn’t mean it that way.",
+    "You’re twisting my words.", "You’re remembering it wrong.", "You’re always looking for something to complain about.",
+    "You’re just trying to start a fight.", "I was only trying to help.", "You’re making things up.",
+    "You’re blowing this out of proportion.", "You’re being paranoid.", "You’re too emotional.",
+    "You’re always so dramatic.", "You’re just trying to make me look bad.",
+
+    "You’re crazy.", "You’re the one with the problem.", "You’re always so negative.",
+    "You’re just trying to control me.", "You’re the abusive one.", "You’re trying to ruin my life.",
+    "You’re just jealous.", "You’re the one who needs help.", "You’re always playing the victim.",
+    "You’re the one causing all the problems.", "You’re just trying to make me feel guilty.",
+    "You’re the one who can’t let go of the past.", "You’re the one who’s always angry.",
+    "You’re the one who’s always complaining.", "You’re the one who’s always starting arguments.",
+    "You’re the one who’s always making things worse.", "You’re the one who’s always making me feel bad.",
+    "You’re the one who’s always making me look like the bad guy.",
+    "You’re the one who’s always making me feel like a failure.",
+    "You’re the one who’s always making me feel like I’m not good enough.",
+
+    "I can’t believe you’re doing this to me.", "You’re hurting me.",
+    "You’re making me feel like a terrible person.", "You’re always blaming me for everything.",
+    "You’re the one who’s abusive.", "You’re the one who’s controlling.", "You’re the one who’s manipulative.",
+    "You’re the one who’s toxic.", "You’re the one who’s gaslighting me.",
+    "You’re the one who’s always putting me down.", "You’re the one who’s always making me feel bad.",
+    "You’re the one who’s always making me feel like I’m not good enough.",
+    "You’re the one who’s always making me feel like I’m the problem.",
+    "You’re the one who’s always making me feel like I’m the bad guy.",
+    "You’re the one who’s always making me feel like I’m the villain.",
+    "You’re the one who’s always making me feel like I’m the one who needs to change.",
+    "You’re the one who’s always making me feel like I’m the one who’s wrong.",
+    "You’re the one who’s always making me feel like I’m the one who’s crazy.",
+    "You’re the one who’s always making me feel like I’m the one who’s abusive.",
+    "You’re the one who’s always making me feel like I’m the one who’s toxic."
+]
+def get_emotional_tone_tag(emotions, sentiment, patterns, abuse_score):
+    sadness = emotions.get("sadness", 0)
+    joy = emotions.get("joy", 0)
+    neutral = emotions.get("neutral", 0)
+    disgust = emotions.get("disgust", 0)
+    anger = emotions.get("anger", 0)
+    fear = emotions.get("fear", 0)
+
+    # 1. Performative Regret
+    if (
+        sadness > 0.4 and
+        any(p in patterns for p in ["blame shifting", "guilt tripping", "recovery phase"]) and
+        (sentiment == "undermining" or abuse_score > 40)
+    ):
+        return "performative regret"
+
+    # 2. Coercive Warmth
+    if (
+        (joy > 0.3 or sadness > 0.4) and
+        any(p in patterns for p in ["control", "gaslighting"]) and
+        sentiment == "undermining"
+    ):
+        return "coercive warmth"
+
+    # 3. Cold Invalidation
+    if (
+        (neutral + disgust) > 0.5 and
+        any(p in patterns for p in ["dismissiveness", "projection", "obscure language"]) and
+        sentiment == "undermining"
+    ):
+        return "cold invalidation"
+
+    # 4. Genuine Vulnerability
+    if (
+        (sadness + fear) > 0.5 and
+        sentiment == "supportive" and
+        all(p in ["recovery phase"] for p in patterns)
+    ):
+        return "genuine vulnerability"
+
+    # 5. Emotional Threat
+    if (
+        (anger + disgust) > 0.5 and
+        any(p in patterns for p in ["control", "threat", "insults", "dismissiveness"]) and
+        sentiment == "undermining"
+    ):
+        return "emotional threat"
+
+    # 6. Weaponized Sadness
+    if (
+        sadness > 0.6 and
+        any(p in patterns for p in ["guilt tripping", "projection"]) and
+        sentiment == "undermining"
+    ):
+        return "weaponized sadness"
+
+    # 7. Toxic Resignation
+    if (
+        neutral > 0.5 and
+        any(p in patterns for p in ["dismissiveness", "obscure language"]) and
+        sentiment == "undermining"
+    ):
+        return "toxic resignation"
+
+    return None
+def detect_contradiction(message):
+    patterns = [
+        (r"\b(i love you).{0,15}(i hate you|you ruin everything)", re.IGNORECASE),
+        (r"\b(i’m sorry).{0,15}(but you|if you hadn’t)", re.IGNORECASE),
+        (r"\b(i’m trying).{0,15}(you never|why do you)", re.IGNORECASE),
+        (r"\b(do what you want).{0,15}(you’ll regret it|i always give everything)", re.IGNORECASE),
+        (r"\b(i don’t care).{0,15}(you never think of me)", re.IGNORECASE),
+        (r"\b(i guess i’m just).{0,15}(the bad guy|worthless|never enough)", re.IGNORECASE)
+    ]
+    return any(re.search(p, message, flags) for p, flags in patterns)
+
+def calculate_darvo_score(patterns, sentiment_before, sentiment_after, motifs_found, contradiction_flag=False):
+    # Count all detected DARVO-related patterns
+    pattern_hits = sum(1 for p in patterns if p.lower() in DARVO_PATTERNS)
+
+    # Sentiment delta
+    sentiment_shift_score = max(0.0, sentiment_after - sentiment_before)
+
+    # Match against DARVO motifs more loosely
+    motif_hits = sum(
+        any(phrase.lower() in motif.lower() or motif.lower() in phrase.lower()
+            for phrase in DARVO_MOTIFS)
+        for motif in motifs_found
+    )
+    motif_score = motif_hits / max(len(DARVO_MOTIFS), 1)
+
+    # Contradiction still binary
+    contradiction_score = 1.0 if contradiction_flag else 0.0
+
+    # Final DARVO score
+    return round(min(
+        0.3 * pattern_hits +
+        0.3 * sentiment_shift_score +
+        0.25 * motif_score +
+        0.15 * contradiction_score, 1.0
+    ), 3)
+def detect_weapon_language(text):
+    weapon_keywords = [
+        "knife", "knives", "stab", "cut you", "cutting",
+        "gun", "shoot", "rifle", "firearm", "pistol",
+        "bomb", "blow up", "grenade", "explode",
+        "weapon", "armed", "loaded", "kill you", "take you out"
+    ]
+    text_lower = text.lower()
+    return any(word in text_lower for word in weapon_keywords)
+def get_risk_stage(patterns, sentiment):
+    if "threat" in patterns or "insults" in patterns:
+        return 2
+    elif "recovery phase" in patterns:
+        return 3
+    elif "control" in patterns or "guilt tripping" in patterns:
+        return 1
+    elif sentiment == "supportive" and any(p in patterns for p in ["projection", "dismissiveness"]):
+        return 4
+    return 1
+
+def generate_risk_snippet(abuse_score, top_label, escalation_score, stage):
+    if abuse_score >= 85 or escalation_score >= 16:
+        risk_level = "high"
+    elif abuse_score >= 60 or escalation_score >= 8:
+        risk_level = "moderate"
+    elif stage == 2 and abuse_score >= 40:
+        risk_level = "moderate"  # 🔧 New rule for escalation stage
+    else:
+        risk_level = "low"
+    if isinstance(top_label, str) and " – " in top_label:
+        pattern_label, pattern_score = top_label.split(" – ")
+    else:
+        pattern_label = str(top_label) if top_label is not None else "Unknown"
+        pattern_score = ""
+
+    WHY_FLAGGED = {
+        "control": "This message may reflect efforts to restrict someone’s autonomy, even if it's framed as concern or care.",
+        "gaslighting": "This message could be manipulating someone into questioning their perception or feelings.",
+        "dismissiveness": "This message may include belittling, invalidating, or ignoring the other person’s experience.",
+        "insults": "Direct insults often appear in escalating abusive dynamics and can erode emotional safety.",
+        "threat": "This message includes threatening language, which is a strong predictor of harm.",
+        "blame shifting": "This message may redirect responsibility to avoid accountability, especially during conflict.",
+        "guilt tripping": "This message may induce guilt in order to control or manipulate behavior.",
+        "recovery phase": "This message may be part of a tension-reset cycle, appearing kind but avoiding change.",
+        "projection": "This message may involve attributing the abuser’s own behaviors to the victim.",
+        "default": "This message contains language patterns that may affect safety, clarity, or emotional autonomy."
+    }

+    explanation = WHY_FLAGGED.get(pattern_label.lower(), WHY_FLAGGED["default"])

+    base = f"\n\n🛑 Risk Level: {risk_level.capitalize()}\n"
+    base += f"This message shows strong indicators of **{pattern_label}**. "
+
+    if risk_level == "high":
+        base += "The language may reflect patterns of emotional control, even when expressed in soft or caring terms.\n"
+    elif risk_level == "moderate":
+        base += "There are signs of emotional pressure or indirect control that may escalate if repeated.\n"
+    else:
+        base += "The message does not strongly indicate abuse, but it's important to monitor for patterns.\n"
+
+    base += f"\n💡 *Why this might be flagged:*\n{explanation}\n"
+    base += f"\nDetected Pattern: **{pattern_label} ({pattern_score})**\n"
+    base += "🧠 You can review the pattern in context. This tool highlights possible dynamics—not judgments."
+    return base
+def compute_abuse_score(matched_scores, sentiment):
+    if not matched_scores:
+        return 0
+
+    # Weighted average of passed patterns
+    weighted_total = sum(score * weight for _, score, weight in matched_scores)
+    weight_sum = sum(weight for _, _, weight in matched_scores)
+    base_score = (weighted_total / weight_sum) * 100
+
+    # Boost for pattern count
+    pattern_count = len(matched_scores)
+    scale = 1.0 + 0.25 * max(0, pattern_count - 1)  # 1.25x for 2, 1.5x for 3+
+    scaled_score = base_score * scale
+
+    # Pattern floors
+    FLOORS = {
+        "threat": 70,
+        "control": 40,
+        "gaslighting": 30,
+        "insults": 25
+    }
+    floor = max(FLOORS.get(label, 0) for label, _, _ in matched_scores)
+    adjusted_score = max(scaled_score, floor)
+
+    # Sentiment tweak
+    if sentiment == "undermining" and adjusted_score < 50:
+        adjusted_score += 10
+
+    return min(adjusted_score, 100)
+
+
def analyze_single_message(text, thresholds):
    motif_hits, matched_phrases = detect_motifs(text)
+
+    # Get emotion profile
    emotion_profile = get_emotion_profile(text)
    sentiment_score = emotion_profile.get("anger", 0) + emotion_profile.get("disgust", 0)

+    # Get model scores first so they can be used in the neutral override
    inputs = tokenizer(text, return_tensors="pt", truncation=True, padding=True)
    with torch.no_grad():
        outputs = model(**inputs)
    scores = torch.sigmoid(outputs.logits.squeeze(0)).numpy()

+    # Sentiment override if neutral masks abuse
    if emotion_profile.get("neutral", 0) > 0.85 and any(
        scores[label_idx] > thresholds[LABELS[label_idx]]
        for label_idx in [LABELS.index(l) for l in ["control", "threat", "blame shifting"]]
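To make the new weighting concrete, a small worked example for compute_abuse_score using only the PATTERN_WEIGHTS and floors added above (the matched scores are made up for illustration):

matched = [("control", 0.20, 1.2), ("insults", 0.30, 1.4)]  # (label, sigmoid score, weight)
# weighted average: (0.20*1.2 + 0.30*1.4) / (1.2 + 1.4) * 100 ≈ 25.4
# two matched patterns -> scale 1.25                          ≈ 31.7
# "control" floor of 40 -> max(31.7, 40)                      = 40
# "undermining" sentiment and score < 50 -> +10               = 50
print(compute_abuse_score(matched, "undermining"))  # 50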
@@ -83,17 +365,76 @@ def analyze_single_message(text, thresholds):
    else:
        sentiment = "undermining" if sentiment_score > 0.25 else "supportive"

+    weapon_flag = detect_weapon_language(text)
+
    adjusted_thresholds = {
        k: v + 0.05 if sentiment == "supportive" else v
        for k, v in thresholds.items()
    }

+    contradiction_flag = detect_contradiction(text)
+
+    threshold_labels = [
+        label for label, score in zip(LABELS, scores)
        if score > adjusted_thresholds[label]
+    ]
+
+    motifs = [phrase for _, phrase in matched_phrases]
+
+    darvo_score = calculate_darvo_score(
+        threshold_labels,
+        sentiment_before=0.0,
+        sentiment_after=sentiment_score,
+        motifs_found=motifs,
+        contradiction_flag=contradiction_flag
+    )

+    top_patterns = sorted(
+        [(label, score) for label, score in zip(LABELS, scores)],
+        key=lambda x: x[1],
+        reverse=True
+    )[:2]
+
+    matched_scores = [
+        (label, score, PATTERN_WEIGHTS.get(label, 1.0))
+        for label, score in zip(LABELS, scores)
+        if score > adjusted_thresholds[label]
+    ]
+
+    abuse_score_raw = compute_abuse_score(matched_scores, sentiment)
+    abuse_score = abuse_score_raw
+
+    stage = get_risk_stage(threshold_labels, sentiment) if threshold_labels else 1
+    if weapon_flag and stage < 2:
+        stage = 2
+
+    if weapon_flag:
+        abuse_score_raw = min(abuse_score_raw + 25, 100)
+
+    abuse_score = min(abuse_score_raw, 100 if "threat" in threshold_labels or "control" in threshold_labels else 95)
+
+    # Get tone tag
+    tone_tag = get_emotional_tone_tag(emotion_profile, sentiment, threshold_labels, abuse_score)
+    print(f"Emotional Tone Tag: {tone_tag}")
+
+    # Debug logs
+    print("Emotion Profile:")
+    for emotion, score in emotion_profile.items():
+        print(f" {emotion.capitalize():10}: {score}")
+    print("\n--- Debug Info ---")
+    print(f"Text: {text}")
+    print(f"Sentiment (via emotion): {sentiment} (score: {round(sentiment_score, 3)})")
+    print("Abuse Pattern Scores:")
+    for label, score in zip(LABELS, scores):
+        passed = "✅" if score > adjusted_thresholds[label] else "❌"
+        print(f" {label:25} → {score:.3f} {passed}")
+    print(f"Matched for score: {[(l, round(s, 3)) for l, s, _ in matched_scores]}")
+    print(f"Abuse Score Raw: {round(abuse_score_raw, 1)}")
+    print(f"Motifs: {motifs}")
+    print(f"Contradiction: {contradiction_flag}")
+    print("------------------\n")
+
+    return abuse_score, threshold_labels, top_patterns, {"label": sentiment}, stage, darvo_score

def analyze_composite(msg1, date1, msg2, date2, msg3, date3, *answers_and_none):
    none_selected_checked = answers_and_none[-1]
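The revised analyze_single_message now returns a 6-tuple, which is what the assert in analyze_composite below checks; a quick sketch of how it unpacks (the example message is invented):

result = analyze_single_message("You never listen and you'll regret it.", THRESHOLDS.copy())
abuse_score, threshold_labels, top_patterns, sentiment, stage, darvo_score = result
print(abuse_score)         # 0-100 score for this single message
print(top_patterns[0])     # (label, sigmoid score) of the strongest pattern
print(sentiment["label"])  # "undermining" or "supportive"
print(stage, darvo_score)  # risk stage 1-4 and DARVO score 0.0-1.0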
@@ -118,17 +459,15 @@ def analyze_composite(msg1, date1, msg2, date2, msg3, date3, *answers_and_none):
        return "Please enter at least one message."

    results = [(analyze_single_message(m, THRESHOLDS.copy()), d) for m, d in active]
    for result, date in results:
+        assert len(result) == 6, "Unexpected output from analyze_single_message"
+    abuse_scores = [r[0][0] for r in results]
+    top_labels = [r[0][1][0] if r[0][1] else r[0][2][0][0] for r in results]
    top_scores = [r[0][2][0][1] for r in results]
    sentiments = [r[0][3]['label'] for r in results]
    stages = [r[0][4] for r in results]
    darvo_scores = [r[0][5] for r in results]
+    dates_used = [r[1] or "Undated" for r in results]  # Store dates for future mapping

    composite_abuse = int(round(sum(abuse_scores) / len(abuse_scores)))
    top_label = f"{top_labels[0]} – {int(round(top_scores[0] * 100))}%"

@@ -145,6 +484,7 @@ def analyze_composite(msg1, date1, msg2, date2, msg3, date3, *answers_and_none):
    out = f"Abuse Intensity: {composite_abuse}%\n"
    out += "📊 This reflects the strength and severity of detected abuse patterns in the message(s).\n\n"

+    # Save this line for later use at the
    if escalation_score is None:
        escalation_text = "📉 Escalation Potential: Unknown (Checklist not completed)\n"
        escalation_text += "⚠️ *This section was not completed. Escalation potential is unknown.*\n"

@@ -152,20 +492,17 @@ def analyze_composite(msg1, date1, msg2, date2, msg3, date3, *answers_and_none):
        escalation_text = f"🧨 **Escalation Potential: {risk_level} ({escalation_score}/{sum(w for _, w in ESCALATION_QUESTIONS)})**\n"
        escalation_text += "This score comes directly from the safety checklist and functions as a standalone escalation risk score.\n"
        escalation_text += "It indicates how many serious risk factors are present based on your answers to the safety checklist.\n"
    if top_label is None:
        top_label = "Unknown – 0%"
    out += generate_risk_snippet(composite_abuse, top_label, escalation_score if escalation_score is not None else 0, most_common_stage)
    out += f"\n\n{stage_text}"
    out += darvo_blurb
    print(f"DEBUG: avg_darvo = {avg_darvo}")
+    pattern_labels = [r[0][2][0][0] for r in results]  # top label for each message
    timeline_image = generate_abuse_score_chart(dates_used, abuse_scores, pattern_labels)
+    out += "\n\n" + escalation_text
    return out, timeline_image
+
message_date_pairs = [
    (
        gr.Textbox(label=f"Message {i+1}"),
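For reference, the checklist denominator shown in escalation_text comes straight from the weights added in this commit; a quick check assuming the ESCALATION_QUESTIONS list above:

max_escalation = sum(w for _, w in ESCALATION_QUESTIONS)
print(max_escalation)  # 29 with the weights in this commit
# generate_risk_snippet treats escalation_score >= 16 as high and >= 8 as moderate risk.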