Spaces:
Running
Running
Update app.py
Browse files
app.py
CHANGED
@@ -15,7 +15,7 @@ def get_emotion_profile(text):
|
|
15 |
if isinstance(emotions, list) and isinstance(emotions[0], list):
|
16 |
emotions = emotions[0]
|
17 |
return {e['label'].lower(): round(e['score'], 3) for e in emotions}
|
18 |
-
|
19 |
emotion_pipeline = hf_pipeline(
|
20 |
"text-classification",
|
21 |
model="j-hartmann/emotion-english-distilroberta-base",
|
@@ -23,7 +23,15 @@ emotion_pipeline = hf_pipeline(
|
|
23 |
truncation=True
|
24 |
)
|
25 |
|
|
|
26 |
def generate_abuse_score_chart(dates, scores, labels):
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
27 |
if all(re.match(r"\d{4}-\d{2}-\d{2}", d) for d in dates):
|
28 |
parsed_x = [datetime.strptime(d, "%Y-%m-%d") for d in dates]
|
29 |
x_labels = [d.strftime("%Y-%m-%d") for d in parsed_x]
|
@@ -33,12 +41,13 @@ def generate_abuse_score_chart(dates, scores, labels):
|
|
33 |
|
34 |
fig, ax = plt.subplots(figsize=(8, 3))
|
35 |
ax.plot(parsed_x, scores, marker='o', linestyle='-', color='darkred', linewidth=2)
|
|
|
36 |
for x, y in zip(parsed_x, scores):
|
37 |
ax.text(x, y + 2, f"{int(y)}%", ha='center', fontsize=8, color='black')
|
38 |
|
39 |
ax.set_xticks(parsed_x)
|
40 |
ax.set_xticklabels(x_labels)
|
41 |
-
ax.set_xlabel("")
|
42 |
ax.set_ylabel("Abuse Score (%)")
|
43 |
ax.set_ylim(0, 105)
|
44 |
ax.grid(True)
|
@@ -49,32 +58,305 @@ def generate_abuse_score_chart(dates, scores, labels):
|
|
49 |
buf.seek(0)
|
50 |
return Image.open(buf)
|
51 |
|
|
|
|
|
52 |
from transformers import AutoModelForSequenceClassification, AutoTokenizer
|
53 |
|
54 |
model_name = "SamanthaStorm/tether-multilabel-v3"
|
55 |
model = AutoModelForSequenceClassification.from_pretrained(model_name)
|
56 |
tokenizer = AutoTokenizer.from_pretrained(model_name, use_fast=False)
|
57 |
|
58 |
-
LABELS = [
|
59 |
-
|
60 |
-
|
61 |
-
|
62 |
-
|
63 |
-
|
64 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
65 |
|
66 |
-
|
67 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
68 |
def analyze_single_message(text, thresholds):
|
69 |
motif_hits, matched_phrases = detect_motifs(text)
|
|
|
|
|
70 |
emotion_profile = get_emotion_profile(text)
|
71 |
sentiment_score = emotion_profile.get("anger", 0) + emotion_profile.get("disgust", 0)
|
72 |
|
|
|
73 |
inputs = tokenizer(text, return_tensors="pt", truncation=True, padding=True)
|
74 |
with torch.no_grad():
|
75 |
outputs = model(**inputs)
|
76 |
scores = torch.sigmoid(outputs.logits.squeeze(0)).numpy()
|
77 |
|
|
|
78 |
if emotion_profile.get("neutral", 0) > 0.85 and any(
|
79 |
scores[label_idx] > thresholds[LABELS[label_idx]]
|
80 |
for label_idx in [LABELS.index(l) for l in ["control", "threat", "blame shifting"]]
|
@@ -83,17 +365,76 @@ def analyze_single_message(text, thresholds):
|
|
83 |
else:
|
84 |
sentiment = "undermining" if sentiment_score > 0.25 else "supportive"
|
85 |
|
|
|
|
|
86 |
adjusted_thresholds = {
|
87 |
k: v + 0.05 if sentiment == "supportive" else v
|
88 |
for k, v in thresholds.items()
|
89 |
}
|
90 |
|
91 |
-
|
92 |
-
|
|
|
|
|
93 |
if score > adjusted_thresholds[label]
|
94 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
95 |
|
96 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
97 |
|
98 |
def analyze_composite(msg1, date1, msg2, date2, msg3, date3, *answers_and_none):
|
99 |
none_selected_checked = answers_and_none[-1]
|
@@ -118,17 +459,15 @@ def analyze_composite(msg1, date1, msg2, date2, msg3, date3, *answers_and_none):
|
|
118 |
return "Please enter at least one message."
|
119 |
|
120 |
results = [(analyze_single_message(m, THRESHOLDS.copy()), d) for m, d in active]
|
121 |
-
|
122 |
for result, date in results:
|
123 |
-
assert len(result) ==
|
124 |
-
|
125 |
-
top_labels = [r[0][
|
126 |
top_scores = [r[0][2][0][1] for r in results]
|
127 |
sentiments = [r[0][3]['label'] for r in results]
|
128 |
stages = [r[0][4] for r in results]
|
129 |
darvo_scores = [r[0][5] for r in results]
|
130 |
-
dates_used = [r[1] or "Undated" for r in results]
|
131 |
-
abuse_scores = [r[0][0] for r in results]
|
132 |
|
133 |
composite_abuse = int(round(sum(abuse_scores) / len(abuse_scores)))
|
134 |
top_label = f"{top_labels[0]} – {int(round(top_scores[0] * 100))}%"
|
@@ -145,6 +484,7 @@ def analyze_composite(msg1, date1, msg2, date2, msg3, date3, *answers_and_none):
|
|
145 |
out = f"Abuse Intensity: {composite_abuse}%\n"
|
146 |
out += "📊 This reflects the strength and severity of detected abuse patterns in the message(s).\n\n"
|
147 |
|
|
|
148 |
if escalation_score is None:
|
149 |
escalation_text = "📉 Escalation Potential: Unknown (Checklist not completed)\n"
|
150 |
escalation_text += "⚠️ *This section was not completed. Escalation potential is unknown.*\n"
|
@@ -152,20 +492,17 @@ def analyze_composite(msg1, date1, msg2, date2, msg3, date3, *answers_and_none):
|
|
152 |
escalation_text = f"🧨 **Escalation Potential: {risk_level} ({escalation_score}/{sum(w for _, w in ESCALATION_QUESTIONS)})**\n"
|
153 |
escalation_text += "This score comes directly from the safety checklist and functions as a standalone escalation risk score.\n"
|
154 |
escalation_text += "It indicates how many serious risk factors are present based on your answers to the safety checklist.\n"
|
155 |
-
|
156 |
if top_label is None:
|
157 |
top_label = "Unknown – 0%"
|
158 |
-
|
159 |
out += generate_risk_snippet(composite_abuse, top_label, escalation_score if escalation_score is not None else 0, most_common_stage)
|
160 |
out += f"\n\n{stage_text}"
|
161 |
out += darvo_blurb
|
162 |
-
out += "\n\n" + escalation_text
|
163 |
-
|
164 |
print(f"DEBUG: avg_darvo = {avg_darvo}")
|
165 |
-
pattern_labels = [r[0][2][0][0] for r in results]
|
166 |
timeline_image = generate_abuse_score_chart(dates_used, abuse_scores, pattern_labels)
|
|
|
167 |
return out, timeline_image
|
168 |
-
|
169 |
message_date_pairs = [
|
170 |
(
|
171 |
gr.Textbox(label=f"Message {i+1}"),
|
|
|
15 |
if isinstance(emotions, list) and isinstance(emotions[0], list):
|
16 |
emotions = emotions[0]
|
17 |
return {e['label'].lower(): round(e['score'], 3) for e in emotions}
|
18 |
+
# Emotion model (no retraining needed)
|
19 |
emotion_pipeline = hf_pipeline(
|
20 |
"text-classification",
|
21 |
model="j-hartmann/emotion-english-distilroberta-base",
|
|
|
23 |
truncation=True
|
24 |
)
|
25 |
|
26 |
+
# --- Timeline Visualization Function ---
|
27 |
def generate_abuse_score_chart(dates, scores, labels):
|
28 |
+
import matplotlib.pyplot as plt
|
29 |
+
import io
|
30 |
+
from PIL import Image
|
31 |
+
from datetime import datetime
|
32 |
+
import re
|
33 |
+
|
34 |
+
# Determine if all entries are valid dates
|
35 |
if all(re.match(r"\d{4}-\d{2}-\d{2}", d) for d in dates):
|
36 |
parsed_x = [datetime.strptime(d, "%Y-%m-%d") for d in dates]
|
37 |
x_labels = [d.strftime("%Y-%m-%d") for d in parsed_x]
|
|
|
41 |
|
42 |
fig, ax = plt.subplots(figsize=(8, 3))
|
43 |
ax.plot(parsed_x, scores, marker='o', linestyle='-', color='darkred', linewidth=2)
|
44 |
+
|
45 |
for x, y in zip(parsed_x, scores):
|
46 |
ax.text(x, y + 2, f"{int(y)}%", ha='center', fontsize=8, color='black')
|
47 |
|
48 |
ax.set_xticks(parsed_x)
|
49 |
ax.set_xticklabels(x_labels)
|
50 |
+
ax.set_xlabel("") # No axis label
|
51 |
ax.set_ylabel("Abuse Score (%)")
|
52 |
ax.set_ylim(0, 105)
|
53 |
ax.grid(True)
|
|
|
58 |
buf.seek(0)
|
59 |
return Image.open(buf)
|
60 |
|
61 |
+
|
62 |
+
# --- Abuse Model ---
|
63 |
from transformers import AutoModelForSequenceClassification, AutoTokenizer
|
64 |
|
65 |
model_name = "SamanthaStorm/tether-multilabel-v3"
|
66 |
model = AutoModelForSequenceClassification.from_pretrained(model_name)
|
67 |
tokenizer = AutoTokenizer.from_pretrained(model_name, use_fast=False)
|
68 |
|
69 |
+
# Output labels of the multilabel abuse classifier, in the order the model's
# logits are emitted (index i of the sigmoid scores corresponds to LABELS[i]).
LABELS = [
    "blame shifting", "contradictory statements", "control", "dismissiveness",
    "gaslighting", "guilt tripping", "insults", "obscure language",
    "projection", "recovery phase", "threat"
]

# Per-label sigmoid-score cutoffs: a pattern counts as detected when its score
# exceeds this value (values tuned per label, not uniform).
THRESHOLDS = {
    "blame shifting": 0.30, "contradictory statements": 0.30, "control": 0.08, "dismissiveness": 0.12,
    "gaslighting": 0.09, "guilt tripping": 0.4, "insults": 0.10, "obscure language": 0.55,
    "projection": 0.09, "recovery phase": 0.20, "threat": 0.15
}

# Relative severity weight applied to each detected pattern when computing the
# weighted abuse score (see compute_abuse_score).
PATTERN_WEIGHTS = {
    "gaslighting": 1.5,
    "control": 1.2,
    "dismissiveness": 0.7,
    "blame shifting": 0.8,
    "guilt tripping": 1.2,
    "insults": 1.4,
    "projection": 1.2,
    "recovery phase": 1.1,
    "contradictory statements": 0.75,
    "threat": 1.6  # 🔧 New: raise weight for threat
}
# Human-readable descriptions for the four abuse-cycle stages returned by
# get_risk_stage (1=tension-building, 2=escalation, 3=reconciliation, 4=calm).
RISK_STAGE_LABELS = {
    1: "🌀 Risk Stage: Tension-Building\nThis message reflects rising emotional pressure or subtle control attempts.",
    2: "🔥 Risk Stage: Escalation\nThis message includes direct or aggressive patterns, suggesting active harm.",
    3: "🌧️ Risk Stage: Reconciliation\nThis message reflects a reset attempt—apologies or emotional repair without accountability.",
    4: "🌸 Risk Stage: Calm / Honeymoon\nThis message appears supportive but may follow prior harm, minimizing it."
}

# Safety-checklist items as (question, weight) pairs; the escalation score is
# the sum of weights for the items the user checks.
ESCALATION_QUESTIONS = [
    ("Partner has access to firearms or weapons", 4),
    ("Partner threatened to kill you", 3),
    ("Partner threatened you with a weapon", 3),
    ("Partner has ever choked you, even if you considered it consensual at the time", 4),
    ("Partner injured or threatened your pet(s)", 3),
    ("Partner has broken your things, punched or kicked walls, or thrown things ", 2),
    ("Partner forced or coerced you into unwanted sexual acts", 3),
    ("Partner threatened to take away your children", 2),
    ("Violence has increased in frequency or severity", 3),
    ("Partner monitors your calls/GPS/social media", 2)
]
# Classifier labels that belong to the DARVO family (Deny, Attack, Reverse
# Victim and Offender); used by calculate_darvo_score for pattern counting.
DARVO_PATTERNS = {
    "blame shifting", "projection", "dismissiveness", "guilt tripping", "contradictory statements"
}
# Catalog of stock DARVO phrases; motifs found in a message are loosely
# substring-matched against these in calculate_darvo_score.
DARVO_MOTIFS = [
    "I never said that.", "You’re imagining things.", "That never happened.",
    "You’re making a big deal out of nothing.", "It was just a joke.", "You’re too sensitive.",
    "I don’t know what you’re talking about.", "You’re overreacting.", "I didn’t mean it that way.",
    "You’re twisting my words.", "You’re remembering it wrong.", "You’re always looking for something to complain about.",
    "You’re just trying to start a fight.", "I was only trying to help.", "You’re making things up.",
    "You’re blowing this out of proportion.", "You’re being paranoid.", "You’re too emotional.",
    "You’re always so dramatic.", "You’re just trying to make me look bad.",

    "You’re crazy.", "You’re the one with the problem.", "You’re always so negative.",
    "You’re just trying to control me.", "You’re the abusive one.", "You’re trying to ruin my life.",
    "You’re just jealous.", "You’re the one who needs help.", "You’re always playing the victim.",
    "You’re the one causing all the problems.", "You’re just trying to make me feel guilty.",
    "You’re the one who can’t let go of the past.", "You’re the one who’s always angry.",
    "You’re the one who’s always complaining.", "You’re the one who’s always starting arguments.",
    "You’re the one who’s always making things worse.", "You’re the one who’s always making me feel bad.",
    "You’re the one who’s always making me look like the bad guy.",
    "You’re the one who’s always making me feel like a failure.",
    "You’re the one who’s always making me feel like I’m not good enough.",

    "I can’t believe you’re doing this to me.", "You’re hurting me.",
    "You’re making me feel like a terrible person.", "You’re always blaming me for everything.",
    "You’re the one who’s abusive.", "You’re the one who’s controlling.", "You’re the one who’s manipulative.",
    "You’re the one who’s toxic.", "You’re the one who’s gaslighting me.",
    "You’re the one who’s always putting me down.", "You’re the one who’s always making me feel bad.",
    "You’re the one who’s always making me feel like I’m not good enough.",
    "You’re the one who’s always making me feel like I’m the problem.",
    "You’re the one who’s always making me feel like I’m the bad guy.",
    "You’re the one who’s always making me feel like I’m the villain.",
    "You’re the one who’s always making me feel like I’m the one who needs to change.",
    "You’re the one who’s always making me feel like I’m the one who’s wrong.",
    "You’re the one who’s always making me feel like I’m the one who’s crazy.",
    "You’re the one who’s always making me feel like I’m the one who’s abusive.",
    "You’re the one who’s always making me feel like I’m the one who’s toxic."
]
|
150 |
+
def get_emotional_tone_tag(emotions, sentiment, patterns, abuse_score):
    """Map an emotion profile plus detected abuse patterns to a coarse tone tag.

    Runs a fixed sequence of heuristics in priority order and returns the
    first matching tag string, or None when no heuristic fires.
    """
    get = emotions.get
    sadness, joy, neutral = get("sadness", 0), get("joy", 0), get("neutral", 0)
    disgust, anger, fear = get("disgust", 0), get("anger", 0), get("fear", 0)
    undermining = sentiment == "undermining"

    def overlaps(*candidates):
        # True when any candidate pattern was detected in this message.
        return any(c in patterns for c in candidates)

    # 1. Performative Regret — sad apology language riding on blame/guilt moves.
    if (sadness > 0.4
            and overlaps("blame shifting", "guilt tripping", "recovery phase")
            and (undermining or abuse_score > 40)):
        return "performative regret"

    # 2. Coercive Warmth — affection or sorrow paired with controlling patterns.
    if (joy > 0.3 or sadness > 0.4) and overlaps("control", "gaslighting") and undermining:
        return "coercive warmth"

    # 3. Cold Invalidation — flat/disgusted tone plus dismissive patterns.
    if (neutral + disgust) > 0.5 and overlaps("dismissiveness", "projection", "obscure language") and undermining:
        return "cold invalidation"

    # 4. Genuine Vulnerability — supportive tone where every detected pattern
    #    (if any) is "recovery phase"; vacuously true for an empty pattern list.
    if (sadness + fear) > 0.5 and sentiment == "supportive" and all(p == "recovery phase" for p in patterns):
        return "genuine vulnerability"

    # 5. Emotional Threat — hostile affect plus aggressive/controlling patterns.
    if (anger + disgust) > 0.5 and overlaps("control", "threat", "insults", "dismissiveness") and undermining:
        return "emotional threat"

    # 6. Weaponized Sadness — intense sadness used alongside guilt/projection.
    if sadness > 0.6 and overlaps("guilt tripping", "projection") and undermining:
        return "weaponized sadness"

    # 7. Toxic Resignation — detached tone plus dismissive/obscuring patterns.
    if neutral > 0.5 and overlaps("dismissiveness", "obscure language") and undermining:
        return "toxic resignation"

    return None
|
215 |
+
def detect_contradiction(message):
    """Return True when the message pairs an affectionate/apologetic opener
    with a hostile or blaming follow-up within ~15 characters — a common
    contradiction cue (e.g. "I love you ... I hate you")."""
    contradiction_patterns = (
        (r"\b(i love you).{0,15}(i hate you|you ruin everything)", re.IGNORECASE),
        (r"\b(i’m sorry).{0,15}(but you|if you hadn’t)", re.IGNORECASE),
        (r"\b(i’m trying).{0,15}(you never|why do you)", re.IGNORECASE),
        (r"\b(do what you want).{0,15}(you’ll regret it|i always give everything)", re.IGNORECASE),
        (r"\b(i don’t care).{0,15}(you never think of me)", re.IGNORECASE),
        (r"\b(i guess i’m just).{0,15}(the bad guy|worthless|never enough)", re.IGNORECASE),
    )
    for pattern, flags in contradiction_patterns:
        if re.search(pattern, message, flags):
            return True
    return False
|
225 |
+
|
226 |
+
def calculate_darvo_score(patterns, sentiment_before, sentiment_after, motifs_found, contradiction_flag=False):
    """Blend four DARVO signals (pattern count, sentiment shift, motif matches,
    contradiction) into a single 0–1 score rounded to 3 decimals.

    Relies on the module-level DARVO_PATTERNS set and DARVO_MOTIFS catalog.
    """
    # How many detected patterns belong to the DARVO family.
    darvo_pattern_count = sum(1 for p in patterns if p.lower() in DARVO_PATTERNS)

    # Only a positive sentiment swing contributes.
    shift = sentiment_after - sentiment_before
    sentiment_shift_score = shift if shift > 0 else 0.0

    # Loose, case-insensitive substring match in either direction between the
    # motifs found in the message and the DARVO motif catalog.
    motif_hits = 0
    for motif in motifs_found:
        motif_lc = motif.lower()
        if any(phrase.lower() in motif_lc or motif_lc in phrase.lower() for phrase in DARVO_MOTIFS):
            motif_hits += 1
    motif_score = motif_hits / max(len(DARVO_MOTIFS), 1)

    # Contradiction remains a binary signal.
    contradiction_score = 1.0 if contradiction_flag else 0.0

    combined = (0.3 * darvo_pattern_count
                + 0.3 * sentiment_shift_score
                + 0.25 * motif_score
                + 0.15 * contradiction_score)
    # Cap at 1.0 before rounding.
    return round(min(combined, 1.0), 3)
|
251 |
+
def detect_weapon_language(text):
    """Return True when the text mentions weapons or lethal-violence phrasing
    (simple case-insensitive substring match against a fixed keyword list)."""
    lowered = text.lower()
    keywords = (
        "knife", "knives", "stab", "cut you", "cutting",
        "gun", "shoot", "rifle", "firearm", "pistol",
        "bomb", "blow up", "grenade", "explode",
        "weapon", "armed", "loaded", "kill you", "take you out",
    )
    for word in keywords:
        if word in lowered:
            return True
    return False
|
260 |
+
def get_risk_stage(patterns, sentiment):
    """Map detected patterns (plus sentiment) onto the four-stage abuse cycle.

    Returns 2 (escalation), 3 (reconciliation), 1 (tension-building, also the
    default), or 4 (calm/honeymoon). Checks are ordered by precedence.
    """
    detected = set(patterns)
    if detected & {"threat", "insults"}:
        return 2
    if "recovery phase" in detected:
        return 3
    if detected & {"control", "guilt tripping"}:
        return 1
    if sentiment == "supportive" and detected & {"projection", "dismissiveness"}:
        return 4
    return 1
|
270 |
+
|
271 |
+
def generate_risk_snippet(abuse_score, top_label, escalation_score, stage):
    """Compose the human-readable risk summary block for one analysis run.

    top_label is expected as "pattern – NN%" (en dash separator); anything
    else is rendered with an empty score. Returns a multi-line string.
    """
    # Coarse risk tier from abuse intensity, checklist score, and cycle stage.
    tier_rules = (
        (abuse_score >= 85 or escalation_score >= 16, "high"),
        (abuse_score >= 60 or escalation_score >= 8, "moderate"),
        (stage == 2 and abuse_score >= 40, "moderate"),  # escalation stage bumps mid scores
    )
    risk_level = next((tier for hit, tier in tier_rules if hit), "low")

    # Split "label – score%" into its halves; fall back to a bare label.
    if isinstance(top_label, str) and " – " in top_label:
        pattern_label, pattern_score = top_label.split(" – ")
    else:
        pattern_label = str(top_label) if top_label is not None else "Unknown"
        pattern_score = ""

    WHY_FLAGGED = {
        "control": "This message may reflect efforts to restrict someone’s autonomy, even if it's framed as concern or care.",
        "gaslighting": "This message could be manipulating someone into questioning their perception or feelings.",
        "dismissiveness": "This message may include belittling, invalidating, or ignoring the other person’s experience.",
        "insults": "Direct insults often appear in escalating abusive dynamics and can erode emotional safety.",
        "threat": "This message includes threatening language, which is a strong predictor of harm.",
        "blame shifting": "This message may redirect responsibility to avoid accountability, especially during conflict.",
        "guilt tripping": "This message may induce guilt in order to control or manipulate behavior.",
        "recovery phase": "This message may be part of a tension-reset cycle, appearing kind but avoiding change.",
        "projection": "This message may involve attributing the abuser’s own behaviors to the victim.",
        "default": "This message contains language patterns that may affect safety, clarity, or emotional autonomy."
    }
    explanation = WHY_FLAGGED.get(pattern_label.lower(), WHY_FLAGGED["default"])

    parts = [
        f"\n\n🛑 Risk Level: {risk_level.capitalize()}\n",
        f"This message shows strong indicators of **{pattern_label}**. ",
    ]
    if risk_level == "high":
        parts.append("The language may reflect patterns of emotional control, even when expressed in soft or caring terms.\n")
    elif risk_level == "moderate":
        parts.append("There are signs of emotional pressure or indirect control that may escalate if repeated.\n")
    else:
        parts.append("The message does not strongly indicate abuse, but it's important to monitor for patterns.\n")
    parts.append(f"\n💡 *Why this might be flagged:*\n{explanation}\n")
    parts.append(f"\nDetected Pattern: **{pattern_label} ({pattern_score})**\n")
    parts.append("🧠 You can review the pattern in context. This tool highlights possible dynamics—not judgments.")
    return "".join(parts)
|
315 |
+
def compute_abuse_score(matched_scores, sentiment):
    """Turn (label, score, weight) triples into a 0–100 abuse intensity.

    Weighted mean of the raw scores, boosted for co-occurring patterns,
    floored for the most severe labels, and nudged upward for an
    undermining sentiment. Returns 0 when nothing matched.
    """
    if not matched_scores:
        return 0

    # Weighted mean of the raw sigmoid scores, expressed as a percentage.
    weighted_sum = 0.0
    total_weight = 0.0
    for _, score, weight in matched_scores:
        weighted_sum += score * weight
        total_weight += weight
    base_score = weighted_sum / total_weight * 100

    # Co-occurring patterns compound: 1.25x for two, 1.5x for three or more.
    scale = 1.0 + 0.25 * max(0, len(matched_scores) - 1)
    scaled_score = base_score * scale

    # Severity floors: these patterns should never read as mild.
    FLOORS = {
        "threat": 70,
        "control": 40,
        "gaslighting": 30,
        "insults": 25
    }
    floor = max(FLOORS.get(label, 0) for label, _, _ in matched_scores)
    adjusted_score = scaled_score if scaled_score > floor else floor

    # Undermining tone nudges borderline-low scores upward.
    if sentiment == "undermining" and adjusted_score < 50:
        adjusted_score += 10

    return min(adjusted_score, 100)
|
344 |
+
|
345 |
+
|
346 |
def analyze_single_message(text, thresholds):
|
347 |
motif_hits, matched_phrases = detect_motifs(text)
|
348 |
+
|
349 |
+
# Get emotion profile
|
350 |
emotion_profile = get_emotion_profile(text)
|
351 |
sentiment_score = emotion_profile.get("anger", 0) + emotion_profile.get("disgust", 0)
|
352 |
|
353 |
+
# Get model scores first so they can be used in the neutral override
|
354 |
inputs = tokenizer(text, return_tensors="pt", truncation=True, padding=True)
|
355 |
with torch.no_grad():
|
356 |
outputs = model(**inputs)
|
357 |
scores = torch.sigmoid(outputs.logits.squeeze(0)).numpy()
|
358 |
|
359 |
+
# Sentiment override if neutral masks abuse
|
360 |
if emotion_profile.get("neutral", 0) > 0.85 and any(
|
361 |
scores[label_idx] > thresholds[LABELS[label_idx]]
|
362 |
for label_idx in [LABELS.index(l) for l in ["control", "threat", "blame shifting"]]
|
|
|
365 |
else:
|
366 |
sentiment = "undermining" if sentiment_score > 0.25 else "supportive"
|
367 |
|
368 |
+
weapon_flag = detect_weapon_language(text)
|
369 |
+
|
370 |
adjusted_thresholds = {
|
371 |
k: v + 0.05 if sentiment == "supportive" else v
|
372 |
for k, v in thresholds.items()
|
373 |
}
|
374 |
|
375 |
+
contradiction_flag = detect_contradiction(text)
|
376 |
+
|
377 |
+
threshold_labels = [
|
378 |
+
label for label, score in zip(LABELS, scores)
|
379 |
if score > adjusted_thresholds[label]
|
380 |
+
]
|
381 |
+
|
382 |
+
motifs = [phrase for _, phrase in matched_phrases]
|
383 |
+
|
384 |
+
darvo_score = calculate_darvo_score(
|
385 |
+
threshold_labels,
|
386 |
+
sentiment_before=0.0,
|
387 |
+
sentiment_after=sentiment_score,
|
388 |
+
motifs_found=motifs,
|
389 |
+
contradiction_flag=contradiction_flag
|
390 |
+
)
|
391 |
|
392 |
+
top_patterns = sorted(
|
393 |
+
[(label, score) for label, score in zip(LABELS, scores)],
|
394 |
+
key=lambda x: x[1],
|
395 |
+
reverse=True
|
396 |
+
)[:2]
|
397 |
+
|
398 |
+
matched_scores = [
|
399 |
+
(label, score, PATTERN_WEIGHTS.get(label, 1.0))
|
400 |
+
for label, score in zip(LABELS, scores)
|
401 |
+
if score > adjusted_thresholds[label]
|
402 |
+
]
|
403 |
+
|
404 |
+
abuse_score_raw = compute_abuse_score(matched_scores, sentiment)
|
405 |
+
abuse_score = abuse_score_raw
|
406 |
+
|
407 |
+
stage = get_risk_stage(threshold_labels, sentiment) if threshold_labels else 1
|
408 |
+
if weapon_flag and stage < 2:
|
409 |
+
stage = 2
|
410 |
+
|
411 |
+
if weapon_flag:
|
412 |
+
abuse_score_raw = min(abuse_score_raw + 25, 100)
|
413 |
+
|
414 |
+
abuse_score = min(abuse_score_raw, 100 if "threat" in threshold_labels or "control" in threshold_labels else 95)
|
415 |
+
|
416 |
+
# Get tone tag
|
417 |
+
tone_tag = get_emotional_tone_tag(emotion_profile, sentiment, threshold_labels, abuse_score)
|
418 |
+
print(f"Emotional Tone Tag: {tone_tag}")
|
419 |
+
|
420 |
+
# Debug logs
|
421 |
+
print("Emotion Profile:")
|
422 |
+
for emotion, score in emotion_profile.items():
|
423 |
+
print(f" {emotion.capitalize():10}: {score}")
|
424 |
+
print("\n--- Debug Info ---")
|
425 |
+
print(f"Text: {text}")
|
426 |
+
print(f"Sentiment (via emotion): {sentiment} (score: {round(sentiment_score, 3)})")
|
427 |
+
print("Abuse Pattern Scores:")
|
428 |
+
for label, score in zip(LABELS, scores):
|
429 |
+
passed = "✅" if score > adjusted_thresholds[label] else "❌"
|
430 |
+
print(f" {label:25} → {score:.3f} {passed}")
|
431 |
+
print(f"Matched for score: {[(l, round(s, 3)) for l, s, _ in matched_scores]}")
|
432 |
+
print(f"Abuse Score Raw: {round(abuse_score_raw, 1)}")
|
433 |
+
print(f"Motifs: {motifs}")
|
434 |
+
print(f"Contradiction: {contradiction_flag}")
|
435 |
+
print("------------------\n")
|
436 |
+
|
437 |
+
return abuse_score, threshold_labels, top_patterns, {"label": sentiment}, stage, darvo_score
|
438 |
|
439 |
def analyze_composite(msg1, date1, msg2, date2, msg3, date3, *answers_and_none):
|
440 |
none_selected_checked = answers_and_none[-1]
|
|
|
459 |
return "Please enter at least one message."
|
460 |
|
461 |
results = [(analyze_single_message(m, THRESHOLDS.copy()), d) for m, d in active]
|
|
|
462 |
for result, date in results:
|
463 |
+
assert len(result) == 6, "Unexpected output from analyze_single_message"
|
464 |
+
abuse_scores = [r[0][0] for r in results]
|
465 |
+
top_labels = [r[0][1][0] if r[0][1] else r[0][2][0][0] for r in results]
|
466 |
top_scores = [r[0][2][0][1] for r in results]
|
467 |
sentiments = [r[0][3]['label'] for r in results]
|
468 |
stages = [r[0][4] for r in results]
|
469 |
darvo_scores = [r[0][5] for r in results]
|
470 |
+
dates_used = [r[1] or "Undated" for r in results] # Store dates for future mapping
|
|
|
471 |
|
472 |
composite_abuse = int(round(sum(abuse_scores) / len(abuse_scores)))
|
473 |
top_label = f"{top_labels[0]} – {int(round(top_scores[0] * 100))}%"
|
|
|
484 |
out = f"Abuse Intensity: {composite_abuse}%\n"
|
485 |
out += "📊 This reflects the strength and severity of detected abuse patterns in the message(s).\n\n"
|
486 |
|
487 |
+
# Save this line for later use at the
|
488 |
if escalation_score is None:
|
489 |
escalation_text = "📉 Escalation Potential: Unknown (Checklist not completed)\n"
|
490 |
escalation_text += "⚠️ *This section was not completed. Escalation potential is unknown.*\n"
|
|
|
492 |
escalation_text = f"🧨 **Escalation Potential: {risk_level} ({escalation_score}/{sum(w for _, w in ESCALATION_QUESTIONS)})**\n"
|
493 |
escalation_text += "This score comes directly from the safety checklist and functions as a standalone escalation risk score.\n"
|
494 |
escalation_text += "It indicates how many serious risk factors are present based on your answers to the safety checklist.\n"
|
|
|
495 |
if top_label is None:
|
496 |
top_label = "Unknown – 0%"
|
|
|
497 |
out += generate_risk_snippet(composite_abuse, top_label, escalation_score if escalation_score is not None else 0, most_common_stage)
|
498 |
out += f"\n\n{stage_text}"
|
499 |
out += darvo_blurb
|
|
|
|
|
500 |
print(f"DEBUG: avg_darvo = {avg_darvo}")
|
501 |
+
pattern_labels = [r[0][2][0][0] for r in results] # top label for each message
|
502 |
timeline_image = generate_abuse_score_chart(dates_used, abuse_scores, pattern_labels)
|
503 |
+
out += "\n\n" + escalation_text
|
504 |
return out, timeline_image
|
505 |
+
|
506 |
message_date_pairs = [
|
507 |
(
|
508 |
gr.Textbox(label=f"Message {i+1}"),
|