Spaces:
Running
Running
Update app.py
Browse files
app.py
CHANGED
@@ -1,7 +1,7 @@
|
|
1 |
import gradio as gr
|
2 |
import torch
|
3 |
import numpy as np
|
4 |
-
from transformers import pipeline,
|
5 |
from motif_tagging import detect_motifs
|
6 |
import re
|
7 |
import matplotlib.pyplot as plt
|
@@ -9,18 +9,54 @@ import io
|
|
9 |
from PIL import Image
|
10 |
from datetime import datetime
|
11 |
|
12 |
-
#
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
13 |
LABELS = [
|
14 |
"blame shifting", "contradictory statements", "control", "dismissiveness",
|
15 |
"gaslighting", "guilt tripping", "insults", "obscure language",
|
16 |
"projection", "recovery phase", "threat"
|
17 |
]
|
18 |
|
19 |
-
# <- Restore your exact thresholds here:
|
20 |
THRESHOLDS = {
|
21 |
-
"blame shifting": 0.3, "contradictory statements": 0.
|
22 |
-
"gaslighting": 0.
|
23 |
-
"projection": 0.
|
24 |
}
|
25 |
|
26 |
PATTERN_WEIGHTS = {
|
@@ -29,9 +65,27 @@ PATTERN_WEIGHTS = {
|
|
29 |
"dismissiveness": 0.8,
|
30 |
"blame shifting": 0.8,
|
31 |
"contradictory statements": 0.75,
|
32 |
-
"threat": 1.5
|
|
|
|
|
|
|
|
|
|
|
|
|
33 |
}
|
34 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
35 |
DARVO_PATTERNS = {
|
36 |
"blame shifting", "projection", "dismissiveness", "guilt tripping", "contradictory statements"
|
37 |
}
|
@@ -43,6 +97,7 @@ DARVO_MOTIFS = [
|
|
43 |
"Youβre just trying to start a fight.", "I was only trying to help.", "Youβre making things up.",
|
44 |
"Youβre blowing this out of proportion.", "Youβre being paranoid.", "Youβre too emotional.",
|
45 |
"Youβre always so dramatic.", "Youβre just trying to make me look bad.",
|
|
|
46 |
"Youβre crazy.", "Youβre the one with the problem.", "Youβre always so negative.",
|
47 |
"Youβre just trying to control me.", "Youβre the abusive one.", "Youβre trying to ruin my life.",
|
48 |
"Youβre just jealous.", "Youβre the one who needs help.", "Youβre always playing the victim.",
|
@@ -53,6 +108,7 @@ DARVO_MOTIFS = [
|
|
53 |
"Youβre the one whoβs always making me look like the bad guy.",
|
54 |
"Youβre the one whoβs always making me feel like a failure.",
|
55 |
"Youβre the one whoβs always making me feel like Iβm not good enough.",
|
|
|
56 |
"I canβt believe youβre doing this to me.", "Youβre hurting me.",
|
57 |
"Youβre making me feel like a terrible person.", "Youβre always blaming me for everything.",
|
58 |
"Youβre the one whoβs abusive.", "Youβre the one whoβs controlling.", "Youβre the one whoβs manipulative.",
|
@@ -68,28 +124,6 @@ DARVO_MOTIFS = [
|
|
68 |
"Youβre the one whoβs always making me feel like Iβm the one whoβs abusive.",
|
69 |
"Youβre the one whoβs always making me feel like Iβm the one whoβs toxic."
|
70 |
]
|
71 |
-
|
72 |
-
RISK_STAGE_LABELS = {
|
73 |
-
1: "π Risk Stage: Tension-Building\nThis message reflects rising emotional pressure or subtle control attempts.",
|
74 |
-
2: "π₯ Risk Stage: Escalation\nThis message includes direct or aggressive patterns, suggesting active harm.",
|
75 |
-
3: "π§οΈ Risk Stage: Reconciliation\nThis message reflects a reset attemptβapologies or emotional repair without accountability.",
|
76 |
-
4: "πΈ Risk Stage: Calm / Honeymoon\nThis message appears supportive but may follow prior harm, minimizing it."
|
77 |
-
}
|
78 |
-
|
79 |
-
ESCALATION_QUESTIONS = [
|
80 |
-
("Partner has access to firearms or weapons", 4),
|
81 |
-
("Partner threatened to kill you", 3),
|
82 |
-
("Partner threatened you with a weapon", 3),
|
83 |
-
("Partner has ever choked you", 4),
|
84 |
-
("Partner injured or threatened your pet(s)", 3),
|
85 |
-
("Partner has broken your things, punched walls, or thrown objects", 2),
|
86 |
-
("Partner forced or coerced you into unwanted sexual acts", 3),
|
87 |
-
("Partner threatened to take away your children", 2),
|
88 |
-
("Violence has increased in frequency or severity", 3),
|
89 |
-
("Partner monitors your calls, GPS, or social media", 2)
|
90 |
-
]
|
91 |
-
|
92 |
-
# βββ Helper Functions βββ
|
93 |
def detect_contradiction(message):
|
94 |
patterns = [
|
95 |
(r"\b(i love you).{0,15}(i hate you|you ruin everything)", re.IGNORECASE),
|
@@ -99,251 +133,247 @@ def detect_contradiction(message):
|
|
99 |
(r"\b(i donβt care).{0,15}(you never think of me)", re.IGNORECASE),
|
100 |
(r"\b(i guess iβm just).{0,15}(the bad guy|worthless|never enough)", re.IGNORECASE)
|
101 |
]
|
102 |
-
return any(re.search(
|
103 |
-
|
104 |
def calculate_darvo_score(patterns, sentiment_before, sentiment_after, motifs_found, contradiction_flag=False):
|
105 |
-
# count how many DARVO patterns were triggered
|
106 |
pattern_hits = len([p for p in patterns if p in DARVO_PATTERNS])
|
107 |
pattern_score = pattern_hits / len(DARVO_PATTERNS)
|
108 |
-
|
109 |
-
# measure how much sentiment shifts toward negativity
|
110 |
sentiment_shift_score = max(0.0, sentiment_after - sentiment_before)
|
111 |
|
112 |
-
# count DARVO motif occurrences
|
113 |
motif_hits = len([
|
114 |
-
|
115 |
-
if any(phrase.lower() in
|
116 |
])
|
117 |
motif_score = motif_hits / len(DARVO_MOTIFS)
|
118 |
|
119 |
-
# direct contradiction indicator
|
120 |
contradiction_score = 1.0 if contradiction_flag else 0.0
|
121 |
|
122 |
-
|
123 |
-
|
124 |
-
0.
|
125 |
-
|
126 |
-
|
127 |
-
|
128 |
-
)
|
129 |
-
return round(min(raw, 1.0), 3)
|
130 |
-
|
131 |
-
def generate_risk_snippet(abuse_score, top_label, escalation_score, stage):
|
132 |
-
label = top_label.split(" β ")[0]
|
133 |
-
why = {
|
134 |
-
"control": "efforts to restrict autonomy.",
|
135 |
-
"gaslighting": "manipulating perception.",
|
136 |
-
"dismissiveness": "invalidating experience.",
|
137 |
-
"insults": "direct insults erode safety.",
|
138 |
-
"threat": "threatening language predicts harm.",
|
139 |
-
"blame shifting": "avoiding accountability.",
|
140 |
-
"guilt tripping": "inducing guilt to control behavior.",
|
141 |
-
"recovery phase": "tension-reset without change.",
|
142 |
-
"projection": "attributing faults to the other person."
|
143 |
-
}.get(label, "This message contains concerning patterns.")
|
144 |
-
if abuse_score>=85 or escalation_score>=16:
|
145 |
-
lvl = "high"
|
146 |
-
elif abuse_score>=60 or escalation_score>=8:
|
147 |
-
lvl = "moderate"
|
148 |
-
else:
|
149 |
-
lvl = "low"
|
150 |
-
return f"\n\nπ Risk Level: {lvl.capitalize()}\nThis message shows **{label}**.\nπ‘ Why: {why}\n"
|
151 |
-
|
152 |
def detect_weapon_language(text):
|
153 |
-
|
154 |
-
|
155 |
-
|
156 |
-
|
|
|
|
|
|
|
|
|
157 |
def get_risk_stage(patterns, sentiment):
|
158 |
if "threat" in patterns or "insults" in patterns:
|
159 |
return 2
|
160 |
-
|
161 |
-
return 1
|
162 |
-
if "recovery phase" in patterns:
|
163 |
return 3
|
164 |
-
|
|
|
|
|
165 |
return 4
|
166 |
return 1
|
167 |
|
168 |
-
def
|
169 |
-
|
170 |
-
|
171 |
-
|
172 |
-
|
173 |
-
|
174 |
-
|
175 |
-
|
176 |
-
|
177 |
-
|
178 |
-
|
179 |
-
|
180 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
181 |
|
182 |
-
|
183 |
-
model_name="SamanthaStorm/tether-multilabel-v2"
|
184 |
-
model=AutoModelForSequenceClassification.from_pretrained(model_name)
|
185 |
-
tokenizer=AutoTokenizer.from_pretrained(model_name, use_fast=False)
|
186 |
-
healthy_detector=pipeline("text-classification",model="distilbert-base-uncased-finetuned-sst-2-english")
|
187 |
-
sst_pipeline = pipeline("sentiment-analysis", model="distilbert-base-uncased-finetuned-sst-2-english")
|
188 |
|
189 |
-
|
190 |
-
|
191 |
-
# healthy bypass
|
192 |
-
h = healthy_detector(text)[0]
|
193 |
-
|
194 |
-
# 1) Strongly positive β healthy
|
195 |
-
if h['label'] == "POSITIVE" and h['score'] > 0.8:
|
196 |
-
return {
|
197 |
-
"abuse_score": 0,
|
198 |
-
"labels": [],
|
199 |
-
"sentiment": "supportive",
|
200 |
-
"stage": 4,
|
201 |
-
"darvo_score": 0.0,
|
202 |
-
"top_patterns": []
|
203 |
-
}
|
204 |
-
|
205 |
-
# 2) Mildly negative/neutral β also healthy
|
206 |
-
elif h['label'] == "NEGATIVE" and h['score'] < 0.6:
|
207 |
-
return {
|
208 |
-
"abuse_score": 0,
|
209 |
-
"labels": [],
|
210 |
-
"sentiment": "supportive",
|
211 |
-
"stage": 4,
|
212 |
-
"darvo_score": 0.0,
|
213 |
-
"top_patterns": []
|
214 |
-
}
|
215 |
-
|
216 |
-
# β if neither healthy case, continue on to actual abuse detection β
|
217 |
-
inp = tokenizer(text, return_tensors='pt', truncation=True, padding=True)
|
218 |
-
with torch.no_grad(): logits=model(**inp).logits.squeeze(0)
|
219 |
-
probs=torch.sigmoid(logits).numpy()
|
220 |
-
# β¦run tokenizer, get `probs` and then:
|
221 |
-
labels = [lab for lab,p in zip(LABELS, probs) if p > THRESHOLDS[lab]]
|
222 |
-
|
223 |
-
# **NEW**: if absolutely no pattern is detected, force a zeroβabuse βhealthyβ return:
|
224 |
-
if not labels:
|
225 |
-
return {
|
226 |
-
"abuse_score": 0,
|
227 |
-
"labels": [],
|
228 |
-
"sentiment": "supportive",
|
229 |
-
"stage": 4,
|
230 |
-
"darvo_score": 0.0,
|
231 |
-
"top_patterns": []
|
232 |
-
}
|
233 |
|
234 |
-
|
235 |
-
|
236 |
-
|
237 |
-
|
238 |
-
|
239 |
-
|
240 |
-
|
241 |
-
|
242 |
-
|
243 |
-
|
244 |
-
|
245 |
-
|
246 |
-
|
247 |
-
|
248 |
-
|
249 |
-
|
250 |
-
|
251 |
-
|
252 |
-
|
253 |
-
|
254 |
-
|
255 |
-
"stage":stage, "darvo_score":darvo_score, "top_patterns":top_patterns
|
256 |
}
|
257 |
|
258 |
-
|
259 |
-
|
260 |
-
|
261 |
-
|
262 |
-
|
263 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
264 |
risk_level = "unknown"
|
265 |
else:
|
266 |
-
|
267 |
-
risk_level =
|
268 |
-
|
269 |
-
|
270 |
-
|
271 |
-
|
272 |
-
|
|
|
|
|
|
|
273 |
if not active:
|
274 |
return "Please enter at least one message."
|
275 |
|
276 |
-
|
277 |
-
|
278 |
-
|
279 |
-
|
280 |
-
|
281 |
-
|
282 |
-
|
283 |
-
#
|
284 |
-
|
285 |
-
|
286 |
-
|
287 |
-
|
288 |
-
out += f"Message {i}: {score}% ({lvl})\n"
|
289 |
-
|
290 |
-
# now your existing composite logic
|
291 |
-
composite_abuse = int(round(sum(abuse_scores)/len(abuse_scores)))
|
292 |
-
out += f"Composite Abuse: {composite_abuse}%\n"
|
293 |
-
# β¦ etc. β¦
|
294 |
-
top_labels = [res["top_patterns"][0][0] if res["top_patterns"] else "None" for res, _ in results]
|
295 |
-
dates_used = [d or "Undated" for _, d in results]
|
296 |
-
stages = [res["stage"] for res, _ in results]
|
297 |
-
|
298 |
-
# overall risk stage & composite abuse
|
299 |
most_common_stage = max(set(stages), key=stages.count)
|
300 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
301 |
|
302 |
-
# build the basic output text
|
303 |
out = f"Abuse Intensity: {composite_abuse}%\n"
|
304 |
-
|
|
|
|
|
305 |
out += "Escalation Potential: Unknown (Checklist not completed)\n"
|
|
|
306 |
else:
|
307 |
-
|
308 |
-
out +=
|
309 |
-
|
310 |
-
# if zero abuse, skip risk snippet & DARVO
|
311 |
-
img = generate_abuse_score_chart(dates_used, abuse_scores, top_labels)
|
312 |
-
if composite_abuse == 0:
|
313 |
-
return out, img
|
314 |
-
|
315 |
-
# compute DARVO summary
|
316 |
-
darvos = [res["darvo_score"] for res, _ in results]
|
317 |
-
avg_darvo = round(sum(darvos) / len(darvos), 3)
|
318 |
-
darvo_blurb = (
|
319 |
-
f"\nπ DARVO Score: {avg_darvo} ({'high' if avg_darvo >= 0.65 else 'moderate'})"
|
320 |
-
if avg_darvo > 0.25 else ""
|
321 |
-
)
|
322 |
|
323 |
-
|
324 |
-
|
325 |
-
first_score = int(results[0][0]["top_patterns"][0][1] * 100) if results[0][0]["top_patterns"] else 0
|
326 |
-
pattern_score = f"{first_pattern} β {first_score}%"
|
327 |
-
out += generate_risk_snippet(composite_abuse, pattern_score, esc_score or 0, most_common_stage)
|
328 |
out += darvo_blurb
|
329 |
|
330 |
-
|
331 |
-
|
332 |
-
|
|
|
333 |
message_date_pairs = [
|
334 |
-
(
|
|
|
|
|
|
|
335 |
for i in range(3)
|
336 |
]
|
337 |
-
|
|
|
338 |
none_box = gr.Checkbox(label="None of the above")
|
339 |
|
340 |
iface = gr.Interface(
|
341 |
fn=analyze_composite,
|
342 |
-
inputs=
|
343 |
-
outputs=[
|
344 |
-
|
|
|
|
|
|
|
345 |
allow_flagging="manual"
|
346 |
)
|
347 |
|
348 |
if __name__ == "__main__":
|
349 |
-
iface.launch()
|
|
|
1 |
import gradio as gr
|
2 |
import torch
|
3 |
import numpy as np
|
4 |
+
from transformers import pipeline, RobertaForSequenceClassification, RobertaTokenizer
|
5 |
from motif_tagging import detect_motifs
|
6 |
import re
|
7 |
import matplotlib.pyplot as plt
|
|
|
9 |
from PIL import Image
|
10 |
from datetime import datetime
|
11 |
|
12 |
+
# --- Timeline Visualization Function ---
def generate_abuse_score_chart(dates, scores, labels):
    """Render abuse scores over time as a line chart and return it as an image.

    Args:
        dates: list of "YYYY-MM-DD" strings; if any entry fails to parse
            (e.g. "Undated"), the whole x-axis falls back to message index.
        scores: abuse scores (0-100), one per date.
        labels: top pattern label per point, drawn above each marker.

    Returns:
        A PIL.Image.Image containing the rendered PNG chart.
    """
    import matplotlib.pyplot as plt
    import io
    from PIL import Image
    from datetime import datetime

    try:
        parsed_dates = [datetime.strptime(d, "%Y-%m-%d") for d in dates]
    except Exception:
        # Any unparseable date degrades gracefully to positional x values.
        parsed_dates = list(range(len(dates)))

    fig, ax = plt.subplots(figsize=(8, 3))
    ax.plot(parsed_dates, scores, marker='o', linestyle='-', color='darkred', linewidth=2)

    for i, (x, y) in enumerate(zip(parsed_dates, scores)):
        label = labels[i]
        ax.text(x, y + 2, f"{label}\n{int(y)}%", ha='center', fontsize=8, color='black')

    ax.set_title("Abuse Intensity Over Time")
    ax.set_xlabel("Date")
    ax.set_ylabel("Abuse Score (%)")
    ax.set_ylim(0, 105)
    ax.grid(True)
    plt.tight_layout()

    buf = io.BytesIO()
    plt.savefig(buf, format='png')
    # Fix: close the figure explicitly — without this, every call leaks a
    # matplotlib figure, which accumulates in a long-running Gradio server.
    plt.close(fig)
    buf.seek(0)
    return Image.open(buf)
42 |
+
# --- SST Sentiment Model ---
|
43 |
+
sst_pipeline = pipeline("sentiment-analysis", model="distilbert-base-uncased-finetuned-sst-2-english")
|
44 |
+
|
45 |
+
# --- Abuse Model ---
|
46 |
+
model_name = "SamanthaStorm/autotrain-jlpi4-mllvp"
|
47 |
+
model = RobertaForSequenceClassification.from_pretrained(model_name)
|
48 |
+
tokenizer = RobertaTokenizer.from_pretrained(model_name)
|
49 |
+
|
50 |
# Abuse-pattern labels produced by the multi-label classifier. Order matters:
# these are zipped positionally against the model's sigmoid scores.
LABELS = [
    "blame shifting", "contradictory statements", "control", "dismissiveness",
    "gaslighting", "guilt tripping", "insults", "obscure language",
    "projection", "recovery phase", "threat"
]

# Per-label detection cutoffs: a pattern counts as present when its sigmoid
# score exceeds its threshold (tuned per pattern rather than one global value).
THRESHOLDS = {
    "blame shifting": 0.3, "contradictory statements": 0.36, "control": 0.48, "dismissiveness": 0.45,
    "gaslighting": 0.30, "guilt tripping": 0.20, "insults": 0.34, "obscure language": 0.25,
    "projection": 0.35, "recovery phase": 0.25, "threat": 0.25
}
|
61 |
|
62 |
PATTERN_WEIGHTS = {
|
|
|
65 |
"dismissiveness": 0.8,
|
66 |
"blame shifting": 0.8,
|
67 |
"contradictory statements": 0.75,
|
68 |
+
"threat": 1.5 # π§ New: raise weight for threat
|
69 |
+
}
|
70 |
+
# Human-readable description for each abuse-cycle stage returned by
# get_risk_stage(); keys are the stage numbers 1-4.
RISK_STAGE_LABELS = {
    1: "π Risk Stage: Tension-Building\nThis message reflects rising emotional pressure or subtle control attempts.",
    2: "π₯ Risk Stage: Escalation\nThis message includes direct or aggressive patterns, suggesting active harm.",
    3: "π§οΈ Risk Stage: Reconciliation\nThis message reflects a reset attemptβapologies or emotional repair without accountability.",
    4: "πΈ Risk Stage: Calm / Honeymoon\nThis message appears supportive but may follow prior harm, minimizing it."
}

# Safety-checklist items as (question text, weight). Checked weights are summed
# into the escalation score; analyze_composite maps the total to High/Moderate/Low.
ESCALATION_QUESTIONS = [
    ("Partner has access to firearms or weapons", 4),
    ("Partner threatened to kill you", 3),
    ("Partner threatened you with a weapon", 3),
    ("Partner has ever choked you, even if you considered it consensual at the time", 4),
    ("Partner injured or threatened your pet(s)", 3),
    ("Partner has broken your things, punched or kicked walls, or thrown things ", 2),
    ("Partner forced or coerced you into unwanted sexual acts", 3),
    ("Partner threatened to take away your children", 2),
    ("Violence has increased in frequency or severity", 3),
    ("Partner monitors your calls/GPS/social media", 2)
]
# Subset of LABELS that counts toward the DARVO pattern component in
# calculate_darvo_score().
DARVO_PATTERNS = {
    "blame shifting", "projection", "dismissiveness", "guilt tripping", "contradictory statements"
}
|
|
|
97 |
"Youβre just trying to start a fight.", "I was only trying to help.", "Youβre making things up.",
|
98 |
"Youβre blowing this out of proportion.", "Youβre being paranoid.", "Youβre too emotional.",
|
99 |
"Youβre always so dramatic.", "Youβre just trying to make me look bad.",
|
100 |
+
|
101 |
"Youβre crazy.", "Youβre the one with the problem.", "Youβre always so negative.",
|
102 |
"Youβre just trying to control me.", "Youβre the abusive one.", "Youβre trying to ruin my life.",
|
103 |
"Youβre just jealous.", "Youβre the one who needs help.", "Youβre always playing the victim.",
|
|
|
108 |
"Youβre the one whoβs always making me look like the bad guy.",
|
109 |
"Youβre the one whoβs always making me feel like a failure.",
|
110 |
"Youβre the one whoβs always making me feel like Iβm not good enough.",
|
111 |
+
|
112 |
"I canβt believe youβre doing this to me.", "Youβre hurting me.",
|
113 |
"Youβre making me feel like a terrible person.", "Youβre always blaming me for everything.",
|
114 |
"Youβre the one whoβs abusive.", "Youβre the one whoβs controlling.", "Youβre the one whoβs manipulative.",
|
|
|
124 |
"Youβre the one whoβs always making me feel like Iβm the one whoβs abusive.",
|
125 |
"Youβre the one whoβs always making me feel like Iβm the one whoβs toxic."
|
126 |
]
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
127 |
def detect_contradiction(message):
|
128 |
patterns = [
|
129 |
(r"\b(i love you).{0,15}(i hate you|you ruin everything)", re.IGNORECASE),
|
|
|
133 |
(r"\b(i donβt care).{0,15}(you never think of me)", re.IGNORECASE),
|
134 |
(r"\b(i guess iβm just).{0,15}(the bad guy|worthless|never enough)", re.IGNORECASE)
|
135 |
]
|
136 |
+
return any(re.search(p, message, flags) for p, flags in patterns)
|
137 |
+
|
138 |
def calculate_darvo_score(patterns, sentiment_before, sentiment_after, motifs_found, contradiction_flag=False):
    """Score the likelihood of DARVO dynamics (deny / attack / reverse roles).

    Combines four weighted components — detected DARVO patterns, a shift
    toward negative sentiment, DARVO motif phrase matches, and an explicit
    contradiction flag — into a value rounded to 3 places and capped at 1.0.
    """
    darvo_hits = sum(1 for p in patterns if p in DARVO_PATTERNS)
    pattern_component = darvo_hits / len(DARVO_PATTERNS)

    # Only movement toward negativity contributes; improvement counts as zero.
    shift_component = max(0.0, sentiment_after - sentiment_before)

    matched_motifs = [
        m for m in motifs_found
        if any(phrase.lower() in m.lower() for phrase in DARVO_MOTIFS)
    ]
    motif_component = len(matched_motifs) / len(DARVO_MOTIFS)

    contradiction_component = 1.0 if contradiction_flag else 0.0

    raw = (
        0.3 * pattern_component
        + 0.3 * shift_component
        + 0.25 * motif_component
        + 0.15 * contradiction_component
    )
    return round(min(raw, 1.0), 3)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
158 |
def detect_weapon_language(text):
    """Return True if *text* contains weapon or weapon-violence vocabulary."""
    WEAPON_TERMS = (
        "knife", "knives", "stab", "cut you", "cutting",
        "gun", "shoot", "rifle", "firearm", "pistol",
        "bomb", "blow up", "grenade", "explode",
        "weapon", "armed", "loaded", "kill you", "take you out",
    )
    lowered = text.lower()
    for term in WEAPON_TERMS:
        if term in lowered:
            return True
    return False
|
167 |
def get_risk_stage(patterns, sentiment):
    """Map detected patterns (plus sentiment) onto the 4-stage abuse cycle.

    2 = escalation, 3 = reconciliation, 4 = calm/honeymoon,
    1 = tension-building (also the default when nothing else matches).
    """
    found = set(patterns)
    if found & {"threat", "insults"}:
        return 2
    if "recovery phase" in found:
        return 3
    if found & {"control", "guilt tripping"}:
        return 1
    if sentiment == "supportive" and found & {"projection", "dismissiveness"}:
        return 4
    return 1
|
177 |
|
178 |
+
def generate_risk_snippet(abuse_score, top_label, escalation_score, stage):
    """Build the human-readable risk summary appended to the report text.

    Args:
        abuse_score: composite abuse intensity, 0-100.
        top_label: "pattern β score%" string; split on " β " below.
        escalation_score: summed checklist weights (0 when checklist skipped).
        stage: abuse-cycle stage 1-4 from get_risk_stage().

    Returns:
        Markdown-flavored text with risk level, explanation, and detected pattern.
    """
    # Risk level: either a high abuse score or a high checklist score is enough.
    if abuse_score >= 85 or escalation_score >= 16:
        risk_level = "high"
    elif abuse_score >= 60 or escalation_score >= 8:
        risk_level = "moderate"
    elif stage == 2 and abuse_score >= 40:
        risk_level = "moderate"  # π§  New rule for escalation stage
    else:
        risk_level = "low"
    # top_label is formatted by analyze_composite; score part may be absent.
    pattern_label = top_label.split(" β ")[0]
    pattern_score = top_label.split(" β ")[1] if " β " in top_label else ""

    # Per-pattern explanation shown under "Why this might be flagged".
    WHY_FLAGGED = {
        "control": "This message may reflect efforts to restrict someoneβs autonomy, even if it's framed as concern or care.",
        "gaslighting": "This message could be manipulating someone into questioning their perception or feelings.",
        "dismissiveness": "This message may include belittling, invalidating, or ignoring the other personβs experience.",
        "insults": "Direct insults often appear in escalating abusive dynamics and can erode emotional safety.",
        "threat": "This message includes threatening language, which is a strong predictor of harm.",
        "blame shifting": "This message may redirect responsibility to avoid accountability, especially during conflict.",
        "guilt tripping": "This message may induce guilt in order to control or manipulate behavior.",
        "recovery phase": "This message may be part of a tension-reset cycle, appearing kind but avoiding change.",
        "projection": "This message may involve attributing the abuserβs own behaviors to the victim.",
        "default": "This message contains language patterns that may affect safety, clarity, or emotional autonomy."
    }

    explanation = WHY_FLAGGED.get(pattern_label.lower(), WHY_FLAGGED["default"])

    base = f"\n\nπ Risk Level: {risk_level.capitalize()}\n"
    base += f"This message shows strong indicators of **{pattern_label}**. "

    if risk_level == "high":
        base += "The language may reflect patterns of emotional control, even when expressed in soft or caring terms.\n"
    elif risk_level == "moderate":
        base += "There are signs of emotional pressure or indirect control that may escalate if repeated.\n"
    else:
        base += "The message does not strongly indicate abuse, but it's important to monitor for patterns.\n"

    base += f"\nπ‘ *Why this might be flagged:*\n{explanation}\n"
    base += f"\nDetected Pattern: **{pattern_label} ({pattern_score})**\n"
    base += "π§ You can review the pattern in context. This tool highlights possible dynamicsβnot judgments."

    return base
|
220 |
+
def analyze_single_message(text, thresholds):
    """Run all detectors on a single message.

    Args:
        text: the message to analyze.
        thresholds: per-label cutoff dict (a copy of THRESHOLDS).

    Returns:
        (abuse_score, threshold_labels, top_patterns, sst_result, stage,
        darvo_score) — consumed positionally by analyze_composite.
    """
    motif_hits, matched_phrases = detect_motifs(text)
    result = sst_pipeline(text)[0]
    sentiment = "supportive" if result['label'] == "POSITIVE" else "undermining"
    # Only undermining messages feed a sentiment shift into the DARVO score.
    sentiment_score = result['score'] if sentiment == "undermining" else 0.0
    weapon_flag = detect_weapon_language(text)
    # Supportive-sounding messages get every threshold raised by 0.05 —
    # presumably to reduce false positives; verify against tuning notes.
    adjusted_thresholds = {
        k: v + 0.05 if sentiment == "supportive" else v
        for k, v in thresholds.items()
    }

    contradiction_flag = detect_contradiction(text)

    # Multi-label classification: one sigmoid score per entry in LABELS.
    inputs = tokenizer(text, return_tensors="pt", truncation=True, padding=True)
    with torch.no_grad():
        outputs = model(**inputs)
    scores = torch.sigmoid(outputs.logits.squeeze(0)).numpy()

    threshold_labels = [
        label for label, score in zip(LABELS, scores)
        if score > adjusted_thresholds[label]
    ]

    motifs = [phrase for _, phrase in matched_phrases]

    darvo_score = calculate_darvo_score(
        threshold_labels,
        sentiment_before=0.0,
        sentiment_after=sentiment_score,
        motifs_found=motifs,
        contradiction_flag=contradiction_flag
    )
    # Two highest-scoring patterns, regardless of threshold.
    top_patterns = sorted(
        [(label, score) for label, score in zip(LABELS, scores)],
        key=lambda x: x[1],
        reverse=True
    )[:2]

    # Compute weighted average across all patterns (not just top 2)
    weighted_total = 0.0
    weight_sum = 0.0
    for label, score in zip(LABELS, scores):
        weight = PATTERN_WEIGHTS.get(label, 1.0)
        weighted_total += score * weight
        weight_sum += weight

    abuse_score_raw = (weighted_total / weight_sum) * 100
    stage = get_risk_stage(threshold_labels, sentiment)
    # Weapon mentions boost intensity and force at least the escalation stage.
    if weapon_flag:
        abuse_score_raw = min(abuse_score_raw + 25, 100)  # boost intensity
    if weapon_flag and stage < 2:
        stage = 2
    if weapon_flag:
        print("β οΈ Weapon-related language detected.")

    # Cap at 95% unless a high-severity pattern cleared its threshold.
    if "threat" in threshold_labels or "control" in threshold_labels or "insults" in threshold_labels:
        abuse_score = min(abuse_score_raw, 100)
    else:
        abuse_score = min(abuse_score_raw, 95)

    # Diagnostic dump to stdout (visible in the Space logs).
    print("\n--- Debug Info ---")
    print(f"Text: {text}")
    print(f"Sentiment: {sentiment} (raw: {result['label']}, score: {result['score']:.3f})")
    print("Abuse Pattern Scores:")
    for label, score in zip(LABELS, scores):
        passed = "β" if score > adjusted_thresholds[label] else "β"
        print(f"  {label:25} β {score:.3f} {passed}")
    print(f"Motifs: {motifs}")
    print(f"Contradiction: {contradiction_flag}")
    print("------------------\n")

    return abuse_score, threshold_labels, top_patterns, result, stage, darvo_score
|
294 |
+
|
295 |
+
def analyze_composite(msg1, date1, msg2, date2, msg3, date3, *answers_and_none):
    """Gradio entry point: analyze up to three messages plus the safety checklist.

    Args:
        msg1..msg3 / date1..date3: message texts with optional dates.
        *answers_and_none: one bool per ESCALATION_QUESTIONS entry, followed
            by the "None of the above" checkbox as the final element.

    Returns:
        (report_text, timeline_image), or a plain prompt string when no
        message was entered.
    """
    none_selected_checked = answers_and_none[-1]
    responses_checked = any(answers_and_none[:-1])
    # "None of the above" only counts when no individual item is ticked.
    none_selected = not responses_checked and none_selected_checked

    if none_selected:
        escalation_score = None
        risk_level = "unknown"
    else:
        escalation_score = sum(w for (_, w), a in zip(ESCALATION_QUESTIONS, answers_and_none[:-1]) if a)
        risk_level = (
            "High" if escalation_score >= 16 else
            "Moderate" if escalation_score >= 8 else
            "Low"
        )

    messages = [msg1, msg2, msg3]
    dates = [date1, date2, date3]
    active = [(m, d) for m, d in zip(messages, dates) if m.strip()]
    if not active:
        return "Please enter at least one message."

    # Each result is ((abuse_score, labels, top_patterns, sst_result, stage,
    # darvo_score), date); the indices below unpack that tuple positionally.
    results = [(analyze_single_message(m, THRESHOLDS.copy()), d) for m, d in active]
    abuse_scores = [r[0][0] for r in results]
    top_labels = [r[0][2][0][0] for r in results]
    top_scores = [r[0][2][0][1] for r in results]
    sentiments = [r[0][3]['label'] for r in results]
    stages = [r[0][4] for r in results]
    darvo_scores = [r[0][5] for r in results]
    dates_used = [r[1] or "Undated" for r in results]  # Store dates for future mapping

    composite_abuse = int(round(sum(abuse_scores) / len(abuse_scores)))
    top_label = f"{top_labels[0]} β {int(round(top_scores[0] * 100))}%"

    # Ties in stage frequency resolve arbitrarily via set iteration order.
    most_common_stage = max(set(stages), key=stages.count)
    stage_text = RISK_STAGE_LABELS[most_common_stage]

    avg_darvo = round(sum(darvo_scores) / len(darvo_scores), 3)
    darvo_blurb = ""
    if avg_darvo > 0.25:
        level = "moderate" if avg_darvo < 0.65 else "high"
        darvo_blurb = f"\n\nπ **DARVO Score: {avg_darvo}** β This indicates a **{level} likelihood** of narrative reversal (DARVO), where the speaker may be denying, attacking, or reversing blame."

    out = f"Abuse Intensity: {composite_abuse}%\n"
    out += "π This reflects the strength and severity of detected abuse patterns in the message(s).\n\n"

    if escalation_score is None:
        out += "Escalation Potential: Unknown (Checklist not completed)\n"
        out += "π *This section was not completed. Escalation potential is unknown.*\n\n"
    else:
        out += f"Escalation Potential: {risk_level} ({escalation_score}/{sum(w for _, w in ESCALATION_QUESTIONS)})\n"
        out += "π¨ This indicates how many serious risk factors are present based on your answers to the safety checklist.\n"

    out += generate_risk_snippet(composite_abuse, top_label, escalation_score if escalation_score is not None else 0, most_common_stage)
    out += f"\n\n{stage_text}"
    out += darvo_blurb

    pattern_labels = [r[0][2][0][0] for r in results]  # top label for each message
    timeline_image = generate_abuse_score_chart(dates_used, abuse_scores, pattern_labels)
    return out, timeline_image
|
355 |
+
|
356 |
# --- Gradio UI wiring ---
# Three (message, optional date) textbox pairs, flattened into a single
# positional input list, followed by the checklist checkboxes and the
# "None of the above" box — matching analyze_composite's signature.
message_date_pairs = [
    (
        gr.Textbox(label=f"Message {i+1}"),
        gr.Textbox(label=f"Date {i+1} (optional)", placeholder="YYYY-MM-DD")
    )
    for i in range(3)
]
textbox_inputs = [item for pair in message_date_pairs for item in pair]
quiz_boxes = [gr.Checkbox(label=q) for q, _ in ESCALATION_QUESTIONS]
none_box = gr.Checkbox(label="None of the above")

iface = gr.Interface(
    fn=analyze_composite,
    inputs=textbox_inputs + quiz_boxes + [none_box],
    outputs=[
        gr.Textbox(label="Results"),
        gr.Image(label="Risk Stage Timeline", type="pil")
    ],
    title="Abuse Pattern Detector + Escalation Quiz",
    allow_flagging="manual"
)

if __name__ == "__main__":
    # Fix: removed the stray trailing "." after launch() — it was a
    # SyntaxError that prevented the whole module from loading.
    iface.launch()
|