Spaces:
Running
Running
Update app.py
Browse files
app.py
CHANGED
@@ -9,8 +9,40 @@ import io
|
|
9 |
from PIL import Image
|
10 |
from datetime import datetime
|
11 |
|
12 |
-
# ———
|
13 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
14 |
DARVO_MOTIFS = [
|
15 |
"I never said that.", "You’re imagining things.", "That never happened.",
|
16 |
"You’re making a big deal out of nothing.", "It was just a joke.", "You’re too sensitive.",
|
@@ -27,33 +59,30 @@ DARVO_MOTIFS = [
|
|
27 |
"You’re the one who’s always complaining.", "You’re the one who’s always starting arguments.",
|
28 |
"You’re the one who’s always making things worse.", "You’re the one who’s always making me feel bad.",
|
29 |
"You’re the one who’s always making me look like the bad guy.",
|
30 |
-
"You’re the one who’s always making me feel like a failure.",
|
31 |
-
"
|
32 |
-
"
|
33 |
-
"You’re
|
34 |
-
"You’re the one who’s
|
35 |
-
"You’re the one who’s
|
36 |
-
"You’re the one who’s always
|
37 |
-
"You’re the one who’s always making me feel like I’m the
|
38 |
-
"You’re the one who’s always making me feel like
|
39 |
-
"You’re the one who’s always making me feel like
|
40 |
-
"You’re the one who’s always making me feel like
|
41 |
-
"You’re the one who’s always making me feel like
|
|
|
|
|
|
|
42 |
]
|
43 |
-
|
44 |
-
"gaslighting": 1.3,
|
45 |
-
"control": 1.2,
|
46 |
-
"dismissiveness": 0.8,
|
47 |
-
"blame shifting": 0.8,
|
48 |
-
"contradictory statements": 0.75,
|
49 |
-
"threat": 1.5,
|
50 |
-
}
|
51 |
RISK_STAGE_LABELS = {
|
52 |
1: "🌀 Risk Stage: Tension-Building\nThis message reflects rising emotional pressure or subtle control attempts.",
|
53 |
2: "🔥 Risk Stage: Escalation\nThis message includes direct or aggressive patterns, suggesting active harm.",
|
54 |
3: "🌧️ Risk Stage: Reconciliation\nThis message reflects a reset attempt—apologies or emotional repair without accountability.",
|
55 |
4: "🌸 Risk Stage: Calm / Honeymoon\nThis message appears supportive but may follow prior harm, minimizing it."
|
56 |
}
|
|
|
57 |
ESCALATION_QUESTIONS = [
|
58 |
("Partner has access to firearms or weapons", 4),
|
59 |
("Partner threatened to kill you", 3),
|
@@ -71,7 +100,10 @@ def detect_contradiction(message):
|
|
71 |
patterns = [
|
72 |
(r"\b(i love you).{0,15}(i hate you|you ruin everything)", re.IGNORECASE),
|
73 |
(r"\b(i’m sorry).{0,15}(but you|if you hadn’t)", re.IGNORECASE),
|
74 |
-
|
|
|
|
|
|
|
75 |
]
|
76 |
return any(re.search(pat, message, flags) for pat, flags in patterns)
|
77 |
|
@@ -90,116 +122,151 @@ def generate_risk_snippet(abuse_score, top_label, escalation_score, stage):
|
|
90 |
why = {
|
91 |
"control": "This message may reflect efforts to restrict someone’s autonomy.",
|
92 |
"gaslighting": "This message could be manipulating perception.",
|
93 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
94 |
}.get(label, "This message contains language patterns that may affect safety.")
|
95 |
if abuse_score>=85 or escalation_score>=16:
|
96 |
-
lvl="high"
|
97 |
elif abuse_score>=60 or escalation_score>=8:
|
98 |
-
lvl="moderate"
|
99 |
else:
|
100 |
-
lvl="low"
|
101 |
-
return f"\n\n🛑 Risk Level: {lvl.capitalize()}\nThis message shows **{label}**.\n💡 Why
|
102 |
|
103 |
def detect_weapon_language(text):
|
104 |
-
kws=["knife","gun","bomb","kill you","shoot","explode"]
|
105 |
-
t=text.lower()
|
106 |
return any(k in t for k in kws)
|
107 |
|
108 |
def get_risk_stage(patterns, sentiment):
|
109 |
-
if "threat" in patterns or "insults" in patterns:
|
110 |
-
|
111 |
-
if "
|
112 |
-
|
|
|
|
|
|
|
|
|
113 |
return 1
|
114 |
|
115 |
-
# ---
|
116 |
def generate_abuse_score_chart(dates, scores, labels):
|
117 |
try:
|
118 |
-
parsed=[datetime.strptime(d,"%Y-%m-%d") for d in dates]
|
119 |
except:
|
120 |
-
parsed=list(range(len(dates)))
|
121 |
-
fig,ax=plt.subplots(figsize=(8,3))
|
122 |
-
ax.plot(parsed,scores,marker='o',linestyle='-',color='darkred',linewidth=2)
|
123 |
for i,(x,y) in enumerate(zip(parsed,scores)):
|
124 |
-
ax.text(x,y+2,f"{labels[i]}\n{int(y)}%",ha='center',fontsize=8)
|
125 |
-
ax.
|
126 |
-
ax.
|
127 |
-
|
|
|
128 |
return Image.open(buf)
|
129 |
|
130 |
-
# --- Load
|
131 |
-
model_name="SamanthaStorm/tether-multilabel-v2"
|
132 |
-
model=AutoModelForSequenceClassification.from_pretrained(model_name)
|
133 |
-
tokenizer=AutoTokenizer.from_pretrained(model_name, use_fast=False)
|
134 |
-
healthy_detector=pipeline("text-classification",model="distilbert-base-uncased-finetuned-sst-2-english")
|
135 |
-
sst_pipeline=pipeline("sentiment-analysis",model="distilbert-base-uncased-finetuned-sst-2-english")
|
136 |
-
|
137 |
-
LABELS=[
|
138 |
-
"blame shifting","contradictory statements","control","dismissiveness",
|
139 |
-
"gaslighting","guilt tripping","insults","obscure language",
|
140 |
-
"projection","recovery phase","threat"
|
141 |
-
]
|
142 |
-
THRESHOLDS={l:THRESHOLDS.get(l,0.3) for l in LABELS}
|
143 |
|
144 |
-
# --- Single
|
145 |
def analyze_single_message(text):
|
146 |
-
|
147 |
-
|
148 |
-
|
149 |
-
|
150 |
-
|
151 |
-
|
152 |
-
|
153 |
-
|
154 |
-
|
155 |
-
|
156 |
-
|
157 |
-
|
158 |
-
|
|
|
|
|
|
|
|
|
159 |
# DARVO
|
160 |
-
|
161 |
-
contradiction=detect_contradiction(text)
|
162 |
-
|
163 |
# stage + weapon
|
164 |
-
stage=get_risk_stage(labels,sentiment)
|
165 |
-
if detect_weapon_language(text):
|
|
|
|
|
166 |
# top patterns
|
167 |
-
top_patterns=sorted(zip(LABELS,probs),key=lambda x:x[1],reverse=True)[:2]
|
168 |
-
return {"abuse_score":abuse_score,"labels":labels,"sentiment":sentiment,
|
169 |
-
"stage":stage,"darvo_score":darvo,"top_patterns":top_patterns}
|
170 |
|
171 |
# --- Composite Analysis ---
|
172 |
-
def analyze_composite(
|
173 |
-
|
174 |
-
|
|
|
|
|
|
|
175 |
else:
|
176 |
-
|
177 |
-
|
178 |
-
msgs=[
|
179 |
-
|
180 |
-
|
181 |
-
|
182 |
-
|
183 |
-
|
184 |
-
|
185 |
-
|
186 |
-
|
187 |
-
|
188 |
-
|
189 |
-
|
190 |
-
|
191 |
# DARVO summary
|
192 |
-
|
193 |
-
|
194 |
-
|
195 |
-
|
196 |
-
|
197 |
-
|
198 |
-
|
199 |
-
|
200 |
-
|
201 |
-
|
202 |
-
|
203 |
-
|
204 |
-
|
205 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
9 |
from PIL import Image
|
10 |
from datetime import datetime
|
11 |
|
12 |
+
# ——— Constants & Utilities ———

# Names of the classifier's output labels. Scores are paired with these
# names positionally, so the order presumably mirrors the model's output
# order -- confirm against the model config before reordering.
LABELS = [
    "blame shifting",
    "contradictory statements",
    "control",
    "dismissiveness",
    "gaslighting",
    "guilt tripping",
    "insults",
    "obscure language",
    "projection",
    "recovery phase",
    "threat",
]

# Per-label probability cut-offs: a label is reported for a message only
# when its score is strictly greater than the value listed here.
THRESHOLDS = {
    "blame shifting": 0.3,
    "contradictory statements": 0.3,
    "control": 0.35,
    "dismissiveness": 0.4,
    "gaslighting": 0.3,
    "guilt tripping": 0.3,
    "insults": 0.3,
    "obscure language": 0.4,
    "projection": 0.4,
    "recovery phase": 0.35,
    "threat": 0.3,
}

# Relative weight of each label in the weighted abuse score. Labels that
# are not listed are looked up with a default weight of 1.0.
PATTERN_WEIGHTS = {
    "gaslighting": 1.3,
    "control": 1.2,
    "dismissiveness": 0.8,
    "blame shifting": 0.8,
    "contradictory statements": 0.75,
    "threat": 1.5,
}

# Labels treated as DARVO-related (presumably consumed by the DARVO scoring
# helper -- verify against its definition). A set literal replaces the
# original set([...]) call; the resulting value is identical.
DARVO_PATTERNS = {
    "blame shifting",
    "projection",
    "dismissiveness",
    "guilt tripping",
    "contradictory statements",
}
|
45 |
+
|
46 |
DARVO_MOTIFS = [
|
47 |
"I never said that.", "You’re imagining things.", "That never happened.",
|
48 |
"You’re making a big deal out of nothing.", "It was just a joke.", "You’re too sensitive.",
|
|
|
59 |
"You’re the one who’s always complaining.", "You’re the one who’s always starting arguments.",
|
60 |
"You’re the one who’s always making things worse.", "You’re the one who’s always making me feel bad.",
|
61 |
"You’re the one who’s always making me look like the bad guy.",
|
62 |
+
"You’re the one who’s always making me feel like a failure.",
|
63 |
+
"You’re the one who’s always making me feel like I’m not good enough.",
|
64 |
+
"I can’t believe you’re doing this to me.", "You’re hurting me.",
|
65 |
+
"You’re making me feel like a terrible person.", "You’re always blaming me for everything.",
|
66 |
+
"You’re the one who’s abusive.", "You’re the one who’s controlling.", "You’re the one who’s manipulative.",
|
67 |
+
"You’re the one who’s toxic.", "You’re the one who’s gaslighting me.",
|
68 |
+
"You’re the one who’s always putting me down.",
|
69 |
+
"You’re the one who’s always making me feel like I’m the problem.",
|
70 |
+
"You’re the one who’s always making me feel like the bad guy.",
|
71 |
+
"You’re the one who’s always making me feel like the villain.",
|
72 |
+
"You’re the one who’s always making me feel like the one who needs to change.",
|
73 |
+
"You’re the one who’s always making me feel like the one who’s wrong.",
|
74 |
+
"You’re the one who’s always making me feel like the one who’s crazy.",
|
75 |
+
"You’re the one who’s always making me feel like the one who’s abusive.",
|
76 |
+
"You’re the one who’s always making me feel like the one who’s toxic."
|
77 |
]
|
78 |
+
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
79 |
# Display text for each numeric risk stage (1-4): a headline line followed
# by a one-sentence explanation, joined by a newline.
RISK_STAGE_LABELS = {
    1: (
        "🌀 Risk Stage: Tension-Building\n"
        "This message reflects rising emotional pressure or subtle control attempts."
    ),
    2: (
        "🔥 Risk Stage: Escalation\n"
        "This message includes direct or aggressive patterns, suggesting active harm."
    ),
    3: (
        "🌧️ Risk Stage: Reconciliation\n"
        "This message reflects a reset attempt—apologies or emotional repair without accountability."
    ),
    4: (
        "🌸 Risk Stage: Calm / Honeymoon\n"
        "This message appears supportive but may follow prior harm, minimizing it."
    ),
}
|
85 |
+
|
86 |
ESCALATION_QUESTIONS = [
|
87 |
("Partner has access to firearms or weapons", 4),
|
88 |
("Partner threatened to kill you", 3),
|
|
|
100 |
patterns = [
|
101 |
(r"\b(i love you).{0,15}(i hate you|you ruin everything)", re.IGNORECASE),
|
102 |
(r"\b(i’m sorry).{0,15}(but you|if you hadn’t)", re.IGNORECASE),
|
103 |
+
(r"\b(i’m trying).{0,15}(you never|why do you)", re.IGNORECASE),
|
104 |
+
(r"\b(do what you want).{0,15}(you’ll regret it|i always give everything)", re.IGNORECASE),
|
105 |
+
(r"\b(i don’t care).{0,15}(you never think of me)", re.IGNORECASE),
|
106 |
+
(r"\b(i guess i’m just).{0,15}(the bad guy|worthless|never enough)", re.IGNORECASE)
|
107 |
]
|
108 |
return any(re.search(pat, message, flags) for pat, flags in patterns)
|
109 |
|
|
|
122 |
why = {
|
123 |
"control": "This message may reflect efforts to restrict someone’s autonomy.",
|
124 |
"gaslighting": "This message could be manipulating perception.",
|
125 |
+
"dismissiveness": "This message may include belittling, invalidating, or ignoring the other person’s experience.",
|
126 |
+
"insults": "Direct insults can erode emotional safety.",
|
127 |
+
"threat": "This message includes threatening language—a strong predictor of harm.",
|
128 |
+
"blame shifting": "This message may redirect responsibility to avoid accountability.",
|
129 |
+
"guilt tripping": "This message may induce guilt to control behavior.",
|
130 |
+
"recovery phase": "This message may be a tension-reset without change.",
|
131 |
+
"projection": "This message may attribute the speaker’s faults to the target.",
|
132 |
}.get(label, "This message contains language patterns that may affect safety.")
|
133 |
if abuse_score>=85 or escalation_score>=16:
|
134 |
+
lvl = "high"
|
135 |
elif abuse_score>=60 or escalation_score>=8:
|
136 |
+
lvl = "moderate"
|
137 |
else:
|
138 |
+
lvl = "low"
|
139 |
+
return f"\n\n🛑 Risk Level: {lvl.capitalize()}\nThis message shows **{label}**.\n💡 *Why:* {why}\n"
|
140 |
|
141 |
# Weapon / lethal-violence keywords, anchored at the start of a word so that
# unrelated words which merely contain a keyword ("begun", "Burgundy",
# "skill you") are no longer flagged. Inflected forms ("guns", "shooting",
# "exploded") and the compounds "handgun", "shotgun" and "firebomb" still
# match, as they did with plain substring search.
_WEAPON_LANGUAGE_RE = re.compile(
    r"\b(?:knife|(?:hand|shot)?gun|(?:fire)?bomb|kill you|shoot|explode)",
    re.IGNORECASE,
)


def detect_weapon_language(text):
    """Return True when *text* mentions a weapon or lethal violence.

    Matching is case-insensitive and anchored at word starts.

    Args:
        text: The message to scan. ``None`` or an empty string is treated as
            containing no weapon language.

    Returns:
        bool: True if any keyword is found, otherwise False.
    """
    if not text:
        return False
    return _WEAPON_LANGUAGE_RE.search(text) is not None
|
145 |
|
146 |
def get_risk_stage(patterns, sentiment):
    """Map detected pattern labels and sentiment to a risk-stage number.

    Rules are checked in priority order and the first match wins:
    threat/insults -> 2, control/guilt tripping -> 1, recovery phase -> 3,
    supportive sentiment with projection or dismissiveness -> 4.
    Anything else falls back to stage 1.

    Args:
        patterns: Container of detected label names.
        sentiment: Sentiment tag; only the value "supportive" is significant.

    Returns:
        int: A stage number from 1 to 4.
    """
    def has_any(*names):
        return any(name in patterns for name in names)

    if has_any("threat", "insults"):
        stage = 2
    elif has_any("control", "guilt tripping"):
        stage = 1
    elif has_any("recovery phase"):
        stage = 3
    elif sentiment == "supportive" and has_any("projection", "dismissiveness"):
        stage = 4
    else:
        stage = 1
    return stage
|
156 |
|
157 |
+
# --- Visualization ---
|
158 |
def generate_abuse_score_chart(dates, scores, labels):
    """Plot abuse scores as a line chart and return the chart as a PIL image.

    Args:
        dates: One entry per score. When every entry parses as
            ``YYYY-MM-DD`` the x-axis uses real dates; otherwise the points
            are plotted at positions 0, 1, 2, ...
        scores: Numeric abuse scores, plotted on a fixed 0-105 y-axis.
        labels: Text shown above each point together with the rounded-down
            score.

    Returns:
        A PIL image opened on an in-memory PNG of the chart.
    """
    try:
        parsed = [datetime.strptime(d, "%Y-%m-%d") for d in dates]
    except (TypeError, ValueError):
        # Missing or free-form dates (e.g. "Undated"): fall back to plain
        # positions instead of swallowing every possible exception.
        parsed = list(range(len(dates)))

    fig, ax = plt.subplots(figsize=(8, 3))
    try:
        ax.plot(parsed, scores, marker='o', linestyle='-', color='darkred', linewidth=2)
        for x, y, label in zip(parsed, scores, labels):
            ax.text(x, y + 2, f"{label}\n{int(y)}%", ha='center', fontsize=8)
        ax.set_title("Abuse Intensity Over Time")
        ax.set_xlabel("Date")
        ax.set_ylabel("Abuse Score (%)")
        ax.set_ylim(0, 105)
        ax.grid(True)
        fig.tight_layout()
        buf = io.BytesIO()
        # Save through the figure object rather than pyplot's "current
        # figure" so the output never depends on global pyplot state.
        fig.savefig(buf, format='png')
    finally:
        # pyplot keeps every figure alive until it is closed; without this
        # each request would leak one figure in a long-running app.
        plt.close(fig)
    buf.seek(0)
    return Image.open(buf)
|
172 |
|
173 |
+
# --- Load Models & Pipelines ---
model_name = "SamanthaStorm/tether-multilabel-v2"
model = AutoModelForSequenceClassification.from_pretrained(model_name)
tokenizer = AutoTokenizer.from_pretrained(model_name, use_fast=False)

# "sentiment-analysis" is an alias of the "text-classification" pipeline
# task, and both detectors used the very same checkpoint, so the pipeline is
# built once and shared under both names instead of loading the weights twice.
healthy_detector = pipeline(
    "text-classification",
    model="distilbert-base-uncased-finetuned-sst-2-english",
)
sst_pipeline = healthy_detector
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
179 |
|
180 |
+
# --- Single-Message Analysis ---
|
181 |
def analyze_single_message(text):
    """Score a single message for abuse patterns.

    Steps:
      1. Healthy bypass: if the sentiment classifier is more than 90% sure
         the message is POSITIVE, return an all-clear result immediately.
         The bypass is skipped when the message contains weapon language,
         so a cheerful-sounding threat is still fully analysed.
      2. Run the multilabel model and keep every label whose sigmoid score
         exceeds its entry in ``THRESHOLDS``.
      3. Compute the abuse score as the ``PATTERN_WEIGHTS``-weighted mean of
         all label scores, scaled to 0-100.
      4. Derive sentiment ("supportive"/"undermining"), the DARVO score and
         the risk stage.
      5. Weapon language adds 25 points (capped at 100) and raises the
         stage to at least 2.

    Args:
        text: The message to analyse.

    Returns:
        dict with keys ``abuse_score`` (int), ``labels`` (list of label
        names), ``sentiment`` (str), ``stage`` (int), ``darvo_score`` and
        ``top_patterns`` (the two highest-scoring ``(label, score)`` pairs;
        empty for bypassed messages).
    """
    has_weapon_language = detect_weapon_language(text)

    # healthy bypass (never applied to messages with weapon language)
    if not has_weapon_language:
        healthy = healthy_detector(text)[0]
        if healthy['label'] == "POSITIVE" and healthy['score'] > 0.9:
            return {"abuse_score":0, "labels":[], "sentiment":"supportive", "stage":4, "darvo_score":0.0, "top_patterns":[]}

    # model scoring
    inputs = tokenizer(text, return_tensors='pt', truncation=True, padding=True)
    with torch.no_grad():
        logits = model(**inputs).logits.squeeze(0)
    probs = torch.sigmoid(logits).numpy()
    labels = [lab for lab, p in zip(LABELS, probs) if p > THRESHOLDS[lab]]

    # abuse score: weighted mean over ALL labels, not only those above threshold
    weights = [PATTERN_WEIGHTS.get(lab, 1.0) for lab in LABELS]
    weighted_sum = sum(p * w for p, w in zip(probs, weights))
    abuse_score = int(round(weighted_sum / sum(weights) * 100))

    # sentiment: only a negative reading contributes a score to DARVO
    sst = sst_pipeline(text)[0]
    sentiment = 'supportive' if sst['label'] == 'POSITIVE' else 'undermining'
    sent_score = sst['score'] if sentiment == 'undermining' else 0.0

    # DARVO (the hit count returned by detect_motifs is not needed here)
    _, matched = detect_motifs(text)
    contradiction = detect_contradiction(text)
    # NOTE(review): the second argument is hard-coded to 0.0; its meaning is
    # defined by calculate_darvo_score -- confirm this is intentional.
    darvo_score = calculate_darvo_score(labels, 0.0, sent_score, matched, contradiction)

    # stage + weapon
    stage = get_risk_stage(labels, sentiment)
    if has_weapon_language:
        abuse_score = min(abuse_score + 25, 100)
        stage = max(stage, 2)

    # top patterns
    top_patterns = sorted(zip(LABELS, probs), key=lambda x: x[1], reverse=True)[:2]
    return {"abuse_score":abuse_score, "labels":labels, "sentiment":sentiment, "stage":stage, "darvo_score":darvo_score, "top_patterns":top_patterns}
|
|
|
211 |
|
212 |
# --- Composite Analysis ---
|
213 |
+
def analyze_composite(msg1, date1, msg2, date2, msg3, date3, *answers_and_none):
    """Analyse up to three messages plus the escalation checklist.

    Args:
        msg1, msg2, msg3: Message texts; blank or missing ones are ignored.
        date1, date2, date3: Optional dates for the messages; a falsy date
            is shown as "Undated".
        *answers_and_none: One truthy/falsy answer per entry of
            ``ESCALATION_QUESTIONS`` followed by the "None of the above"
            checkbox as the last value.

    Returns:
        tuple: ``(report_text, chart_image)``. When no message was entered
        the tuple is ``("Please enter at least one message.", None)`` so
        that both output components still receive a value.
    """
    # Validate the messages first so the empty case exits before any model
    # or checklist work is done.
    active = [
        (m, d)
        for m, d in zip((msg1, msg2, msg3), (date1, date2, date3))
        if m and m.strip()
    ]
    if not active:
        return "Please enter at least one message.", None

    # Escalation checklist: the last value is the "None of the above" box.
    answers = answers_and_none[:-1]
    none_checked = bool(answers_and_none) and answers_and_none[-1]
    if none_checked and not any(answers):
        # NOTE(review): "None of the above" alone is reported as an
        # incomplete checklist -- kept as in the original, confirm intent.
        esc_score = None
    else:
        esc_score = sum(w for (_, w), a in zip(ESCALATION_QUESTIONS, answers) if a)

    # Per-message analysis. The original unpacked each (result, date) pair
    # and then indexed the result dict with [0], which raised KeyError on
    # every run; results and dates are kept in separate lists here.
    analyses = [analyze_single_message(m) for m, _ in active]
    dates_used = [d or 'Undated' for _, d in active]
    abuse_scores = [a['abuse_score'] for a in analyses]
    top_labels = [a['top_patterns'][0][0] if a['top_patterns'] else 'None' for a in analyses]

    # common stage
    stage_list = [a['stage'] for a in analyses]
    most_common_stage = max(set(stage_list), key=stage_list.count)
    composite_abuse = int(round(sum(abuse_scores) / len(abuse_scores)))

    # DARVO summary (only mentioned when the average exceeds 0.25)
    darvo_vals = [a['darvo_score'] for a in analyses]
    avg_darvo = round(sum(darvo_vals) / len(darvo_vals), 3)
    darvo_blurb = ''
    if avg_darvo > 0.25:
        lvl = 'moderate' if avg_darvo < 0.65 else 'high'
        darvo_blurb = f"\n🎭 DARVO Score: {avg_darvo} ({lvl})"

    # output text
    out = f"Abuse Intensity: {composite_abuse}%\n"
    if esc_score is None:
        out += "Escalation Potential: Unknown (Checklist not completed)\n"
    else:
        risk_level = 'High' if esc_score >= 16 else 'Moderate' if esc_score >= 8 else 'Low'
        max_score = sum(w for _, w in ESCALATION_QUESTIONS)
        out += f"Escalation Potential: {risk_level} ({esc_score}/{max_score})\n"

    # risk snippet, labelled with the first message's strongest pattern
    first_patterns = analyses[0]['top_patterns']
    if first_patterns:
        pattern_score = f"{top_labels[0]} – {int(first_patterns[0][1]*100)}%"
    else:
        pattern_score = top_labels[0]
    out += generate_risk_snippet(composite_abuse, pattern_score, esc_score or 0, most_common_stage)
    out += darvo_blurb

    img = generate_abuse_score_chart(dates_used, abuse_scores, top_labels)
    return out, img
|
254 |
+
|
255 |
+
# --- Gradio Interface ---
|
256 |
+
def _message_row(number):
    """Create the (message, date) textbox pair for input slot *number*."""
    message_box = gr.Textbox(label=f"Message {number}")
    date_box = gr.Textbox(label=f"Date {number} (optional)", placeholder="YYYY-MM-DD")
    return (message_box, date_box)


# Three message/date rows, flattened to msg1, date1, msg2, date2, msg3, date3
# to line up with analyze_composite's positional parameters.
message_date_pairs = [_message_row(number) for number in (1, 2, 3)]
textbox_inputs = [box for row in message_date_pairs for box in row]

# One checkbox per escalation question, then the "None of the above" box,
# which analyze_composite expects as the final argument.
quiz_boxes = [gr.Checkbox(label=question) for question, _weight in ESCALATION_QUESTIONS]
none_box = gr.Checkbox(label="None of the above")

_result_outputs = [
    gr.Textbox(label="Results"),
    gr.Image(label="Risk Stage Timeline", type="pil"),
]

iface = gr.Interface(
    fn=analyze_composite,
    inputs=[*textbox_inputs, *quiz_boxes, none_box],
    outputs=_result_outputs,
    title="Tether Abuse Pattern Detector v2",
    allow_flagging="manual",
)

if __name__ == "__main__":
    iface.launch()
|
272 |
+
|