SamanthaStorm committed on
Commit
e46fbeb
·
verified ·
1 Parent(s): 8da6955

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +259 -229
app.py CHANGED
@@ -1,7 +1,7 @@
1
  import gradio as gr
2
  import torch
3
  import numpy as np
4
- from transformers import pipeline, AutoModelForSequenceClassification, AutoTokenizer
5
  from motif_tagging import detect_motifs
6
  import re
7
  import matplotlib.pyplot as plt
@@ -9,18 +9,54 @@ import io
9
  from PIL import Image
10
  from datetime import datetime
11
 
12
- # β€”β€”β€” Constants β€”β€”β€”
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
13
  LABELS = [
14
  "blame shifting", "contradictory statements", "control", "dismissiveness",
15
  "gaslighting", "guilt tripping", "insults", "obscure language",
16
  "projection", "recovery phase", "threat"
17
  ]
18
 
19
- # <- Restore your exact thresholds here:
20
  THRESHOLDS = {
21
- "blame shifting": 0.3, "contradictory statements": 0.3, "control": 0.35, "dismissiveness": 0.4,
22
- "gaslighting": 0.3, "guilt tripping": 0.3, "insults": 0.3, "obscure language": 0.4,
23
- "projection": 0.4, "recovery phase": 0.35, "threat": 0.3
24
  }
25
 
26
  PATTERN_WEIGHTS = {
@@ -29,9 +65,27 @@ PATTERN_WEIGHTS = {
29
  "dismissiveness": 0.8,
30
  "blame shifting": 0.8,
31
  "contradictory statements": 0.75,
32
- "threat": 1.5
 
 
 
 
 
 
33
  }
34
 
 
 
 
 
 
 
 
 
 
 
 
 
35
  DARVO_PATTERNS = {
36
  "blame shifting", "projection", "dismissiveness", "guilt tripping", "contradictory statements"
37
  }
@@ -43,6 +97,7 @@ DARVO_MOTIFS = [
43
  "You’re just trying to start a fight.", "I was only trying to help.", "You’re making things up.",
44
  "You’re blowing this out of proportion.", "You’re being paranoid.", "You’re too emotional.",
45
  "You’re always so dramatic.", "You’re just trying to make me look bad.",
 
46
  "You’re crazy.", "You’re the one with the problem.", "You’re always so negative.",
47
  "You’re just trying to control me.", "You’re the abusive one.", "You’re trying to ruin my life.",
48
  "You’re just jealous.", "You’re the one who needs help.", "You’re always playing the victim.",
@@ -53,6 +108,7 @@ DARVO_MOTIFS = [
53
  "You’re the one who’s always making me look like the bad guy.",
54
  "You’re the one who’s always making me feel like a failure.",
55
  "You’re the one who’s always making me feel like I’m not good enough.",
 
56
  "I can’t believe you’re doing this to me.", "You’re hurting me.",
57
  "You’re making me feel like a terrible person.", "You’re always blaming me for everything.",
58
  "You’re the one who’s abusive.", "You’re the one who’s controlling.", "You’re the one who’s manipulative.",
@@ -68,28 +124,6 @@ DARVO_MOTIFS = [
68
  "You’re the one who’s always making me feel like I’m the one who’s abusive.",
69
  "You’re the one who’s always making me feel like I’m the one who’s toxic."
70
  ]
71
-
72
- RISK_STAGE_LABELS = {
73
- 1: "πŸŒ€ Risk Stage: Tension-Building\nThis message reflects rising emotional pressure or subtle control attempts.",
74
- 2: "πŸ”₯ Risk Stage: Escalation\nThis message includes direct or aggressive patterns, suggesting active harm.",
75
- 3: "🌧️ Risk Stage: Reconciliation\nThis message reflects a reset attemptβ€”apologies or emotional repair without accountability.",
76
- 4: "🌸 Risk Stage: Calm / Honeymoon\nThis message appears supportive but may follow prior harm, minimizing it."
77
- }
78
-
79
- ESCALATION_QUESTIONS = [
80
- ("Partner has access to firearms or weapons", 4),
81
- ("Partner threatened to kill you", 3),
82
- ("Partner threatened you with a weapon", 3),
83
- ("Partner has ever choked you", 4),
84
- ("Partner injured or threatened your pet(s)", 3),
85
- ("Partner has broken your things, punched walls, or thrown objects", 2),
86
- ("Partner forced or coerced you into unwanted sexual acts", 3),
87
- ("Partner threatened to take away your children", 2),
88
- ("Violence has increased in frequency or severity", 3),
89
- ("Partner monitors your calls, GPS, or social media", 2)
90
- ]
91
-
92
- # β€”β€”β€” Helper Functions β€”β€”β€”
93
  def detect_contradiction(message):
94
  patterns = [
95
  (r"\b(i love you).{0,15}(i hate you|you ruin everything)", re.IGNORECASE),
@@ -99,251 +133,247 @@ def detect_contradiction(message):
99
  (r"\b(i don’t care).{0,15}(you never think of me)", re.IGNORECASE),
100
  (r"\b(i guess i’m just).{0,15}(the bad guy|worthless|never enough)", re.IGNORECASE)
101
  ]
102
- return any(re.search(pat, message, flags) for pat, flags in patterns)
103
-
104
  def calculate_darvo_score(patterns, sentiment_before, sentiment_after, motifs_found, contradiction_flag=False):
105
- # count how many DARVO patterns were triggered
106
  pattern_hits = len([p for p in patterns if p in DARVO_PATTERNS])
107
  pattern_score = pattern_hits / len(DARVO_PATTERNS)
108
-
109
- # measure how much sentiment shifts toward negativity
110
  sentiment_shift_score = max(0.0, sentiment_after - sentiment_before)
111
 
112
- # count DARVO motif occurrences
113
  motif_hits = len([
114
- m for m in motifs_found
115
- if any(phrase.lower() in m.lower() for phrase in DARVO_MOTIFS)
116
  ])
117
  motif_score = motif_hits / len(DARVO_MOTIFS)
118
 
119
- # direct contradiction indicator
120
  contradiction_score = 1.0 if contradiction_flag else 0.0
121
 
122
- # reweighted: pattern 25%, sentiment 30%, motifs 35%, contradiction 10%
123
- raw = (
124
- 0.25 * pattern_score
125
- + 0.30 * sentiment_shift_score
126
- + 0.35 * motif_score
127
- + 0.10 * contradiction_score
128
- )
129
- return round(min(raw, 1.0), 3)
130
-
131
- def generate_risk_snippet(abuse_score, top_label, escalation_score, stage):
132
- label = top_label.split(" – ")[0]
133
- why = {
134
- "control": "efforts to restrict autonomy.",
135
- "gaslighting": "manipulating perception.",
136
- "dismissiveness": "invalidating experience.",
137
- "insults": "direct insults erode safety.",
138
- "threat": "threatening language predicts harm.",
139
- "blame shifting": "avoiding accountability.",
140
- "guilt tripping": "inducing guilt to control behavior.",
141
- "recovery phase": "tension-reset without change.",
142
- "projection": "attributing faults to the other person."
143
- }.get(label, "This message contains concerning patterns.")
144
- if abuse_score>=85 or escalation_score>=16:
145
- lvl = "high"
146
- elif abuse_score>=60 or escalation_score>=8:
147
- lvl = "moderate"
148
- else:
149
- lvl = "low"
150
- return f"\n\nπŸ›‘ Risk Level: {lvl.capitalize()}\nThis message shows **{label}**.\nπŸ’‘ Why: {why}\n"
151
-
152
  def detect_weapon_language(text):
153
- kws = ["knife","gun","bomb","kill you","shoot","explode"]
154
- t = text.lower()
155
- return any(k in t for k in kws)
156
-
 
 
 
 
157
  def get_risk_stage(patterns, sentiment):
158
  if "threat" in patterns or "insults" in patterns:
159
  return 2
160
- if "control" in patterns or "guilt tripping" in patterns:
161
- return 1
162
- if "recovery phase" in patterns:
163
  return 3
164
- if sentiment=="supportive" and any(p in patterns for p in ["projection","dismissiveness"]):
 
 
165
  return 4
166
  return 1
167
 
168
- def generate_abuse_score_chart(dates,scores,labels):
169
- try:
170
- parsed=[datetime.strptime(d,"%Y-%m-%d") for d in dates]
171
- except:
172
- parsed=range(len(dates))
173
- fig,ax=plt.subplots(figsize=(8,3))
174
- ax.plot(parsed,scores,marker='o',linestyle='-',color='darkred',linewidth=2)
175
- for i,(x,y) in enumerate(zip(parsed,scores)):
176
- ax.text(x,y+2,f"{labels[i]}\n{int(y)}%",ha='center',fontsize=8)
177
- ax.set(title="Abuse Intensity Over Time",xlabel="Date",ylabel="Abuse Score (%)")
178
- ax.set_ylim(0,105);ax.grid(True);plt.tight_layout()
179
- buf=io.BytesIO();plt.savefig(buf,format='png');buf.seek(0)
180
- return Image.open(buf)
 
 
 
 
 
 
 
 
 
 
 
181
 
182
- # β€”β€”β€” Load Models & Pipelines β€”β€”β€”
183
- model_name="SamanthaStorm/tether-multilabel-v2"
184
- model=AutoModelForSequenceClassification.from_pretrained(model_name)
185
- tokenizer=AutoTokenizer.from_pretrained(model_name, use_fast=False)
186
- healthy_detector=pipeline("text-classification",model="distilbert-base-uncased-finetuned-sst-2-english")
187
- sst_pipeline = pipeline("sentiment-analysis", model="distilbert-base-uncased-finetuned-sst-2-english")
188
 
189
- # β€”β€”β€” Single-Message Analysis β€”β€”β€”
190
- def analyze_single_message(text):
191
- # healthy bypass
192
- h = healthy_detector(text)[0]
193
-
194
- # 1) Strongly positive β†’ healthy
195
- if h['label'] == "POSITIVE" and h['score'] > 0.8:
196
- return {
197
- "abuse_score": 0,
198
- "labels": [],
199
- "sentiment": "supportive",
200
- "stage": 4,
201
- "darvo_score": 0.0,
202
- "top_patterns": []
203
- }
204
-
205
- # 2) Mildly negative/neutral β†’ also healthy
206
- elif h['label'] == "NEGATIVE" and h['score'] < 0.6:
207
- return {
208
- "abuse_score": 0,
209
- "labels": [],
210
- "sentiment": "supportive",
211
- "stage": 4,
212
- "darvo_score": 0.0,
213
- "top_patterns": []
214
- }
215
-
216
- # β€” if neither healthy case, continue on to actual abuse detection β€”
217
- inp = tokenizer(text, return_tensors='pt', truncation=True, padding=True)
218
- with torch.no_grad(): logits=model(**inp).logits.squeeze(0)
219
- probs=torch.sigmoid(logits).numpy()
220
- # …run tokenizer, get `probs` and then:
221
- labels = [lab for lab,p in zip(LABELS, probs) if p > THRESHOLDS[lab]]
222
-
223
- # **NEW**: if absolutely no pattern is detected, force a zero‐abuse β€œhealthy” return:
224
- if not labels:
225
- return {
226
- "abuse_score": 0,
227
- "labels": [],
228
- "sentiment": "supportive",
229
- "stage": 4,
230
- "darvo_score": 0.0,
231
- "top_patterns": []
232
- }
233
 
234
- # abuse score
235
- total_w=sum(PATTERN_WEIGHTS.get(l,1.0) for l in LABELS)
236
- abuse_score=int(round(sum(probs[i]*PATTERN_WEIGHTS.get(l,1.0)
237
- for i,l in enumerate(LABELS))/total_w*100))
238
- # sentiment shift
239
- sst=sst_pipeline(text)[0]
240
- sentiment='supportive' if sst['label']=='POSITIVE' else 'undermining'
241
- sent_score=sst['score'] if sentiment=='undermining' else 0.0
242
- # DARVO
243
- motif_hits, matched = detect_motifs(text)
244
- contradiction=detect_contradiction(text)
245
- darvo_score=calculate_darvo_score(labels,0.0,sent_score,matched,contradiction)
246
- # stage + weapon
247
- stage=get_risk_stage(labels,sentiment)
248
- if detect_weapon_language(text):
249
- abuse_score=min(abuse_score+25,100)
250
- stage=max(stage,2)
251
- # top patterns
252
- top_patterns=sorted(zip(LABELS,probs), key=lambda x:x[1], reverse=True)[:2]
253
- return {
254
- "abuse_score":abuse_score, "labels":labels, "sentiment":sentiment,
255
- "stage":stage, "darvo_score":darvo_score, "top_patterns":top_patterns
256
  }
257
 
258
- # β€”β€”β€” Composite Analysis & UI β€”β€”β€”
259
- def analyze_composite(m1, d1, m2, d2, m3, d3, *answers):
260
- # determine if β€œNone of the above” was the only checked box
261
- none_sel = answers[-1] and not any(answers[:-1])
262
- if none_sel:
263
- esc_score = None
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
264
  risk_level = "unknown"
265
  else:
266
- esc_score = sum(w for (_, w), a in zip(ESCALATION_QUESTIONS, answers[:-1]) if a)
267
- risk_level = "High" if esc_score >= 16 else "Moderate" if esc_score >= 8 else "Low"
268
-
269
- # collect only non-empty messages
270
- msgs = [m1, m2, m3]
271
- dates = [d1, d2, d3]
272
- active = [(m, d) for m, d in zip(msgs, dates) if m.strip()]
 
 
 
273
  if not active:
274
  return "Please enter at least one message."
275
 
276
- out = "" # <-- ADD THIS LINE
277
-
278
- # analyze each message
279
- results = [(analyze_single_message(m), d) for m, d in active]
280
-
281
- # pull out scores and labels
282
- abuse_scores = [res["abuse_score"] for res,_ in results]
283
- # NEW: per-message mapping
284
- for i, score in enumerate(abuse_scores, 1):
285
- if score >= 85: lvl = "High"
286
- elif score >= 60: lvl = "Moderate"
287
- else: lvl = "Low"
288
- out += f"Message {i}: {score}% ({lvl})\n"
289
-
290
- # now your existing composite logic
291
- composite_abuse = int(round(sum(abuse_scores)/len(abuse_scores)))
292
- out += f"Composite Abuse: {composite_abuse}%\n"
293
- # … etc. …
294
- top_labels = [res["top_patterns"][0][0] if res["top_patterns"] else "None" for res, _ in results]
295
- dates_used = [d or "Undated" for _, d in results]
296
- stages = [res["stage"] for res, _ in results]
297
-
298
- # overall risk stage & composite abuse
299
  most_common_stage = max(set(stages), key=stages.count)
300
- composite_abuse = int(round(sum(abuse_scores) / len(abuse_scores)))
 
 
 
 
 
 
301
 
302
- # build the basic output text
303
  out = f"Abuse Intensity: {composite_abuse}%\n"
304
- if esc_score is None:
 
 
305
  out += "Escalation Potential: Unknown (Checklist not completed)\n"
 
306
  else:
307
- total_possible = sum(w for _, w in ESCALATION_QUESTIONS)
308
- out += f"Escalation Potential: {risk_level} ({esc_score}/{total_possible})\n"
309
-
310
- # if zero abuse, skip risk snippet & DARVO
311
- img = generate_abuse_score_chart(dates_used, abuse_scores, top_labels)
312
- if composite_abuse == 0:
313
- return out, img
314
-
315
- # compute DARVO summary
316
- darvos = [res["darvo_score"] for res, _ in results]
317
- avg_darvo = round(sum(darvos) / len(darvos), 3)
318
- darvo_blurb = (
319
- f"\n🎭 DARVO Score: {avg_darvo} ({'high' if avg_darvo >= 0.65 else 'moderate'})"
320
- if avg_darvo > 0.25 else ""
321
- )
322
 
323
- # risk snippet (uses your generate_risk_snippet helper)
324
- first_pattern = top_labels[0]
325
- first_score = int(results[0][0]["top_patterns"][0][1] * 100) if results[0][0]["top_patterns"] else 0
326
- pattern_score = f"{first_pattern} – {first_score}%"
327
- out += generate_risk_snippet(composite_abuse, pattern_score, esc_score or 0, most_common_stage)
328
  out += darvo_blurb
329
 
330
- return out, img
331
-
332
- # β€”β€”β€” Gradio Interface β€”β€”β€”
 
333
  message_date_pairs = [
334
- (gr.Textbox(label=f"Message {i+1}"), gr.Textbox(label=f"Date {i+1} (optional)", placeholder="YYYY-MM-DD"))
 
 
 
335
  for i in range(3)
336
  ]
337
- quiz_boxes = [gr.Checkbox(label=q) for q,_ in ESCALATION_QUESTIONS]
 
338
  none_box = gr.Checkbox(label="None of the above")
339
 
340
  iface = gr.Interface(
341
  fn=analyze_composite,
342
- inputs=[item for pair in message_date_pairs for item in pair] + quiz_boxes + [none_box],
343
- outputs=[gr.Textbox(label="Results"), gr.Image(label="Risk Stage Timeline", type="pil")],
344
- title="Tether Abuse Pattern Detector v2",
 
 
 
345
  allow_flagging="manual"
346
  )
347
 
348
  if __name__ == "__main__":
349
- iface.launch()
 
1
  import gradio as gr
2
  import torch
3
  import numpy as np
4
+ from transformers import pipeline, RobertaForSequenceClassification, RobertaTokenizer
5
  from motif_tagging import detect_motifs
6
  import re
7
  import matplotlib.pyplot as plt
 
9
  from PIL import Image
10
  from datetime import datetime
11
 
12
+ # --- Timeline Visualization Function ---
13
+ def generate_abuse_score_chart(dates, scores, labels):
14
+ import matplotlib.pyplot as plt
15
+ import io
16
+ from PIL import Image
17
+ from datetime import datetime
18
+
19
+ try:
20
+ parsed_dates = [datetime.strptime(d, "%Y-%m-%d") for d in dates]
21
+ except Exception:
22
+ parsed_dates = list(range(len(dates)))
23
+
24
+ fig, ax = plt.subplots(figsize=(8, 3))
25
+ ax.plot(parsed_dates, scores, marker='o', linestyle='-', color='darkred', linewidth=2)
26
+
27
+ for i, (x, y) in enumerate(zip(parsed_dates, scores)):
28
+ label = labels[i]
29
+ ax.text(x, y + 2, f"{label}\n{int(y)}%", ha='center', fontsize=8, color='black')
30
+
31
+ ax.set_title("Abuse Intensity Over Time")
32
+ ax.set_xlabel("Date")
33
+ ax.set_ylabel("Abuse Score (%)")
34
+ ax.set_ylim(0, 105)
35
+ ax.grid(True)
36
+ plt.tight_layout()
37
+
38
+ buf = io.BytesIO()
39
+ plt.savefig(buf, format='png')
40
+ buf.seek(0)
41
+ return Image.open(buf)
42
+ # --- SST Sentiment Model ---
43
+ sst_pipeline = pipeline("sentiment-analysis", model="distilbert-base-uncased-finetuned-sst-2-english")
44
+
45
+ # --- Abuse Model ---
46
+ model_name = "SamanthaStorm/autotrain-jlpi4-mllvp"
47
+ model = RobertaForSequenceClassification.from_pretrained(model_name)
48
+ tokenizer = RobertaTokenizer.from_pretrained(model_name)
49
+
50
  LABELS = [
51
  "blame shifting", "contradictory statements", "control", "dismissiveness",
52
  "gaslighting", "guilt tripping", "insults", "obscure language",
53
  "projection", "recovery phase", "threat"
54
  ]
55
 
 
56
  THRESHOLDS = {
57
+ "blame shifting": 0.3, "contradictory statements": 0.36, "control": 0.48, "dismissiveness": 0.45,
58
+ "gaslighting": 0.30, "guilt tripping": 0.20, "insults": 0.34, "obscure language": 0.25,
59
+ "projection": 0.35, "recovery phase": 0.25, "threat": 0.25
60
  }
61
 
62
  PATTERN_WEIGHTS = {
 
65
  "dismissiveness": 0.8,
66
  "blame shifting": 0.8,
67
  "contradictory statements": 0.75,
68
+ "threat": 1.5 # πŸ”§ New: raise weight for threat
69
+ }
70
+ RISK_STAGE_LABELS = {
71
+ 1: "πŸŒ€ Risk Stage: Tension-Building\nThis message reflects rising emotional pressure or subtle control attempts.",
72
+ 2: "πŸ”₯ Risk Stage: Escalation\nThis message includes direct or aggressive patterns, suggesting active harm.",
73
+ 3: "🌧️ Risk Stage: Reconciliation\nThis message reflects a reset attemptβ€”apologies or emotional repair without accountability.",
74
+ 4: "🌸 Risk Stage: Calm / Honeymoon\nThis message appears supportive but may follow prior harm, minimizing it."
75
  }
76
 
77
+ ESCALATION_QUESTIONS = [
78
+ ("Partner has access to firearms or weapons", 4),
79
+ ("Partner threatened to kill you", 3),
80
+ ("Partner threatened you with a weapon", 3),
81
+ ("Partner has ever choked you, even if you considered it consensual at the time", 4),
82
+ ("Partner injured or threatened your pet(s)", 3),
83
+ ("Partner has broken your things, punched or kicked walls, or thrown things ", 2),
84
+ ("Partner forced or coerced you into unwanted sexual acts", 3),
85
+ ("Partner threatened to take away your children", 2),
86
+ ("Violence has increased in frequency or severity", 3),
87
+ ("Partner monitors your calls/GPS/social media", 2)
88
+ ]
89
  DARVO_PATTERNS = {
90
  "blame shifting", "projection", "dismissiveness", "guilt tripping", "contradictory statements"
91
  }
 
97
  "You’re just trying to start a fight.", "I was only trying to help.", "You’re making things up.",
98
  "You’re blowing this out of proportion.", "You’re being paranoid.", "You’re too emotional.",
99
  "You’re always so dramatic.", "You’re just trying to make me look bad.",
100
+
101
  "You’re crazy.", "You’re the one with the problem.", "You’re always so negative.",
102
  "You’re just trying to control me.", "You’re the abusive one.", "You’re trying to ruin my life.",
103
  "You’re just jealous.", "You’re the one who needs help.", "You’re always playing the victim.",
 
108
  "You’re the one who’s always making me look like the bad guy.",
109
  "You’re the one who’s always making me feel like a failure.",
110
  "You’re the one who’s always making me feel like I’m not good enough.",
111
+
112
  "I can’t believe you’re doing this to me.", "You’re hurting me.",
113
  "You’re making me feel like a terrible person.", "You’re always blaming me for everything.",
114
  "You’re the one who’s abusive.", "You’re the one who’s controlling.", "You’re the one who’s manipulative.",
 
124
  "You’re the one who’s always making me feel like I’m the one who’s abusive.",
125
  "You’re the one who’s always making me feel like I’m the one who’s toxic."
126
  ]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
127
  def detect_contradiction(message):
128
  patterns = [
129
  (r"\b(i love you).{0,15}(i hate you|you ruin everything)", re.IGNORECASE),
 
133
  (r"\b(i don’t care).{0,15}(you never think of me)", re.IGNORECASE),
134
  (r"\b(i guess i’m just).{0,15}(the bad guy|worthless|never enough)", re.IGNORECASE)
135
  ]
136
+ return any(re.search(p, message, flags) for p, flags in patterns)
137
+
138
  def calculate_darvo_score(patterns, sentiment_before, sentiment_after, motifs_found, contradiction_flag=False):
 
139
  pattern_hits = len([p for p in patterns if p in DARVO_PATTERNS])
140
  pattern_score = pattern_hits / len(DARVO_PATTERNS)
141
+
 
142
  sentiment_shift_score = max(0.0, sentiment_after - sentiment_before)
143
 
 
144
  motif_hits = len([
145
+ motif for motif in motifs_found
146
+ if any(phrase.lower() in motif.lower() for phrase in DARVO_MOTIFS)
147
  ])
148
  motif_score = motif_hits / len(DARVO_MOTIFS)
149
 
 
150
  contradiction_score = 1.0 if contradiction_flag else 0.0
151
 
152
+ return round(min(
153
+ 0.3 * pattern_score +
154
+ 0.3 * sentiment_shift_score +
155
+ 0.25 * motif_score +
156
+ 0.15 * contradiction_score, 1.0
157
+ ), 3)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
158
  def detect_weapon_language(text):
159
+ weapon_keywords = [
160
+ "knife", "knives", "stab", "cut you", "cutting",
161
+ "gun", "shoot", "rifle", "firearm", "pistol",
162
+ "bomb", "blow up", "grenade", "explode",
163
+ "weapon", "armed", "loaded", "kill you", "take you out"
164
+ ]
165
+ text_lower = text.lower()
166
+ return any(word in text_lower for word in weapon_keywords)
167
  def get_risk_stage(patterns, sentiment):
168
  if "threat" in patterns or "insults" in patterns:
169
  return 2
170
+ elif "recovery phase" in patterns:
 
 
171
  return 3
172
+ elif "control" in patterns or "guilt tripping" in patterns:
173
+ return 1
174
+ elif sentiment == "supportive" and any(p in patterns for p in ["projection", "dismissiveness"]):
175
  return 4
176
  return 1
177
 
178
+ def generate_risk_snippet(abuse_score, top_label, escalation_score, stage):
179
+ if abuse_score >= 85 or escalation_score >= 16:
180
+ risk_level = "high"
181
+ elif abuse_score >= 60 or escalation_score >= 8:
182
+ risk_level = "moderate"
183
+ elif stage == 2 and abuse_score >= 40:
184
+ risk_level = "moderate" # πŸ”§ New rule for escalation stage
185
+ else:
186
+ risk_level = "low"
187
+ pattern_label = top_label.split(" – ")[0]
188
+ pattern_score = top_label.split(" – ")[1] if " – " in top_label else ""
189
+
190
+ WHY_FLAGGED = {
191
+ "control": "This message may reflect efforts to restrict someone’s autonomy, even if it's framed as concern or care.",
192
+ "gaslighting": "This message could be manipulating someone into questioning their perception or feelings.",
193
+ "dismissiveness": "This message may include belittling, invalidating, or ignoring the other person’s experience.",
194
+ "insults": "Direct insults often appear in escalating abusive dynamics and can erode emotional safety.",
195
+ "threat": "This message includes threatening language, which is a strong predictor of harm.",
196
+ "blame shifting": "This message may redirect responsibility to avoid accountability, especially during conflict.",
197
+ "guilt tripping": "This message may induce guilt in order to control or manipulate behavior.",
198
+ "recovery phase": "This message may be part of a tension-reset cycle, appearing kind but avoiding change.",
199
+ "projection": "This message may involve attributing the abuser’s own behaviors to the victim.",
200
+ "default": "This message contains language patterns that may affect safety, clarity, or emotional autonomy."
201
+ }
202
 
203
+ explanation = WHY_FLAGGED.get(pattern_label.lower(), WHY_FLAGGED["default"])
 
 
 
 
 
204
 
205
+ base = f"\n\nπŸ›‘ Risk Level: {risk_level.capitalize()}\n"
206
+ base += f"This message shows strong indicators of **{pattern_label}**. "
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
207
 
208
+ if risk_level == "high":
209
+ base += "The language may reflect patterns of emotional control, even when expressed in soft or caring terms.\n"
210
+ elif risk_level == "moderate":
211
+ base += "There are signs of emotional pressure or indirect control that may escalate if repeated.\n"
212
+ else:
213
+ base += "The message does not strongly indicate abuse, but it's important to monitor for patterns.\n"
214
+
215
+ base += f"\nπŸ’‘ *Why this might be flagged:*\n{explanation}\n"
216
+ base += f"\nDetected Pattern: **{pattern_label} ({pattern_score})**\n"
217
+ base += "🧠 You can review the pattern in context. This tool highlights possible dynamicsβ€”not judgments."
218
+
219
+ return base
220
+ def analyze_single_message(text, thresholds):
221
+ motif_hits, matched_phrases = detect_motifs(text)
222
+ result = sst_pipeline(text)[0]
223
+ sentiment = "supportive" if result['label'] == "POSITIVE" else "undermining"
224
+ sentiment_score = result['score'] if sentiment == "undermining" else 0.0
225
+ weapon_flag = detect_weapon_language(text)
226
+ adjusted_thresholds = {
227
+ k: v + 0.05 if sentiment == "supportive" else v
228
+ for k, v in thresholds.items()
 
229
  }
230
 
231
+ contradiction_flag = detect_contradiction(text)
232
+
233
+ inputs = tokenizer(text, return_tensors="pt", truncation=True, padding=True)
234
+ with torch.no_grad():
235
+ outputs = model(**inputs)
236
+ scores = torch.sigmoid(outputs.logits.squeeze(0)).numpy()
237
+
238
+ threshold_labels = [
239
+ label for label, score in zip(LABELS, scores)
240
+ if score > adjusted_thresholds[label]
241
+ ]
242
+
243
+ motifs = [phrase for _, phrase in matched_phrases]
244
+
245
+ darvo_score = calculate_darvo_score(
246
+ threshold_labels,
247
+ sentiment_before=0.0,
248
+ sentiment_after=sentiment_score,
249
+ motifs_found=motifs,
250
+ contradiction_flag=contradiction_flag
251
+ )
252
+ top_patterns = sorted(
253
+ [(label, score) for label, score in zip(LABELS, scores)],
254
+ key=lambda x: x[1],
255
+ reverse=True
256
+ )[:2]
257
+
258
+ # Compute weighted average across all patterns (not just top 2)
259
+ weighted_total = 0.0
260
+ weight_sum = 0.0
261
+ for label, score in zip(LABELS, scores):
262
+ weight = PATTERN_WEIGHTS.get(label, 1.0)
263
+ weighted_total += score * weight
264
+ weight_sum += weight
265
+
266
+ abuse_score_raw = (weighted_total / weight_sum) * 100
267
+ stage = get_risk_stage(threshold_labels, sentiment)
268
+ if weapon_flag:
269
+ abuse_score_raw = min(abuse_score_raw + 25, 100) # boost intensity
270
+ if weapon_flag and stage < 2:
271
+ stage = 2
272
+ if weapon_flag:
273
+ print("⚠️ Weapon-related language detected.")
274
+
275
+ if "threat" in threshold_labels or "control" in threshold_labels or "insults" in threshold_labels:
276
+ abuse_score = min(abuse_score_raw, 100)
277
+ else:
278
+ abuse_score = min(abuse_score_raw, 95)
279
+
280
+
281
+
282
+ print("\n--- Debug Info ---")
283
+ print(f"Text: {text}")
284
+ print(f"Sentiment: {sentiment} (raw: {result['label']}, score: {result['score']:.3f})")
285
+ print("Abuse Pattern Scores:")
286
+ for label, score in zip(LABELS, scores):
287
+ passed = "βœ…" if score > adjusted_thresholds[label] else "❌"
288
+ print(f" {label:25} β†’ {score:.3f} {passed}")
289
+ print(f"Motifs: {motifs}")
290
+ print(f"Contradiction: {contradiction_flag}")
291
+ print("------------------\n")
292
+
293
+ return abuse_score, threshold_labels, top_patterns, result, stage, darvo_score
294
+
295
+ def analyze_composite(msg1, date1, msg2, date2, msg3, date3, *answers_and_none):
296
+ none_selected_checked = answers_and_none[-1]
297
+ responses_checked = any(answers_and_none[:-1])
298
+ none_selected = not responses_checked and none_selected_checked
299
+
300
+ if none_selected:
301
+ escalation_score = None
302
  risk_level = "unknown"
303
  else:
304
+ escalation_score = sum(w for (_, w), a in zip(ESCALATION_QUESTIONS, answers_and_none[:-1]) if a)
305
+ risk_level = (
306
+ "High" if escalation_score >= 16 else
307
+ "Moderate" if escalation_score >= 8 else
308
+ "Low"
309
+ )
310
+
311
+ messages = [msg1, msg2, msg3]
312
+ dates = [date1, date2, date3]
313
+ active = [(m, d) for m, d in zip(messages, dates) if m.strip()]
314
  if not active:
315
  return "Please enter at least one message."
316
 
317
+ results = [(analyze_single_message(m, THRESHOLDS.copy()), d) for m, d in active]
318
+ abuse_scores = [r[0][0] for r in results]
319
+ top_labels = [r[0][2][0][0] for r in results]
320
+ top_scores = [r[0][2][0][1] for r in results]
321
+ sentiments = [r[0][3]['label'] for r in results]
322
+ stages = [r[0][4] for r in results]
323
+ darvo_scores = [r[0][5] for r in results]
324
+ dates_used = [r[1] or "Undated" for r in results] # Store dates for future mapping
325
+
326
+ composite_abuse = int(round(sum(abuse_scores) / len(abuse_scores)))
327
+ top_label = f"{top_labels[0]} – {int(round(top_scores[0] * 100))}%"
328
+
 
 
 
 
 
 
 
 
 
 
 
329
  most_common_stage = max(set(stages), key=stages.count)
330
+ stage_text = RISK_STAGE_LABELS[most_common_stage]
331
+
332
+ avg_darvo = round(sum(darvo_scores) / len(darvo_scores), 3)
333
+ darvo_blurb = ""
334
+ if avg_darvo > 0.25:
335
+ level = "moderate" if avg_darvo < 0.65 else "high"
336
+ darvo_blurb = f"\n\n🎭 **DARVO Score: {avg_darvo}** β†’ This indicates a **{level} likelihood** of narrative reversal (DARVO), where the speaker may be denying, attacking, or reversing blame."
337
 
 
338
  out = f"Abuse Intensity: {composite_abuse}%\n"
339
+ out += "πŸ“Š This reflects the strength and severity of detected abuse patterns in the message(s).\n\n"
340
+
341
+ if escalation_score is None:
342
  out += "Escalation Potential: Unknown (Checklist not completed)\n"
343
+ out += "πŸ” *This section was not completed. Escalation potential is unknown.*\n\n"
344
  else:
345
+ out += f"Escalation Potential: {risk_level} ({escalation_score}/{sum(w for _, w in ESCALATION_QUESTIONS)})\n"
346
+ out += "🚨 This indicates how many serious risk factors are present based on your answers to the safety checklist.\n"
 
 
 
 
 
 
 
 
 
 
 
 
 
347
 
348
+ out += generate_risk_snippet(composite_abuse, top_label, escalation_score if escalation_score is not None else 0, most_common_stage)
349
+ out += f"\n\n{stage_text}"
 
 
 
350
  out += darvo_blurb
351
 
352
+ pattern_labels = [r[0][2][0][0] for r in results] # top label for each message
353
+ timeline_image = generate_abuse_score_chart(dates_used, abuse_scores, pattern_labels)
354
+ return out, timeline_image
355
+
356
  message_date_pairs = [
357
+ (
358
+ gr.Textbox(label=f"Message {i+1}"),
359
+ gr.Textbox(label=f"Date {i+1} (optional)", placeholder="YYYY-MM-DD")
360
+ )
361
  for i in range(3)
362
  ]
363
+ textbox_inputs = [item for pair in message_date_pairs for item in pair]
364
+ quiz_boxes = [gr.Checkbox(label=q) for q, _ in ESCALATION_QUESTIONS]
365
  none_box = gr.Checkbox(label="None of the above")
366
 
367
  iface = gr.Interface(
368
  fn=analyze_composite,
369
+ inputs=textbox_inputs + quiz_boxes + [none_box],
370
+ outputs=[
371
+ gr.Textbox(label="Results"),
372
+ gr.Image(label="Risk Stage Timeline", type="pil")
373
+ ],
374
+ title="Abuse Pattern Detector + Escalation Quiz",
375
  allow_flagging="manual"
376
  )
377
 
378
  if __name__ == "__main__":
379
+ iface.launch()