SamanthaStorm committed on
Commit
f32b7e3
·
verified ·
1 Parent(s): fd6c90c

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +69 -104
app.py CHANGED
@@ -6,24 +6,17 @@ from transformers import RobertaForSequenceClassification, RobertaTokenizer
6
  from motif_tagging import detect_motifs
7
  import re
8
 
9
- # --- Sentiment Model: T5-based Emotion Classifier ---
10
  sentiment_tokenizer = AutoTokenizer.from_pretrained("mrm8488/t5-base-finetuned-emotion")
11
  sentiment_model = AutoModelForSeq2SeqLM.from_pretrained("mrm8488/t5-base-finetuned-emotion")
12
 
13
  EMOTION_TO_SENTIMENT = {
14
- "joy": "supportive",
15
- "love": "supportive",
16
- "surprise": "supportive",
17
- "neutral": "supportive",
18
- "sadness": "undermining",
19
- "anger": "undermining",
20
- "fear": "undermining",
21
- "disgust": "undermining",
22
- "shame": "undermining",
23
- "guilt": "undermining"
24
  }
25
 
26
- # --- Abuse Detection Model ---
27
  model_name = "SamanthaStorm/autotrain-jlpi4-mllvp"
28
  model = RobertaForSequenceClassification.from_pretrained(model_name, trust_remote_code=True)
29
  tokenizer = RobertaTokenizer.from_pretrained(model_name, trust_remote_code=True)
@@ -35,69 +28,48 @@ LABELS = [
35
  ]
36
 
37
  THRESHOLDS = {
38
- "blame shifting": 0.3,
39
- "contradictory statements": 0.32,
40
- "control": 0.48,
41
- "dismissiveness": 0.45,
42
- "gaslighting": 0.30,
43
- "guilt tripping": 0.20,
44
- "insults": 0.34,
45
- "obscure language": 0.25,
46
- "projection": 0.35,
47
- "recovery phase": 0.25,
48
- "threat": 0.25
49
  }
50
 
51
  PATTERN_WEIGHTS = {
52
- "gaslighting": 1.3,
53
- "control": 1.2,
54
- "dismissiveness": 0.8,
55
- "blame shifting": 0.8,
56
- "contradictory statements": 0.75
57
  }
58
 
59
  EXPLANATIONS = {
60
- "blame shifting": "Blame-shifting is when one person redirects responsibility onto someone else to avoid accountability.",
61
- "contradictory statements": "Contradictory statements confuse the listener by flipping positions or denying previous claims.",
62
- "control": "Control restricts another person’s autonomy through coercion, manipulation, or threats.",
63
- "dismissiveness": "Dismissiveness is belittling or disregarding another person’s feelings, needs, or opinions.",
64
- "gaslighting": "Gaslighting involves making someone question their own reality, memory, or perceptions.",
65
- "guilt tripping": "Guilt-tripping uses guilt to manipulate someone’s actions or decisions.",
66
- "insults": "Insults are derogatory or demeaning remarks meant to shame, belittle, or hurt someone.",
67
- "obscure language": "Obscure language manipulates through complexity, vagueness, or superiority to confuse the other person.",
68
- "projection": "Projection accuses someone else of the very behaviors or intentions the speaker is exhibiting.",
69
- "recovery phase": "Recovery phase statements attempt to soothe or reset tension without acknowledging harm or change.",
70
- "threat": "Threats use fear of harm (physical, emotional, or relational) to control or intimidate someone."
71
  }
72
 
73
  RISK_SNIPPETS = {
74
  "low": (
75
  "🟢 Risk Level: Low",
76
  "The language patterns here do not strongly indicate abuse.",
77
- "Continue to check in with yourself and notice how you feel in response to repeated patterns."
78
  ),
79
  "moderate": (
80
  "⚠️ Risk Level: Moderate to High",
81
- "This language includes control, guilt, or reversal tactics.",
82
- "These patterns often lead to emotional confusion and reduced self-trust. Document these messages or talk with someone safe."
83
  ),
84
  "high": (
85
  "🛑 Risk Level: High",
86
- "Language includes threats or coercive control, which are strong indicators of escalation.",
87
- "Consider creating a safety plan or contacting a support line. Trust your sense of unease."
88
  )
89
  }
90
 
91
- def generate_risk_snippet(abuse_score, top_label):
92
- if abuse_score >= 85:
93
- risk_level = "high"
94
- elif abuse_score >= 60:
95
- risk_level = "moderate"
96
- else:
97
- risk_level = "low"
98
- title, summary, advice = RISK_SNIPPETS[risk_level]
99
- return f"\n\n{title}\n{summary} (Pattern: **{top_label}**)\n💡 {advice}"
100
-
101
  DARVO_PATTERNS = {
102
  "blame shifting", "projection", "dismissiveness", "guilt tripping", "contradictory statements"
103
  }
@@ -107,8 +79,21 @@ DARVO_MOTIFS = [
107
  "you’re attacking me", "i’m done trying", "i’m the only one who cares"
108
  ]
109
 
 
 
 
 
 
 
 
 
 
 
 
 
 
110
  def detect_contradiction(message):
111
- contradiction_phrases = [
112
  (r"\b(i love you).{0,15}(i hate you|you ruin everything)", re.IGNORECASE),
113
  (r"\b(i’m sorry).{0,15}(but you|if you hadn’t)", re.IGNORECASE),
114
  (r"\b(i’m trying).{0,15}(you never|why do you)", re.IGNORECASE),
@@ -116,72 +101,56 @@ def detect_contradiction(message):
116
  (r"\b(i don’t care).{0,15}(you never think of me)", re.IGNORECASE),
117
  (r"\b(i guess i’m just).{0,15}(the bad guy|worthless|never enough)", re.IGNORECASE)
118
  ]
119
- return any(re.search(pattern, message, flags) for pattern, flags in contradiction_phrases)
120
 
121
  def calculate_darvo_score(patterns, sentiment_before, sentiment_after, motifs_found, contradiction_flag=False):
122
- pattern_hits = len([p.lower() for p in patterns if p.lower() in DARVO_PATTERNS])
123
  pattern_score = pattern_hits / len(DARVO_PATTERNS)
124
  sentiment_shift_score = max(0.0, sentiment_after - sentiment_before)
125
- motif_hits = len([m.lower() for m in motifs_found if m.lower() in DARVO_MOTIFS])
126
  motif_score = motif_hits / len(DARVO_MOTIFS)
127
  contradiction_score = 1.0 if contradiction_flag else 0.0
128
- darvo_score = (
129
- 0.3 * pattern_score +
130
- 0.3 * sentiment_shift_score +
131
- 0.25 * motif_score +
132
- 0.15 * contradiction_score
133
- )
134
- return round(min(darvo_score, 1.0), 3)
135
 
136
- ESCALATION_QUESTIONS = [
137
- ("Partner has access to firearms or weapons", 4),
138
- ("Partner threatened to kill you", 3),
139
- ("Partner threatened you with a weapon", 3),
140
- ("Partner has ever choked you, even if you considered it consensual at the time", 4),
141
- ("Partner injured or threatened your pet(s)", 3),
142
- ("Partner has broken your things, punched or kicked walls, or thrown things ", 2),
143
- ("Partner forced or coerced you into unwanted sexual acts", 3),
144
- ("Partner threatened to take away your children", 2),
145
- ("Violence has increased in frequency or severity", 3),
146
- ("Partner monitors your calls/GPS/social media", 2)
147
- ]
148
 
149
  def analyze_single_message(text, thresholds, motif_flags):
150
  motif_hits, matched_phrases = detect_motifs(text)
151
 
152
- # Sentiment Analysis
153
  input_ids = sentiment_tokenizer(f"emotion: {text}", return_tensors="pt").input_ids
154
  with torch.no_grad():
155
- outputs = sentiment_model.generate(input_ids)
156
- emotion = sentiment_tokenizer.decode(outputs[0], skip_special_tokens=True).strip().lower()
157
  sentiment = EMOTION_TO_SENTIMENT.get(emotion, "undermining")
158
  sentiment_score = 0.5 if sentiment == "undermining" else 0.0
159
 
160
- # Contradiction Check
161
- contradiction_flag = detect_contradiction(text)
 
 
 
162
 
163
- # Motifs
164
- motifs = [phrase for _, phrase in matched_phrases]
165
 
166
- # Model Prediction
167
  inputs = tokenizer(text, return_tensors="pt", truncation=True, padding=True)
168
  with torch.no_grad():
169
  outputs = model(**inputs)
170
  scores = torch.sigmoid(outputs.logits.squeeze(0)).numpy()
171
 
172
- threshold_labels = [label for label, score in zip(LABELS, scores) if score > thresholds[label]]
173
- top_patterns = sorted([(label, score) for label, score in zip(LABELS, scores)], key=lambda x: x[1], reverse=True)[:2]
174
  pattern_labels = threshold_labels + [label for label, _ in matched_phrases]
175
 
 
176
  darvo_score = calculate_darvo_score(pattern_labels, 0.0, sentiment_score, motifs, contradiction_flag)
177
 
178
- return (
179
- np.mean([score for _, score in top_patterns]) * 100,
180
- threshold_labels,
181
- top_patterns,
182
- darvo_score,
183
- {"label": sentiment, "emotion": emotion}
184
- )
185
  def analyze_composite(msg1, msg2, msg3, *answers_and_none):
186
  responses = answers_and_none[:len(ESCALATION_QUESTIONS)]
187
  none_selected = answers_and_none[-1]
@@ -196,23 +165,19 @@ def analyze_composite(msg1, msg2, msg3, *answers_and_none):
196
  results = [analyze_single_message(m, THRESHOLDS.copy(), []) for m in active]
197
  abuse_scores = [r[0] for r in results]
198
  darvo_scores = [r[3] for r in results]
199
- top_pattern = max({label for r in results for label in r[2]}, key=lambda l: abuse_scores[0])
200
- composite_abuse = int(round(sum(abuse_scores)/len(abuse_scores)))
201
- avg_darvo = round(sum(darvo_scores)/len(darvo_scores), 3)
202
 
203
  out = f"Abuse Intensity: {composite_abuse}%\n"
204
- out += f"Escalation Potential: {escalation_level} ({escalation_score}/{sum(w for _,w in ESCALATION_QUESTIONS)})"
205
- out += generate_risk_snippet(composite_abuse, top_pattern[0])
206
  if avg_darvo > 0.25:
207
  level = "moderate" if avg_darvo < 0.65 else "high"
208
  out += f"\n\nDARVO Score: {avg_darvo} → This indicates a **{level} likelihood** of narrative reversal (DARVO), where the speaker may be denying, attacking, or reversing blame."
209
  return out
210
 
211
- textbox_inputs = [
212
- gr.Textbox(label="Message 1"),
213
- gr.Textbox(label="Message 2"),
214
- gr.Textbox(label="Message 3")
215
- ]
216
  quiz_boxes = [gr.Checkbox(label=q) for q, _ in ESCALATION_QUESTIONS]
217
  none_box = gr.Checkbox(label="None of the above")
218
 
 
6
  from motif_tagging import detect_motifs
7
  import re
8
 
9
# --- Sentiment model: T5 fine-tuned for emotion classification ---
# Produces a single emotion word (e.g. "joy", "anger") which is mapped to a
# supportive/undermining sentiment via EMOTION_TO_SENTIMENT.
_SENTIMENT_MODEL_ID = "mrm8488/t5-base-finetuned-emotion"
sentiment_tokenizer = AutoTokenizer.from_pretrained(_SENTIMENT_MODEL_ID)
sentiment_model = AutoModelForSeq2SeqLM.from_pretrained(_SENTIMENT_MODEL_ID)
12
 
13
# Collapse the model's fine-grained emotion label onto a two-way
# supportive/undermining axis used for threshold adjustment and DARVO scoring.
_SUPPORTIVE_EMOTIONS = ("joy", "love", "surprise", "neutral")
_UNDERMINING_EMOTIONS = ("sadness", "anger", "fear", "disgust", "shame", "guilt")
EMOTION_TO_SENTIMENT = {
    **{emotion: "supportive" for emotion in _SUPPORTIVE_EMOTIONS},
    **{emotion: "undermining" for emotion in _UNDERMINING_EMOTIONS},
}
18
 
19
# --- Abuse pattern classifier: multi-label RoBERTa checkpoint ---
# Logits are passed through a sigmoid per label (see analyze_single_message),
# so each abuse pattern is scored independently.
model_name = "SamanthaStorm/autotrain-jlpi4-mllvp"
tokenizer = RobertaTokenizer.from_pretrained(model_name, trust_remote_code=True)
model = RobertaForSequenceClassification.from_pretrained(model_name, trust_remote_code=True)
 
28
  ]
29
 
30
# Per-label sigmoid-score cutoffs: a pattern is reported only when its score
# exceeds the (sentiment-adjusted) threshold in analyze_single_message.
THRESHOLDS = {
    "blame shifting": 0.3,
    "contradictory statements": 0.32,
    "control": 0.48,
    "dismissiveness": 0.45,
    "gaslighting": 0.30,
    "guilt tripping": 0.20,
    "insults": 0.34,
    "obscure language": 0.25,
    "projection": 0.35,
    "recovery phase": 0.25,
    "threat": 0.25,
}
35
 
36
# Relative severity multipliers applied to pattern scores when computing the
# abuse intensity; labels not listed here default to a weight of 1.0.
PATTERN_WEIGHTS = {
    "gaslighting": 1.3,
    "control": 1.2,
    "dismissiveness": 0.8,
    "blame shifting": 0.8,
    "contradictory statements": 0.75,
}
40
 
41
# One-sentence, user-facing description of each abuse pattern label.
# Keys mirror LABELS / THRESHOLDS exactly.
EXPLANATIONS = {
    "blame shifting": "Blame-shifting redirects responsibility to avoid accountability.",
    "contradictory statements": "Flipping positions or denying previous claims.",
    "control": "Attempts to restrict another person’s autonomy.",
    "dismissiveness": "Disregarding or belittling someone’s feelings or needs.",
    "gaslighting": "Manipulating someone into questioning their reality.",
    "guilt tripping": "Using guilt to control or pressure.",
    "insults": "Derogatory or demeaning language.",
    "obscure language": "Vague, superior, or confusing language used manipulatively.",
    "projection": "Accusing someone else of your own behaviors.",
    "recovery phase": "Resetting tension without real change.",
    "threat": "Using fear or harm to control or intimidate.",
}
54
 
55
# (title, summary, advice) text for each risk tier, consumed by
# generate_risk_snippet when formatting the composite report.
RISK_SNIPPETS = {
    "low": (
        "🟢 Risk Level: Low",
        "The language patterns here do not strongly indicate abuse.",
        "Check in with yourself and monitor for repeated patterns.",
    ),
    "moderate": (
        "⚠️ Risk Level: Moderate to High",
        "Language includes control, guilt, or reversal tactics.",
        "These patterns reduce self-trust. Document or talk with someone safe.",
    ),
    "high": (
        "🛑 Risk Level: High",
        "Strong indicators of coercive control or threat present.",
        "Consider building a safety plan or contacting support.",
    ),
}
72
 
 
 
 
 
 
 
 
 
 
 
73
# Abuse-pattern labels that count toward the DARVO
# (Deny, Attack, Reverse Victim and Offender) score.
DARVO_PATTERNS = {
    "blame shifting",
    "projection",
    "dismissiveness",
    "guilt tripping",
    "contradictory statements",
}
 
79
  "you’re attacking me", "i’m done trying", "i’m the only one who cares"
80
  ]
81
 
82
# Danger-assessment style checklist: (question text, weight) pairs. Weights
# are summed in analyze_composite to produce the escalation score.
# NOTE(review): "thrown things " keeps its original trailing space so the
# checkbox label stays byte-identical to existing data — confirm before trimming.
ESCALATION_QUESTIONS = [
    ("Partner has access to firearms or weapons", 4),
    ("Partner threatened to kill you", 3),
    ("Partner threatened you with a weapon", 3),
    ("Partner has ever choked you, even if you considered it consensual at the time", 4),
    ("Partner injured or threatened your pet(s)", 3),
    ("Partner has broken your things, punched or kicked walls, or thrown things ", 2),
    ("Partner forced or coerced you into unwanted sexual acts", 3),
    ("Partner threatened to take away your children", 2),
    ("Violence has increased in frequency or severity", 3),
    ("Partner monitors your calls/GPS/social media", 2),
]
94
+
95
  def detect_contradiction(message):
96
+ patterns = [
97
  (r"\b(i love you).{0,15}(i hate you|you ruin everything)", re.IGNORECASE),
98
  (r"\b(i’m sorry).{0,15}(but you|if you hadn’t)", re.IGNORECASE),
99
  (r"\b(i’m trying).{0,15}(you never|why do you)", re.IGNORECASE),
 
101
  (r"\b(i don’t care).{0,15}(you never think of me)", re.IGNORECASE),
102
  (r"\b(i guess i’m just).{0,15}(the bad guy|worthless|never enough)", re.IGNORECASE)
103
  ]
104
+ return any(re.search(p, message, flags) for p, flags in patterns)
105
 
106
def calculate_darvo_score(patterns, sentiment_before, sentiment_after, motifs_found, contradiction_flag=False):
    """Estimate how strongly a message exhibits DARVO dynamics
    (Deny, Attack, Reverse Victim and Offender).

    Args:
        patterns: iterable of detected abuse-pattern labels.
        sentiment_before: sentiment score before the message (numeric).
        sentiment_after: sentiment score after the message; only a positive
            shift (after - before) contributes.
        motifs_found: iterable of matched motif phrases.
        contradiction_flag: True when detect_contradiction matched.

    Returns:
        float: weighted DARVO score, clamped to [0.0, 1.0], rounded to 3 dp.
    """
    # Lowercase before membership tests so labels coming from motif tagging
    # match the lowercase DARVO_PATTERNS set regardless of casing (this
    # normalization existed previously and was dropped; restored for
    # consistency with the motif check below).
    pattern_hits = sum(1 for p in patterns if p.lower() in DARVO_PATTERNS)
    pattern_score = pattern_hits / len(DARVO_PATTERNS)
    sentiment_shift_score = max(0.0, sentiment_after - sentiment_before)
    motif_hits = sum(1 for m in motifs_found if m.lower() in DARVO_MOTIFS)
    motif_score = motif_hits / len(DARVO_MOTIFS)
    contradiction_score = 1.0 if contradiction_flag else 0.0
    darvo_score = (
        0.3 * pattern_score
        + 0.3 * sentiment_shift_score
        + 0.25 * motif_score
        + 0.15 * contradiction_score
    )
    return round(min(darvo_score, 1.0), 3)
 
 
 
 
 
 
114
 
115
def generate_risk_snippet(score, top_label):
    """Format a risk summary for an abuse-intensity score.

    Tier selection: score >= 85 -> "high", score >= 60 -> "moderate",
    otherwise "low". The matching (title, summary, advice) triple is read
    from RISK_SNIPPETS and the dominant pattern label is interpolated.
    """
    if score >= 85:
        level = "high"
    elif score >= 60:
        level = "moderate"
    else:
        level = "low"
    title, summary, advice = RISK_SNIPPETS[level]
    return f"\n\n{title}\n{summary} (Pattern: **{top_label}**)\n💡 {advice}"
 
 
 
 
 
 
 
 
119
 
120
def analyze_single_message(text, thresholds, motif_flags):
    """Run the full per-message analysis pipeline.

    Args:
        text: raw message string.
        thresholds: per-label cutoffs (usually a copy of THRESHOLDS).
        motif_flags: currently unused; kept for caller compatibility.

    Returns:
        Tuple of (abuse_score in [0, 100], threshold_labels, top_patterns,
        darvo_score, {"label": sentiment, "emotion": emotion}).
    """
    motif_hits, matched_phrases = detect_motifs(text)

    # Sentiment: T5 emotion model -> coarse supportive/undermining label.
    input_ids = sentiment_tokenizer(f"emotion: {text}", return_tensors="pt").input_ids
    with torch.no_grad():
        sentiment_out = sentiment_model.generate(input_ids)
    # strip() restored: a decode with stray whitespace would otherwise miss
    # the EMOTION_TO_SENTIMENT lookup and silently default to "undermining".
    emotion = sentiment_tokenizer.decode(sentiment_out[0], skip_special_tokens=True).strip().lower()
    sentiment = EMOTION_TO_SENTIMENT.get(emotion, "undermining")
    sentiment_score = 0.5 if sentiment == "undermining" else 0.0

    # Loosen thresholds for undermining messages, tighten for supportive ones.
    # sentiment is always one of these two values (see EMOTION_TO_SENTIMENT
    # plus the .get default above), so no third branch is needed.
    multiplier = 0.8 if sentiment == "undermining" else 1.2
    adjusted_thresholds = {label: cutoff * multiplier for label, cutoff in thresholds.items()}

    contradiction_flag = detect_contradiction(text)
    motifs = [phrase for _, phrase in matched_phrases]  # renamed to avoid shadowing `text`

    # Multi-label abuse classifier; independent sigmoid score per label.
    inputs = tokenizer(text, return_tensors="pt", truncation=True, padding=True)
    with torch.no_grad():
        outputs = model(**inputs)
    scores = torch.sigmoid(outputs.logits.squeeze(0)).numpy()

    threshold_labels = [l for l, s in zip(LABELS, scores) if s > adjusted_thresholds[l]]
    top_patterns = sorted(zip(LABELS, scores), key=lambda x: x[1], reverse=True)[:2]
    pattern_labels = threshold_labels + [label for label, _ in matched_phrases]

    # Weighted mean of the two strongest patterns. Clamp at 100: weights
    # above 1.0 in PATTERN_WEIGHTS could otherwise yield an intensity > 100%.
    weighted_mean = np.mean([s * PATTERN_WEIGHTS.get(l, 1.0) for l, s in top_patterns]) * 100
    abuse_score = round(min(float(weighted_mean), 100.0), 2)

    darvo_score = calculate_darvo_score(pattern_labels, 0.0, sentiment_score, motifs, contradiction_flag)

    return abuse_score, threshold_labels, top_patterns, darvo_score, {"label": sentiment, "emotion": emotion}
153
+
 
 
 
 
 
154
  def analyze_composite(msg1, msg2, msg3, *answers_and_none):
155
  responses = answers_and_none[:len(ESCALATION_QUESTIONS)]
156
  none_selected = answers_and_none[-1]
 
165
  results = [analyze_single_message(m, THRESHOLDS.copy(), []) for m in active]
166
  abuse_scores = [r[0] for r in results]
167
  darvo_scores = [r[3] for r in results]
168
+ top_label = max({label for r in results for label in r[2]}, key=lambda l: abuse_scores[0])
169
+ composite_abuse = int(round(sum(abuse_scores) / len(abuse_scores)))
170
+ avg_darvo = round(sum(darvo_scores) / len(darvo_scores), 3)
171
 
172
  out = f"Abuse Intensity: {composite_abuse}%\n"
173
+ out += f"Escalation Potential: {escalation_level} ({escalation_score}/{sum(w for _, w in ESCALATION_QUESTIONS)})"
174
+ out += generate_risk_snippet(composite_abuse, top_label)
175
  if avg_darvo > 0.25:
176
  level = "moderate" if avg_darvo < 0.65 else "high"
177
  out += f"\n\nDARVO Score: {avg_darvo} → This indicates a **{level} likelihood** of narrative reversal (DARVO), where the speaker may be denying, attacking, or reversing blame."
178
  return out
179
 
180
# --- Gradio inputs: three message boxes plus the escalation-risk quiz ---
textbox_inputs = [gr.Textbox(label=f"Message {n}") for n in (1, 2, 3)]
quiz_boxes = [gr.Checkbox(label=question) for question, _weight in ESCALATION_QUESTIONS]
none_box = gr.Checkbox(label="None of the above")
183