SamanthaStorm committed on
Commit
d315105
·
verified ·
1 Parent(s): 1dbc865

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +49 -142
app.py CHANGED
@@ -98,152 +98,60 @@ def generate_risk_snippet(abuse_score, top_label):
98
  title, summary, advice = RISK_SNIPPETS[risk_level]
99
  return f"\n\n{title}\n{summary} (Pattern: **{top_label}**)\n💡 {advice}"
100
 
101
- # --- DARVO Detection ---
102
# Pattern labels that count as DARVO-related when they show up in a
# message's detected patterns (compared in lowercase).
DARVO_PATTERNS = {
    "blame shifting",
    "projection",
    "dismissiveness",
    "guilt tripping",
    "contradictory statements",
}

# Lowercase stock phrases treated as DARVO motifs. The literals use the
# typographic apostrophe (’), not the ASCII one.
DARVO_MOTIFS = [
    "i guess i’m the bad guy",
    "after everything i’ve done",
    "you always twist everything",
    "so now it’s all my fault",
    "i’m the villain",
    "i’m always wrong",
    "you never listen",
    "you’re attacking me",
    "i’m done trying",
    "i’m the only one who cares",
]
111
 
112
# Compiled once at import time instead of on every call. Each pattern pairs an
# opening phrase with a conflicting follow-up that starts at most 15
# characters later. The class [’'] accepts both the typographic and the plain
# ASCII apostrophe, so text typed on an ordinary keyboard is matched as well.
_CONTRADICTION_PATTERNS = tuple(
    re.compile(pattern, re.IGNORECASE)
    for pattern in (
        r"\b(i love you).{0,15}(i hate you|you ruin everything)",
        r"\b(i[’']m sorry).{0,15}(but you|if you hadn[’']t)",
        r"\b(i[’']m trying).{0,15}(you never|why do you)",
        r"\b(do what you want).{0,15}(you[’']ll regret it|i always give everything)",
        r"\b(i don[’']t care).{0,15}(you never think of me)",
        r"\b(i guess i[’']m just).{0,15}(the bad guy|worthless|never enough)",
    )
)


def detect_contradiction(message):
    """Return True when *message* contains a known contradictory phrase pair.

    Args:
        message: Text to scan. Matching is case-insensitive.

    Returns:
        True if any contradiction pattern matches, otherwise False. Empty or
        missing input yields False.
    """
    if not message:
        return False
    return any(pattern.search(message) for pattern in _CONTRADICTION_PATTERNS)
127
-
128
def calculate_darvo_score(patterns, sentiment_before, sentiment_after, motifs_found, contradiction_flag=False):
    """Combine four signals into a single DARVO score.

    Args:
        patterns: Pattern labels detected for the message; each is lowercased
            and checked against DARVO_PATTERNS.
        sentiment_before: Baseline sentiment value.
        sentiment_after: Sentiment value to compare with the baseline; only an
            increase contributes.
        motifs_found: Phrases found in the message; each is lowercased and
            checked against DARVO_MOTIFS.
        contradiction_flag: Whether a contradictory phrase pair was detected.

    Returns:
        The weighted sum (0.3 patterns, 0.3 sentiment shift, 0.25 motifs,
        0.15 contradiction), capped at 1.0 and rounded to three decimals.
    """
    matched_patterns = sum(1 for name in patterns if name.lower() in DARVO_PATTERNS)
    pattern_component = matched_patterns / len(DARVO_PATTERNS)

    # A drop in sentiment contributes nothing rather than a negative amount.
    shift_component = max(0.0, sentiment_after - sentiment_before)

    matched_motifs = sum(1 for phrase in motifs_found if phrase.lower() in DARVO_MOTIFS)
    motif_component = matched_motifs / len(DARVO_MOTIFS)

    if contradiction_flag:
        contradiction_component = 1.0
    else:
        contradiction_component = 0.0

    weighted_total = (
        0.3 * pattern_component +
        0.3 * shift_component +
        0.25 * motif_component +
        0.15 * contradiction_component
    )
    return round(min(weighted_total, 1.0), 3)
142
-
143
- # --- Sentiment Function ---
144
def custom_sentiment(text):
    """Classify the emotion of *text* and map it to a sentiment label.

    The text is wrapped in an "emotion: ..." prompt, passed through
    sentiment_model.generate, and the first generated sequence is decoded,
    stripped and lowercased. That emotion is looked up in
    EMOTION_TO_SENTIMENT; emotions missing from the mapping fall back to
    "undermining".

    Returns:
        A dict with keys "label" (the mapped sentiment) and "emotion" (the
        decoded emotion string).
    """
    prompt = f"emotion: {text}"
    encoded = sentiment_tokenizer(prompt, return_tensors="pt")
    with torch.no_grad():
        generated = sentiment_model.generate(encoded.input_ids)
    decoded = sentiment_tokenizer.decode(generated[0], skip_special_tokens=True)
    emotion = decoded.strip().lower()
    return {
        "label": EMOTION_TO_SENTIMENT.get(emotion, "undermining"),
        "emotion": emotion,
    }
151
 
152
- # --- Abuse Scoring ---
153
def calculate_abuse_level(scores, thresholds, motif_hits=None, flag_multiplier=1.0):
    """Turn per-label scores into an abuse level between 0 and 100.

    Args:
        scores: Scores aligned positionally with LABELS.
        thresholds: Mapping from label to the score it must exceed to count.
        motif_hits: Accepted but not used by this function.
        flag_multiplier: Factor applied to the averaged score.

    Returns:
        The mean of the weighted above-threshold scores as a percentage
        (rounded to two decimals before the multiplier is applied), capped at
        100.0. With no label above threshold the base is 0.0.
    """
    weighted = []
    for label, score in zip(LABELS, scores):
        if score > thresholds[label]:
            # Labels without an explicit weight count at face value.
            weighted.append(score * PATTERN_WEIGHTS.get(label, 1.0))

    if weighted:
        level = round(np.mean(weighted) * 100, 2)
    else:
        level = 0.0
    level *= flag_multiplier
    return min(level, 100.0)
159
 
160
- # --- Single Message Analysis ---
161
def analyze_single_message(text, thresholds, motif_flags):
    """Score one message for abuse patterns, DARVO and sentiment.

    Args:
        text: The message to analyze.
        thresholds: Mapping from label to its detection threshold.
        motif_flags: Accepted but not used by this function.

    Returns:
        A 5-tuple ``(abuse_level, pattern_labels_used, top_patterns,
        darvo_score, sentiment)`` where ``top_patterns`` is the two
        highest-scoring ``(label, score)`` pairs and ``sentiment`` is the dict
        returned by custom_sentiment.
    """
    motif_hits, matched_phrases = detect_motifs(text)
    sentiment = custom_sentiment(text)
    is_undermining = sentiment["label"] == "undermining"
    print(f"Detected emotion: {sentiment['emotion']} → sentiment: {sentiment['label']}")

    # An undermining tone lowers every threshold by 20% and feeds a 0.5
    # sentiment shift into the DARVO score.
    if is_undermining:
        sentiment_score = 0.5
        adjusted_thresholds = {name: limit * 0.8 for name, limit in thresholds.items()}
    else:
        sentiment_score = 0.0
        adjusted_thresholds = thresholds.copy()

    encoded = tokenizer(text, return_tensors="pt", truncation=True, padding=True)
    with torch.no_grad():
        outputs = model(**encoded)
    scores = torch.sigmoid(outputs.logits.squeeze(0)).numpy()

    threshold_labels = []
    for label, score in zip(LABELS, scores):
        if score > adjusted_thresholds[label]:
            threshold_labels.append(label)
    phrase_labels = [label for label, _ in matched_phrases]
    pattern_labels_used = list(set(threshold_labels + phrase_labels))

    abuse_level = calculate_abuse_level(scores, adjusted_thresholds, motif_hits)
    top_patterns = sorted(zip(LABELS, scores), key=lambda pair: pair[1], reverse=True)[:2]

    motif_phrases = [phrase for _, phrase in matched_phrases]
    contradiction_flag = detect_contradiction(text)
    darvo_score = calculate_darvo_score(
        pattern_labels_used, 0.0, sentiment_score, motif_phrases, contradiction_flag
    )
    return abuse_level, pattern_labels_used, top_patterns, darvo_score, sentiment
180
 
181
- # --- Composite Analysis ---
182
def analyze_composite(msg1, msg2, msg3, flags):
    """Analyze up to three messages together and build the result text.

    Args:
        msg1, msg2, msg3: Message strings; whitespace-only ones are ignored.
        flags: Selected contextual flag strings. Each adds its weight (3 for
            unrecognized flags) to a boost that is divided by the number of
            analyzed messages.

    Returns:
        A formatted result string, or a prompt to enter a message when all
        three are blank.
    """
    thresholds = THRESHOLDS.copy()
    messages = [msg1, msg2, msg3]
    active_messages = [text for text in messages if text.strip()]
    if not active_messages:
        return "Please enter at least one message."

    results = [analyze_single_message(text, thresholds, flags) for text in active_messages]
    sentiment_labels = [entry[4]["label"] for entry in results]

    # Conflicting tone logic
    undermining_count = sentiment_labels.count("undermining")
    supportive_count = sentiment_labels.count("supportive")
    mixed_tone = bool(undermining_count and supportive_count)

    # NOTE(review): the rescaled thresholds are not read again below.
    if undermining_count > supportive_count:
        thresholds = {name: limit * 0.9 for name, limit in thresholds.items()}
    elif mixed_tone:
        thresholds = {name: limit * 0.95 for name, limit in thresholds.items()}
        print("⚖️ Detected conflicting sentiment across messages.")

    abuse_scores = [entry[0] for entry in results]
    darvo_scores = [entry[3] for entry in results]
    average_darvo = round(sum(darvo_scores) / len(darvo_scores), 3)
    base_score = sum(abuse_scores) / len(abuse_scores)

    # Count, per label, how many messages list it among their top patterns.
    label_sets = [[label for label, _ in entry[2]] for entry in results]
    label_counts = {}
    for label in set().union(*label_sets):
        label_counts[label] = sum(label in labels for labels in label_sets)
    top_name = max(label_counts, key=label_counts.get)
    top_explanation = EXPLANATIONS.get(top_name, "")

    flag_weights = {
        "They've threatened harm": 6,
        "They isolate me": 5,
        "I’ve changed my behavior out of fear": 4,
        "They monitor/follow me": 4,
        "I feel unsafe when alone with them": 6,
    }
    flag_boost = sum(flag_weights.get(flag, 3) for flag in flags) / len(active_messages)
    composite_score = min(base_score + flag_boost, 100)

    # Fewer messages scale the score down; three messages leave it untouched.
    scale = {1: 0.85, 2: 0.93}.get(len(active_messages))
    if scale is not None:
        composite_score *= scale
    composite_score = round(min(composite_score, 100), 2)

    parts = [
        f"These messages show a pattern of **{top_name}** and are estimated to be {composite_score}% likely abusive."
    ]
    if top_explanation:
        parts.append(f"\n• {top_explanation}")
    if average_darvo > 0.25:
        darvo_descriptor = "moderate" if average_darvo < 0.65 else "high"
        parts.append(
            f"\n\nDARVO Score: {average_darvo} → This indicates a **{darvo_descriptor} likelihood** of narrative reversal (DARVO), where the speaker may be denying, attacking, or reversing blame."
        )
    parts.append(generate_risk_snippet(composite_score, top_name))
    if mixed_tone:
        parts.append(
            "\n\n⚖️ These messages contain **conflicting emotional tones** — this may indicate mixed signals, ambivalence, or a push-pull dynamic. Use caution interpreting any one message alone."
        )
    return "".join(parts)
 
 
 
 
 
247
 
248
  # --- Gradio Interface ---
249
  textbox_inputs = [
@@ -252,18 +160,17 @@ textbox_inputs = [
252
  gr.Textbox(label="Message 3")
253
  ]
254
 
255
# Context the user can tick alongside the messages; the selected labels are
# passed to analyze_composite as its last argument.
checkboxes = gr.CheckboxGroup(
    label="Contextual Flags",
    choices=[
        "They've threatened harm",
        "They isolate me",
        "I’ve changed my behavior out of fear",
        "They monitor/follow me",
        "I feel unsafe when alone with them",
    ],
)

iface = gr.Interface(
    fn=analyze_composite,
    inputs=[*textbox_inputs, checkboxes],
    outputs=gr.Textbox(label="Results"),
    title="Abuse Pattern Detector (Multi-Message)",
    allow_flagging="manual",
)

if __name__ == "__main__":
    iface.launch()
 
98
  title, summary, advice = RISK_SNIPPETS[risk_level]
99
  return f"\n\n{title}\n{summary} (Pattern: **{top_label}**)\n💡 {advice}"
100
 
101
+ # --- Escalation Quiz Questions & Weights ---
102
ESCALATION_QUESTIONS = [
    # (checkbox label, weight added to the escalation score when ticked)
    ("Partner has access to firearms or weapons", 4),
    ("Partner threatened to kill you", 3),
    ("Partner threatened you with a weapon", 3),
    ("Partner ever choked or strangled you", 4),
    ("Partner injured or threatened your pet(s)", 3),
    ("Partner destroyed property to intimidate you", 2),
    ("Partner forced you into unwanted sexual acts", 3),
    ("Partner threatened to take away your children", 2),
    ("Violence has increased in frequency or severity", 3),
    ("Partner monitors your calls/GPS/social media", 2),
]
114
 
115
+ # --- Core Analysis Functions (unchanged) ---
116
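+ # NOTE(review): analyze_single_message, calculate_darvo_score, etc. are only elided by this placeholder, not defined by it; analyze_composite below calls analyze_single_message, so their full definitions must remain in the file.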
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
117
 
118
+ # --- Composite Analysis with Escalation Quiz ---
119
def _escalation_level(score):
    """Map a numeric escalation score to its "Low"/"Moderate"/"High" bucket."""
    if score >= 16:
        return "High"
    if score >= 8:
        return "Moderate"
    return "Low"


def _dominant_pattern(results):
    """Return the label seen most often among the per-message top patterns.

    Each result's element 2 is a list of (label, score) pairs. Ties on
    frequency are broken by the highest score seen for the label.
    """
    counts = {}
    best_score = {}
    for result in results:
        for label, score in result[2]:
            counts[label] = counts.get(label, 0) + 1
            best_score[label] = max(best_score.get(label, 0.0), float(score))
    return max(counts, key=lambda label: (counts[label], best_score[label]))


def analyze_composite(msg1, msg2, msg3, *answers_and_none):
    """Analyze up to three messages and score the escalation quiz.

    Args:
        msg1, msg2, msg3: Message strings; blank or missing ones are ignored.
        *answers_and_none: One truthy/falsy answer per entry of
            ESCALATION_QUESTIONS, in order, followed by the "None of the
            above" flag.

    Returns:
        A result string with the abuse intensity, the escalation bucket and
        score, and the risk snippet; or a prompt to enter a message when all
        three messages are blank.
    """
    # Guard first, so blank input is rejected before any analysis work.
    active = [m for m in (msg1, msg2, msg3) if m and m.strip()]
    if not active:
        return "Please enter at least one message."

    # Split the varargs: the first len(ESCALATION_QUESTIONS) values are the
    # quiz answers; the "none" flag counts only if something follows them,
    # so the last quiz answer is never mistaken for it.
    question_count = len(ESCALATION_QUESTIONS)
    responses = answers_and_none[:question_count]
    extras = answers_and_none[question_count:]
    none_selected = bool(extras) and bool(extras[-1])

    # "None of the above" overrides any ticked boxes.
    if none_selected:
        escalation_score = 0
    else:
        escalation_score = sum(
            weight
            for (_, weight), ticked in zip(ESCALATION_QUESTIONS, responses)
            if ticked
        )
    escalation_level = _escalation_level(escalation_score)
    max_escalation = sum(weight for _, weight in ESCALATION_QUESTIONS)

    # Per-message abuse analysis, averaged across the active messages.
    thresholds = THRESHOLDS.copy()
    results = [analyze_single_message(m, thresholds, []) for m in active]
    abuse_scores = [r[0] for r in results]
    composite_abuse = round(sum(abuse_scores) / len(abuse_scores), 2)
    top_pattern = _dominant_pattern(results)

    out = f"Abuse Intensity: {composite_abuse}%\n"
    out += f"Escalation Potential: {escalation_level} ({escalation_score}/{max_escalation})"
    out += generate_risk_snippet(composite_abuse, top_pattern)
    return out
155
 
156
  # --- Gradio Interface ---
157
  textbox_inputs = [
 
160
  gr.Textbox(label="Message 3")
161
  ]
162
 
163
+ # Escalation quiz inputs
164
# One checkbox per escalation question, in ESCALATION_QUESTIONS order, so the
# answers reach analyze_composite positionally; the "None of the above" box
# comes last.
quiz_boxes = [gr.Checkbox(label=question) for question, _weight in ESCALATION_QUESTIONS]
none_box = gr.Checkbox(label="None of the above")

iface = gr.Interface(
    fn=analyze_composite,
    inputs=[*textbox_inputs, *quiz_boxes, none_box],
    outputs=gr.Textbox(label="Results"),
    title="Abuse Pattern Detector + Escalation Quiz",
    allow_flagging="manual",
)

if __name__ == "__main__":
    iface.launch()