SamanthaStorm committed on
Commit
c6c79a8
·
verified ·
1 Parent(s): 8240c34

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +50 -128
app.py CHANGED
@@ -1,8 +1,7 @@
1
  import gradio as gr
2
  import torch
3
  import numpy as np
4
- from transformers import pipeline, AutoTokenizer, AutoModelForSeq2SeqLM
5
- from transformers import RobertaForSequenceClassification, RobertaTokenizer
6
  from motif_tagging import detect_motifs
7
  import re
8
 
@@ -31,47 +30,13 @@ PATTERN_WEIGHTS = {
31
  "blame shifting": 0.8, "contradictory statements": 0.75
32
  }
33
 
34
- EXPLANATIONS = {
35
- "blame shifting": "Blame-shifting redirects responsibility to avoid accountability.",
36
- "contradictory statements": "Flipping positions or denying previous claims.",
37
- "control": "Attempts to restrict another person’s autonomy.",
38
- "dismissiveness": "Disregarding or belittling someone’s feelings or needs.",
39
- "gaslighting": "Manipulating someone into questioning their reality.",
40
- "guilt tripping": "Using guilt to control or pressure.",
41
- "insults": "Derogatory or demeaning language.",
42
- "obscure language": "Vague, superior, or confusing language used manipulatively.",
43
- "projection": "Accusing someone else of your own behaviors.",
44
- "recovery phase": "Resetting tension without real change.",
45
- "threat": "Using fear or harm to control or intimidate."
46
  }
47
 
48
- RISK_SNIPPETS = {
49
- "low": (
50
- "🟢 Risk Level: Low",
51
- "The language patterns here do not strongly indicate abuse.",
52
- "Check in with yourself and monitor for repeated patterns."
53
- ),
54
- "moderate": (
55
- "⚠️ Risk Level: Moderate to High",
56
- "Language includes control, guilt, or reversal tactics.",
57
- "These patterns reduce self-trust. Document or talk with someone safe."
58
- ),
59
- "high": (
60
- "🛑 Risk Level: High",
61
- "Strong indicators of coercive control or threat present.",
62
- "Consider building a safety plan or contacting support."
63
- )
64
- }
65
-
66
- DARVO_PATTERNS = {
67
- "blame shifting", "projection", "dismissiveness", "guilt tripping", "contradictory statements"
68
- }
69
- DARVO_MOTIFS = [
70
- "i guess i’m the bad guy", "after everything i’ve done", "you always twist everything",
71
- "so now it’s all my fault", "i’m the villain", "i’m always wrong", "you never listen",
72
- "you’re attacking me", "i’m done trying", "i’m the only one who cares"
73
- ]
74
-
75
  ESCALATION_QUESTIONS = [
76
  ("Partner has access to firearms or weapons", 4),
77
  ("Partner threatened to kill you", 3),
@@ -96,50 +61,19 @@ def detect_contradiction(message):
96
  ]
97
  return any(re.search(p, message, flags) for p, flags in patterns)
98
 
99
- def calculate_darvo_score(patterns, sentiment_before, sentiment_after, motifs_found, contradiction_flag=False):
100
- pattern_hits = len([p for p in patterns if p in DARVO_PATTERNS])
101
- pattern_score = pattern_hits / len(DARVO_PATTERNS)
102
- sentiment_shift_score = max(0.0, sentiment_after - sentiment_before)
103
- motif_hits = len([m for m in motifs_found if m.lower() in DARVO_MOTIFS])
104
- motif_score = motif_hits / len(DARVO_MOTIFS)
105
- contradiction_score = 1.0 if contradiction_flag else 0.0
106
- return round(min(0.3 * pattern_score + 0.3 * sentiment_shift_score + 0.25 * motif_score + 0.15 * contradiction_score, 1.0), 3)
107
-
108
- def generate_risk_snippet(abuse_score, top_label, escalation_score):
109
- if abuse_score >= 85 or escalation_score >= 16:
110
- risk_level = "high"
111
- elif abuse_score >= 60 or escalation_score >= 8:
112
- risk_level = "moderate"
113
- else:
114
- risk_level = "low"
115
-
116
- pattern_label = top_label.split(" – ")[0]
117
- pattern_score = top_label.split(" – ")[1] if " – " in top_label else ""
118
-
119
- base = f"\n\n🛑 Risk Level: {risk_level.capitalize()}\n"
120
- base += f"This message shows strong indicators of **{pattern_label}**. "
121
-
122
- if risk_level == "high":
123
- base += "The language may reflect patterns of emotional control, even when expressed in soft or caring terms.\n"
124
- elif risk_level == "moderate":
125
- base += "There are signs of emotional pressure or indirect control that may escalate if repeated.\n"
126
- else:
127
- base += "The message does not strongly indicate abuse, but it's important to monitor for patterns.\n"
128
-
129
- base += "\n💡 *Why this might be flagged:*\n"
130
- base += (
131
- "This message may seem supportive, but language like “Do you need me to come home?” can sometimes carry implied pressure, especially if declining leads to guilt, tension, or emotional withdrawal. "
132
- "The model looks for patterns that reflect subtle coercion, obligation, or reversal dynamics—even when not overtly aggressive.\n"
133
- )
134
-
135
- base += f"\nDetected Pattern: **{pattern_label} ({pattern_score})**\n"
136
- base += "🧠 You can review the pattern in context. This tool highlights possible dynamics—not judgments."
137
-
138
- return base
139
-
140
- def analyze_single_message(text, thresholds, motif_flags):
141
  motif_hits, matched_phrases = detect_motifs(text)
142
-
143
  result = sst_pipeline(text)[0]
144
  sentiment = "supportive" if result['label'] == "POSITIVE" else "undermining"
145
  sentiment_score = result['score'] if sentiment == "undermining" else 0.0
@@ -157,13 +91,10 @@ def analyze_single_message(text, thresholds, motif_flags):
157
  outputs = model(**inputs)
158
  scores = torch.sigmoid(outputs.logits.squeeze(0)).numpy()
159
 
160
- threshold_labels = []
161
- for label, score in zip(LABELS, scores):
162
- if label in {"control", "dismissiveness", "blame shifting"}:
163
- if sentiment == "undermining" and result["score"] > 0.85 and score > adjusted_thresholds[label]:
164
- threshold_labels.append(label)
165
- elif score > adjusted_thresholds[label]:
166
- threshold_labels.append(label)
167
 
168
  top_patterns = sorted(
169
  [(label, score) for label, score in zip(LABELS, scores)],
@@ -171,55 +102,46 @@ def analyze_single_message(text, thresholds, motif_flags):
171
  reverse=True
172
  )[:2]
173
 
174
- pattern_labels = threshold_labels + [label for label, _ in matched_phrases]
175
- darvo_score = calculate_darvo_score(pattern_labels, 0.0, sentiment_score, motifs, contradiction_flag)
176
-
177
- print("\n--- Debug Info ---")
178
- print(f"Text: {text}")
179
- print(f"Sentiment: {sentiment} (raw: {result['label']}, score: {result['score']:.3f})")
180
- print("Abuse Pattern Scores:")
181
- for label, score in zip(LABELS, scores):
182
- passed = "✅" if label in threshold_labels else "❌"
183
- print(f" {label:25} → {score:.3f} {passed}")
184
- print(f"Motifs: {motifs}")
185
- print(f"Contradiction: {contradiction_flag}")
186
- print("------------------\n")
187
-
188
- return (
189
- np.mean([score for _, score in top_patterns]) * 100,
190
- threshold_labels,
191
- top_patterns,
192
- darvo_score,
193
- {"label": sentiment, "raw_label": result['label'], "score": result['score']}
194
- )
195
  def analyze_composite(msg1, msg2, msg3, *answers_and_none):
196
  responses = answers_and_none[:len(ESCALATION_QUESTIONS)]
197
  none_selected = answers_and_none[-1]
198
  escalation_score = 0 if none_selected else sum(w for (_, w), a in zip(ESCALATION_QUESTIONS, responses) if a)
199
- escalation_level = "High" if escalation_score >= 16 else "Moderate" if escalation_score >= 8 else "Low"
200
-
201
  messages = [msg1, msg2, msg3]
202
  active = [m for m in messages if m.strip()]
203
  if not active:
204
  return "Please enter at least one message."
205
 
206
- results = [analyze_single_message(m, THRESHOLDS.copy(), []) for m in active]
207
  abuse_scores = [r[0] for r in results]
208
- darvo_scores = [r[3] for r in results]
209
- top_pattern = max(
210
- [(label, score) for r in results for label, score in r[2]],
211
- key=lambda x: x[1]
212
- )
213
- top_label = f"{top_pattern[0]} – {int(round(top_pattern[1] * 100))}%"
 
 
 
214
  composite_abuse = int(round(sum(abuse_scores) / len(abuse_scores)))
215
- avg_darvo = round(sum(darvo_scores) / len(darvo_scores), 3)
 
 
 
 
 
 
216
 
217
  out = f"Abuse Intensity: {composite_abuse}%\n"
218
- out += f"Escalation Potential: {escalation_level} ({escalation_score}/{sum(w for _, w in ESCALATION_QUESTIONS)})"
219
- out += generate_risk_snippet(composite_abuse, top_label, escalation_score)
220
- if avg_darvo > 0.25:
221
- level = "moderate" if avg_darvo < 0.65 else "high"
222
- out += f"\n\nDARVO Score: {avg_darvo} → This indicates a **{level} likelihood** of narrative reversal (DARVO), where the speaker may be denying, attacking, or reversing blame."
223
  return out
224
 
225
  textbox_inputs = [gr.Textbox(label=f"Message {i+1}") for i in range(3)]
@@ -235,4 +157,4 @@ iface = gr.Interface(
235
  )
236
 
237
  if __name__ == "__main__":
238
- iface.launch()
 
1
  import gradio as gr
2
  import torch
3
  import numpy as np
4
+ from transformers import pipeline, RobertaForSequenceClassification, RobertaTokenizer
 
5
  from motif_tagging import detect_motifs
6
  import re
7
 
 
30
  "blame shifting": 0.8, "contradictory statements": 0.75
31
  }
32
 
33
# Display text for each risk stage number returned by get_risk_stage().
# Each value is a headline line followed by a one-sentence description,
# separated by a newline, and is appended verbatim to the composite report.
RISK_STAGE_LABELS = {
    1: (
        "🌀 Risk Stage: Tension-Building\n"
        "This message reflects rising emotional pressure or subtle control attempts."
    ),
    2: (
        "🔥 Risk Stage: Escalation\n"
        "This message includes direct or aggressive patterns, suggesting active harm."
    ),
    3: (
        "🌧️ Risk Stage: Reconciliation\n"
        "This message reflects a reset attempt—apologies or emotional repair without accountability."
    ),
    4: (
        "🌸 Risk Stage: Calm / Honeymoon\n"
        "This message appears supportive but may follow prior harm, minimizing it."
    ),
}
39
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
40
  ESCALATION_QUESTIONS = [
41
  ("Partner has access to firearms or weapons", 4),
42
  ("Partner threatened to kill you", 3),
 
61
  ]
62
  return any(re.search(p, message, flags) for p, flags in patterns)
63
 
64
def get_risk_stage(patterns, sentiment):
    """Pick a risk-stage number (1-4) from detected patterns and sentiment.

    The checks run in priority order and the first match wins:

    * ``"threat"`` or ``"insults"`` present -> 2
    * ``"recovery phase"`` present -> 3
    * ``"control"`` or ``"guilt tripping"`` present -> 1
    * sentiment is ``"supportive"`` and ``"projection"`` or
      ``"dismissiveness"`` present -> 4
    * anything else -> 1

    Args:
        patterns: Container of pattern label strings (membership-tested).
        sentiment: Sentiment label string; only ``"supportive"`` is special.

    Returns:
        The stage number as an int; these are the keys of RISK_STAGE_LABELS.
    """
    def has_any(*labels):
        # True when at least one of the given labels was detected.
        return any(label in patterns for label in labels)

    if has_any("threat", "insults"):
        return 2
    if "recovery phase" in patterns:
        return 3
    if has_any("control", "guilt tripping"):
        return 1
    if sentiment == "supportive" and has_any("projection", "dismissiveness"):
        return 4
    # Default when nothing more specific matched.
    return 1
74
+
75
+ def analyze_single_message(text, thresholds):
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
76
  motif_hits, matched_phrases = detect_motifs(text)
 
77
  result = sst_pipeline(text)[0]
78
  sentiment = "supportive" if result['label'] == "POSITIVE" else "undermining"
79
  sentiment_score = result['score'] if sentiment == "undermining" else 0.0
 
91
  outputs = model(**inputs)
92
  scores = torch.sigmoid(outputs.logits.squeeze(0)).numpy()
93
 
94
+ threshold_labels = [
95
+ label for label, score in zip(LABELS, scores)
96
+ if score > adjusted_thresholds[label]
97
+ ]
 
 
 
98
 
99
  top_patterns = sorted(
100
  [(label, score) for label, score in zip(LABELS, scores)],
 
102
  reverse=True
103
  )[:2]
104
 
105
+ weighted_scores = [(PATTERN_WEIGHTS.get(label, 1.0) * score) for label, score in top_patterns]
106
+ abuse_score = np.mean(weighted_scores) * 100
107
+
108
+ stage = get_risk_stage(threshold_labels, sentiment)
109
+
110
+ return abuse_score, threshold_labels, top_patterns, result, stage
111
+
 
 
 
 
 
 
 
 
 
 
 
 
 
 
112
  def analyze_composite(msg1, msg2, msg3, *answers_and_none):
113
  responses = answers_and_none[:len(ESCALATION_QUESTIONS)]
114
  none_selected = answers_and_none[-1]
115
  escalation_score = 0 if none_selected else sum(w for (_, w), a in zip(ESCALATION_QUESTIONS, responses) if a)
 
 
116
  messages = [msg1, msg2, msg3]
117
  active = [m for m in messages if m.strip()]
118
  if not active:
119
  return "Please enter at least one message."
120
 
121
+ results = [analyze_single_message(m, THRESHOLDS.copy()) for m in active]
122
  abuse_scores = [r[0] for r in results]
123
+ top_labels = [r[2][0][0] for r in results]
124
+ top_scores = [r[2][0][1] for r in results]
125
+ sentiments = [r[3]['label'] for r in results]
126
+ stages = [r[4] for r in results]
127
+
128
+ most_common_stage = max(set(stages), key=stages.count)
129
+ stage_text = RISK_STAGE_LABELS[most_common_stage]
130
+
131
+ top_label = f"{top_labels[0]} – {int(round(top_scores[0] * 100))}%"
132
  composite_abuse = int(round(sum(abuse_scores) / len(abuse_scores)))
133
+
134
+ if composite_abuse >= 85 or escalation_score >= 16:
135
+ risk_level = "high"
136
+ elif composite_abuse >= 60 or escalation_score >= 8:
137
+ risk_level = "moderate"
138
+ else:
139
+ risk_level = "low"
140
 
141
  out = f"Abuse Intensity: {composite_abuse}%\n"
142
+ out += f"Escalation Potential: {risk_level.capitalize()} ({escalation_score}/{sum(w for _, w in ESCALATION_QUESTIONS)})\n"
143
+ out += f"Top Pattern: {top_label}\n"
144
+ out += f"\n{stage_text}"
 
 
145
  return out
146
 
147
  textbox_inputs = [gr.Textbox(label=f"Message {i+1}") for i in range(3)]
 
157
  )
158
 
159
  if __name__ == "__main__":
160
+ iface.launch()