SamanthaStorm committed
Commit 83bf881 · verified · 1 Parent(s): 88862a6

Update app.py

Files changed (1)
  app.py +33 -99
app.py CHANGED
@@ -72,43 +72,6 @@ def get_risk_stage(patterns, sentiment):
         return 4
     return 1
 
-def analyze_single_message(text, thresholds):
-    motif_hits, matched_phrases = detect_motifs(text)
-    result = sst_pipeline(text)[0]
-    sentiment = "supportive" if result['label'] == "POSITIVE" else "undermining"
-    sentiment_score = result['score'] if sentiment == "undermining" else 0.0
-
-    adjusted_thresholds = {
-        k: v + 0.05 if sentiment == "supportive" else v
-        for k, v in thresholds.items()
-    }
-
-    contradiction_flag = detect_contradiction(text)
-    motifs = [phrase for _, phrase in matched_phrases]
-
-    inputs = tokenizer(text, return_tensors="pt", truncation=True, padding=True)
-    with torch.no_grad():
-        outputs = model(**inputs)
-    scores = torch.sigmoid(outputs.logits.squeeze(0)).numpy()
-
-    threshold_labels = [
-        label for label, score in zip(LABELS, scores)
-        if score > adjusted_thresholds[label]
-    ]
-
-    top_patterns = sorted(
-        [(label, score) for label, score in zip(LABELS, scores)],
-        key=lambda x: x[1],
-        reverse=True
-    )[:2]
-
-    weighted_scores = [(PATTERN_WEIGHTS.get(label, 1.0) * score) for label, score in top_patterns]
-    abuse_score = min(np.mean(weighted_scores) * 100, 100)
-
-    stage = get_risk_stage(threshold_labels, sentiment)
-
-    return abuse_score, threshold_labels, top_patterns, result, stage
-
 def generate_risk_snippet(abuse_score, top_label, escalation_score):
     if abuse_score >= 85 or escalation_score >= 16:
         risk_level = "high"
@@ -120,73 +83,44 @@ def generate_risk_snippet(abuse_score, top_label, escalation_score):
     pattern_label = top_label.split(" – ")[0]
     pattern_score = top_label.split(" – ")[1] if " – " in top_label else ""
 
-    base = f"\n\n🛑 Risk Level: {risk_level.capitalize()}\n"
+    WHY_FLAGGED = {
+        "control": "This message may reflect efforts to restrict someone’s autonomy, even if it's framed as concern or care.",
+        "gaslighting": "This message could be manipulating someone into questioning their perception or feelings.",
+        "dismissiveness": "This message may include belittling, invalidating, or ignoring the other person’s experience.",
+        "insults": "Direct insults often appear in escalating abusive dynamics and can erode emotional safety.",
+        "threat": "This message includes threatening language, which is a strong predictor of harm.",
+        "blame shifting": "This message may redirect responsibility to avoid accountability, especially during conflict.",
+        "guilt tripping": "This message may induce guilt in order to control or manipulate behavior.",
+        "recovery phase": "This message may be part of a tension-reset cycle, appearing kind but avoiding change.",
+        "projection": "This message may involve attributing the abuser’s own behaviors to the victim.",
+        "default": "This message contains language patterns that may affect safety, clarity, or emotional autonomy."
+    }
+
+    explanation = WHY_FLAGGED.get(pattern_label.lower(), WHY_FLAGGED["default"])
+
+    base = f"
+
+🛑 Risk Level: {risk_level.capitalize()}
+"
     base += f"This message shows strong indicators of **{pattern_label}**. "
 
     if risk_level == "high":
-        base += "The language may reflect patterns of emotional control, even when expressed in soft or caring terms.\n"
+        base += "The language may reflect patterns of emotional control, even when expressed in soft or caring terms.
+"
     elif risk_level == "moderate":
-        base += "There are signs of emotional pressure or indirect control that may escalate if repeated.\n"
+        base += "There are signs of emotional pressure or indirect control that may escalate if repeated.
+"
     else:
-        base += "The message does not strongly indicate abuse, but it's important to monitor for patterns.\n"
-
-    base += "\n💡 *Why this might be flagged:*\n"
-    base += (
-        "This message may seem supportive, but language like “Do you need me to come home?” can sometimes carry implied pressure, especially if declining leads to guilt, tension, or emotional withdrawal. "
-        "The model looks for patterns that reflect subtle coercion, obligation, or reversal dynamics—even when not overtly aggressive.\n"
-    )
-
-    base += f"\nDetected Pattern: **{pattern_label} ({pattern_score})**\n"
+        base += "The message does not strongly indicate abuse, but it's important to monitor for patterns.
+"
+
+    base += f"
+💡 *Why this might be flagged:*
+{explanation}
+"
+    base += f"
+Detected Pattern: **{pattern_label} ({pattern_score})**
+"
     base += "🧠 You can review the pattern in context. This tool highlights possible dynamics—not judgments."
 
     return base
-
-def analyze_composite(msg1, msg2, msg3, *answers_and_none):
-    responses = answers_and_none[:len(ESCALATION_QUESTIONS)]
-    none_selected = answers_and_none[-1]
-    escalation_score = 0 if none_selected else sum(w for (_, w), a in zip(ESCALATION_QUESTIONS, responses) if a)
-    messages = [msg1, msg2, msg3]
-    active = [m for m in messages if m.strip()]
-    if not active:
-        return "Please enter at least one message."
-
-    results = [analyze_single_message(m, THRESHOLDS.copy()) for m in active]
-    abuse_scores = [r[0] for r in results]
-    top_labels = [r[2][0][0] for r in results]
-    top_scores = [r[2][0][1] for r in results]
-    sentiments = [r[3]['label'] for r in results]
-    stages = [r[4] for r in results]
-
-    most_common_stage = max(set(stages), key=stages.count)
-    stage_text = RISK_STAGE_LABELS[most_common_stage]
-
-    top_label = f"{top_labels[0]} – {int(round(top_scores[0] * 100))}%"
-    composite_abuse = int(round(sum(abuse_scores) / len(abuse_scores)))
-
-    if composite_abuse >= 85 or escalation_score >= 16:
-        risk_level = "high"
-    elif composite_abuse >= 60 or escalation_score >= 8:
-        risk_level = "moderate"
-    else:
-        risk_level = "low"
-
-    out = f"Abuse Intensity: {composite_abuse}%\n"
-    out += f"Escalation Potential: {risk_level.capitalize()} ({escalation_score}/{sum(w for _, w in ESCALATION_QUESTIONS)})"
-    out += generate_risk_snippet(composite_abuse, top_label, escalation_score)
-    out += f"\n\n{stage_text}"
-    return out
-
-textbox_inputs = [gr.Textbox(label=f"Message {i+1}") for i in range(3)]
-quiz_boxes = [gr.Checkbox(label=q) for q, _ in ESCALATION_QUESTIONS]
-none_box = gr.Checkbox(label="None of the above")
-
-iface = gr.Interface(
-    fn=analyze_composite,
-    inputs=textbox_inputs + quiz_boxes + [none_box],
-    outputs=gr.Textbox(label="Results"),
-    title="Abuse Pattern Detector + Escalation Quiz",
-    allow_flagging="manual"
-)
-
-if __name__ == "__main__":
-    iface.launch()