Spaces:
Running
on
Zero
Running
on
Zero
Update app.py
Browse files
app.py
CHANGED
@@ -72,43 +72,6 @@ def get_risk_stage(patterns, sentiment):
|
|
72 |
return 4
|
73 |
return 1
|
74 |
|
75 |
-
def analyze_single_message(text, thresholds):
|
76 |
-
motif_hits, matched_phrases = detect_motifs(text)
|
77 |
-
result = sst_pipeline(text)[0]
|
78 |
-
sentiment = "supportive" if result['label'] == "POSITIVE" else "undermining"
|
79 |
-
sentiment_score = result['score'] if sentiment == "undermining" else 0.0
|
80 |
-
|
81 |
-
adjusted_thresholds = {
|
82 |
-
k: v + 0.05 if sentiment == "supportive" else v
|
83 |
-
for k, v in thresholds.items()
|
84 |
-
}
|
85 |
-
|
86 |
-
contradiction_flag = detect_contradiction(text)
|
87 |
-
motifs = [phrase for _, phrase in matched_phrases]
|
88 |
-
|
89 |
-
inputs = tokenizer(text, return_tensors="pt", truncation=True, padding=True)
|
90 |
-
with torch.no_grad():
|
91 |
-
outputs = model(**inputs)
|
92 |
-
scores = torch.sigmoid(outputs.logits.squeeze(0)).numpy()
|
93 |
-
|
94 |
-
threshold_labels = [
|
95 |
-
label for label, score in zip(LABELS, scores)
|
96 |
-
if score > adjusted_thresholds[label]
|
97 |
-
]
|
98 |
-
|
99 |
-
top_patterns = sorted(
|
100 |
-
[(label, score) for label, score in zip(LABELS, scores)],
|
101 |
-
key=lambda x: x[1],
|
102 |
-
reverse=True
|
103 |
-
)[:2]
|
104 |
-
|
105 |
-
weighted_scores = [(PATTERN_WEIGHTS.get(label, 1.0) * score) for label, score in top_patterns]
|
106 |
-
abuse_score = min(np.mean(weighted_scores) * 100, 100)
|
107 |
-
|
108 |
-
stage = get_risk_stage(threshold_labels, sentiment)
|
109 |
-
|
110 |
-
return abuse_score, threshold_labels, top_patterns, result, stage
|
111 |
-
|
112 |
def generate_risk_snippet(abuse_score, top_label, escalation_score):
|
113 |
if abuse_score >= 85 or escalation_score >= 16:
|
114 |
risk_level = "high"
|
@@ -120,73 +83,44 @@ def generate_risk_snippet(abuse_score, top_label, escalation_score):
|
|
120 |
pattern_label = top_label.split(" – ")[0]
|
121 |
pattern_score = top_label.split(" – ")[1] if " – " in top_label else ""
|
122 |
|
123 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
124 |
base += f"This message shows strong indicators of **{pattern_label}**. "
|
125 |
|
126 |
if risk_level == "high":
|
127 |
-
base += "The language may reflect patterns of emotional control, even when expressed in soft or caring terms
|
|
|
128 |
elif risk_level == "moderate":
|
129 |
-
base += "There are signs of emotional pressure or indirect control that may escalate if repeated
|
|
|
130 |
else:
|
131 |
-
base += "The message does not strongly indicate abuse, but it's important to monitor for patterns
|
132 |
-
|
133 |
-
|
134 |
-
base +=
|
135 |
-
|
136 |
-
|
137 |
-
|
138 |
-
|
139 |
-
|
|
|
140 |
base += "🧠 You can review the pattern in context. This tool highlights possible dynamics—not judgments."
|
141 |
|
142 |
return base
|
143 |
-
|
144 |
-
def analyze_composite(msg1, msg2, msg3, *answers_and_none):
|
145 |
-
responses = answers_and_none[:len(ESCALATION_QUESTIONS)]
|
146 |
-
none_selected = answers_and_none[-1]
|
147 |
-
escalation_score = 0 if none_selected else sum(w for (_, w), a in zip(ESCALATION_QUESTIONS, responses) if a)
|
148 |
-
messages = [msg1, msg2, msg3]
|
149 |
-
active = [m for m in messages if m.strip()]
|
150 |
-
if not active:
|
151 |
-
return "Please enter at least one message."
|
152 |
-
|
153 |
-
results = [analyze_single_message(m, THRESHOLDS.copy()) for m in active]
|
154 |
-
abuse_scores = [r[0] for r in results]
|
155 |
-
top_labels = [r[2][0][0] for r in results]
|
156 |
-
top_scores = [r[2][0][1] for r in results]
|
157 |
-
sentiments = [r[3]['label'] for r in results]
|
158 |
-
stages = [r[4] for r in results]
|
159 |
-
|
160 |
-
most_common_stage = max(set(stages), key=stages.count)
|
161 |
-
stage_text = RISK_STAGE_LABELS[most_common_stage]
|
162 |
-
|
163 |
-
top_label = f"{top_labels[0]} – {int(round(top_scores[0] * 100))}%"
|
164 |
-
composite_abuse = int(round(sum(abuse_scores) / len(abuse_scores)))
|
165 |
-
|
166 |
-
if composite_abuse >= 85 or escalation_score >= 16:
|
167 |
-
risk_level = "high"
|
168 |
-
elif composite_abuse >= 60 or escalation_score >= 8:
|
169 |
-
risk_level = "moderate"
|
170 |
-
else:
|
171 |
-
risk_level = "low"
|
172 |
-
|
173 |
-
out = f"Abuse Intensity: {composite_abuse}%\n"
|
174 |
-
out += f"Escalation Potential: {risk_level.capitalize()} ({escalation_score}/{sum(w for _, w in ESCALATION_QUESTIONS)})"
|
175 |
-
out += generate_risk_snippet(composite_abuse, top_label, escalation_score)
|
176 |
-
out += f"\n\n{stage_text}"
|
177 |
-
return out
|
178 |
-
|
179 |
-
textbox_inputs = [gr.Textbox(label=f"Message {i+1}") for i in range(3)]
|
180 |
-
quiz_boxes = [gr.Checkbox(label=q) for q, _ in ESCALATION_QUESTIONS]
|
181 |
-
none_box = gr.Checkbox(label="None of the above")
|
182 |
-
|
183 |
-
iface = gr.Interface(
|
184 |
-
fn=analyze_composite,
|
185 |
-
inputs=textbox_inputs + quiz_boxes + [none_box],
|
186 |
-
outputs=gr.Textbox(label="Results"),
|
187 |
-
title="Abuse Pattern Detector + Escalation Quiz",
|
188 |
-
allow_flagging="manual"
|
189 |
-
)
|
190 |
-
|
191 |
-
if __name__ == "__main__":
|
192 |
-
iface.launch()
|
|
|
72 |
return 4
|
73 |
return 1
|
74 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
75 |
def generate_risk_snippet(abuse_score, top_label, escalation_score):
|
76 |
if abuse_score >= 85 or escalation_score >= 16:
|
77 |
risk_level = "high"
|
|
|
83 |
pattern_label = top_label.split(" – ")[0]
|
84 |
pattern_score = top_label.split(" – ")[1] if " – " in top_label else ""
|
85 |
|
86 |
+
WHY_FLAGGED = {
|
87 |
+
"control": "This message may reflect efforts to restrict someone’s autonomy, even if it's framed as concern or care.",
|
88 |
+
"gaslighting": "This message could be manipulating someone into questioning their perception or feelings.",
|
89 |
+
"dismissiveness": "This message may include belittling, invalidating, or ignoring the other person’s experience.",
|
90 |
+
"insults": "Direct insults often appear in escalating abusive dynamics and can erode emotional safety.",
|
91 |
+
"threat": "This message includes threatening language, which is a strong predictor of harm.",
|
92 |
+
"blame shifting": "This message may redirect responsibility to avoid accountability, especially during conflict.",
|
93 |
+
"guilt tripping": "This message may induce guilt in order to control or manipulate behavior.",
|
94 |
+
"recovery phase": "This message may be part of a tension-reset cycle, appearing kind but avoiding change.",
|
95 |
+
"projection": "This message may involve attributing the abuser’s own behaviors to the victim.",
|
96 |
+
"default": "This message contains language patterns that may affect safety, clarity, or emotional autonomy."
|
97 |
+
}
|
98 |
+
|
99 |
+
explanation = WHY_FLAGGED.get(pattern_label.lower(), WHY_FLAGGED["default"])
|
100 |
+
|
101 |
+
base = f"
|
102 |
+
|
103 |
+
🛑 Risk Level: {risk_level.capitalize()}
|
104 |
+
"
|
105 |
base += f"This message shows strong indicators of **{pattern_label}**. "
|
106 |
|
107 |
if risk_level == "high":
|
108 |
+
base += "The language may reflect patterns of emotional control, even when expressed in soft or caring terms.
|
109 |
+
"
|
110 |
elif risk_level == "moderate":
|
111 |
+
base += "There are signs of emotional pressure or indirect control that may escalate if repeated.
|
112 |
+
"
|
113 |
else:
|
114 |
+
base += "The message does not strongly indicate abuse, but it's important to monitor for patterns.
|
115 |
+
"
|
116 |
+
|
117 |
+
base += f"
|
118 |
+
💡 *Why this might be flagged:*
|
119 |
+
{explanation}
|
120 |
+
"
|
121 |
+
base += f"
|
122 |
+
Detected Pattern: **{pattern_label} ({pattern_score})**
|
123 |
+
"
|
124 |
base += "🧠 You can review the pattern in context. This tool highlights possible dynamics—not judgments."
|
125 |
|
126 |
return base
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|