Spaces:
Running
Running
Update app.py
Browse files
app.py
CHANGED
@@ -1,8 +1,7 @@
|
|
1 |
import gradio as gr
|
2 |
import torch
|
3 |
import numpy as np
|
4 |
-
from transformers import pipeline,
|
5 |
-
from transformers import RobertaForSequenceClassification, RobertaTokenizer
|
6 |
from motif_tagging import detect_motifs
|
7 |
import re
|
8 |
|
@@ -31,47 +30,13 @@ PATTERN_WEIGHTS = {
|
|
31 |
"blame shifting": 0.8, "contradictory statements": 0.75
|
32 |
}
|
33 |
|
34 |
-
|
35 |
-
"
|
36 |
-
"
|
37 |
-
|
38 |
-
|
39 |
-
"gaslighting": "Manipulating someone into questioning their reality.",
|
40 |
-
"guilt tripping": "Using guilt to control or pressure.",
|
41 |
-
"insults": "Derogatory or demeaning language.",
|
42 |
-
"obscure language": "Vague, superior, or confusing language used manipulatively.",
|
43 |
-
"projection": "Accusing someone else of your own behaviors.",
|
44 |
-
"recovery phase": "Resetting tension without real change.",
|
45 |
-
"threat": "Using fear or harm to control or intimidate."
|
46 |
}
|
47 |
|
48 |
-
RISK_SNIPPETS = {
|
49 |
-
"low": (
|
50 |
-
"🟢 Risk Level: Low",
|
51 |
-
"The language patterns here do not strongly indicate abuse.",
|
52 |
-
"Check in with yourself and monitor for repeated patterns."
|
53 |
-
),
|
54 |
-
"moderate": (
|
55 |
-
"⚠️ Risk Level: Moderate to High",
|
56 |
-
"Language includes control, guilt, or reversal tactics.",
|
57 |
-
"These patterns reduce self-trust. Document or talk with someone safe."
|
58 |
-
),
|
59 |
-
"high": (
|
60 |
-
"🛑 Risk Level: High",
|
61 |
-
"Strong indicators of coercive control or threat present.",
|
62 |
-
"Consider building a safety plan or contacting support."
|
63 |
-
)
|
64 |
-
}
|
65 |
-
|
66 |
-
DARVO_PATTERNS = {
|
67 |
-
"blame shifting", "projection", "dismissiveness", "guilt tripping", "contradictory statements"
|
68 |
-
}
|
69 |
-
DARVO_MOTIFS = [
|
70 |
-
"i guess i’m the bad guy", "after everything i’ve done", "you always twist everything",
|
71 |
-
"so now it’s all my fault", "i’m the villain", "i’m always wrong", "you never listen",
|
72 |
-
"you’re attacking me", "i’m done trying", "i’m the only one who cares"
|
73 |
-
]
|
74 |
-
|
75 |
ESCALATION_QUESTIONS = [
|
76 |
("Partner has access to firearms or weapons", 4),
|
77 |
("Partner threatened to kill you", 3),
|
@@ -96,50 +61,19 @@ def detect_contradiction(message):
|
|
96 |
]
|
97 |
return any(re.search(p, message, flags) for p, flags in patterns)
|
98 |
|
99 |
-
def
|
100 |
-
|
101 |
-
|
102 |
-
|
103 |
-
|
104 |
-
|
105 |
-
|
106 |
-
|
107 |
-
|
108 |
-
|
109 |
-
|
110 |
-
|
111 |
-
elif abuse_score >= 60 or escalation_score >= 8:
|
112 |
-
risk_level = "moderate"
|
113 |
-
else:
|
114 |
-
risk_level = "low"
|
115 |
-
|
116 |
-
pattern_label = top_label.split(" – ")[0]
|
117 |
-
pattern_score = top_label.split(" – ")[1] if " – " in top_label else ""
|
118 |
-
|
119 |
-
base = f"\n\n🛑 Risk Level: {risk_level.capitalize()}\n"
|
120 |
-
base += f"This message shows strong indicators of **{pattern_label}**. "
|
121 |
-
|
122 |
-
if risk_level == "high":
|
123 |
-
base += "The language may reflect patterns of emotional control, even when expressed in soft or caring terms.\n"
|
124 |
-
elif risk_level == "moderate":
|
125 |
-
base += "There are signs of emotional pressure or indirect control that may escalate if repeated.\n"
|
126 |
-
else:
|
127 |
-
base += "The message does not strongly indicate abuse, but it's important to monitor for patterns.\n"
|
128 |
-
|
129 |
-
base += "\n💡 *Why this might be flagged:*\n"
|
130 |
-
base += (
|
131 |
-
"This message may seem supportive, but language like “Do you need me to come home?” can sometimes carry implied pressure, especially if declining leads to guilt, tension, or emotional withdrawal. "
|
132 |
-
"The model looks for patterns that reflect subtle coercion, obligation, or reversal dynamics—even when not overtly aggressive.\n"
|
133 |
-
)
|
134 |
-
|
135 |
-
base += f"\nDetected Pattern: **{pattern_label} ({pattern_score})**\n"
|
136 |
-
base += "🧠 You can review the pattern in context. This tool highlights possible dynamics—not judgments."
|
137 |
-
|
138 |
-
return base
|
139 |
-
|
140 |
-
def analyze_single_message(text, thresholds, motif_flags):
|
141 |
motif_hits, matched_phrases = detect_motifs(text)
|
142 |
-
|
143 |
result = sst_pipeline(text)[0]
|
144 |
sentiment = "supportive" if result['label'] == "POSITIVE" else "undermining"
|
145 |
sentiment_score = result['score'] if sentiment == "undermining" else 0.0
|
@@ -157,13 +91,10 @@ def analyze_single_message(text, thresholds, motif_flags):
|
|
157 |
outputs = model(**inputs)
|
158 |
scores = torch.sigmoid(outputs.logits.squeeze(0)).numpy()
|
159 |
|
160 |
-
threshold_labels = [
|
161 |
-
|
162 |
-
if
|
163 |
-
|
164 |
-
threshold_labels.append(label)
|
165 |
-
elif score > adjusted_thresholds[label]:
|
166 |
-
threshold_labels.append(label)
|
167 |
|
168 |
top_patterns = sorted(
|
169 |
[(label, score) for label, score in zip(LABELS, scores)],
|
@@ -171,55 +102,46 @@ def analyze_single_message(text, thresholds, motif_flags):
|
|
171 |
reverse=True
|
172 |
)[:2]
|
173 |
|
174 |
-
|
175 |
-
|
176 |
-
|
177 |
-
|
178 |
-
|
179 |
-
|
180 |
-
|
181 |
-
for label, score in zip(LABELS, scores):
|
182 |
-
passed = "✅" if label in threshold_labels else "❌"
|
183 |
-
print(f" {label:25} → {score:.3f} {passed}")
|
184 |
-
print(f"Motifs: {motifs}")
|
185 |
-
print(f"Contradiction: {contradiction_flag}")
|
186 |
-
print("------------------\n")
|
187 |
-
|
188 |
-
return (
|
189 |
-
np.mean([score for _, score in top_patterns]) * 100,
|
190 |
-
threshold_labels,
|
191 |
-
top_patterns,
|
192 |
-
darvo_score,
|
193 |
-
{"label": sentiment, "raw_label": result['label'], "score": result['score']}
|
194 |
-
)
|
195 |
def analyze_composite(msg1, msg2, msg3, *answers_and_none):
|
196 |
responses = answers_and_none[:len(ESCALATION_QUESTIONS)]
|
197 |
none_selected = answers_and_none[-1]
|
198 |
escalation_score = 0 if none_selected else sum(w for (_, w), a in zip(ESCALATION_QUESTIONS, responses) if a)
|
199 |
-
escalation_level = "High" if escalation_score >= 16 else "Moderate" if escalation_score >= 8 else "Low"
|
200 |
-
|
201 |
messages = [msg1, msg2, msg3]
|
202 |
active = [m for m in messages if m.strip()]
|
203 |
if not active:
|
204 |
return "Please enter at least one message."
|
205 |
|
206 |
-
results = [analyze_single_message(m, THRESHOLDS.copy()
|
207 |
abuse_scores = [r[0] for r in results]
|
208 |
-
|
209 |
-
|
210 |
-
[
|
211 |
-
|
212 |
-
|
213 |
-
|
|
|
|
|
|
|
214 |
composite_abuse = int(round(sum(abuse_scores) / len(abuse_scores)))
|
215 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
216 |
|
217 |
out = f"Abuse Intensity: {composite_abuse}%\n"
|
218 |
-
out += f"Escalation Potential: {
|
219 |
-
out +=
|
220 |
-
|
221 |
-
level = "moderate" if avg_darvo < 0.65 else "high"
|
222 |
-
out += f"\n\nDARVO Score: {avg_darvo} → This indicates a **{level} likelihood** of narrative reversal (DARVO), where the speaker may be denying, attacking, or reversing blame."
|
223 |
return out
|
224 |
|
225 |
textbox_inputs = [gr.Textbox(label=f"Message {i+1}") for i in range(3)]
|
@@ -235,4 +157,4 @@ iface = gr.Interface(
|
|
235 |
)
|
236 |
|
237 |
if __name__ == "__main__":
|
238 |
-
iface.launch()
|
|
|
1 |
import gradio as gr
|
2 |
import torch
|
3 |
import numpy as np
|
4 |
+
from transformers import pipeline, RobertaForSequenceClassification, RobertaTokenizer
|
|
|
5 |
from motif_tagging import detect_motifs
|
6 |
import re
|
7 |
|
|
|
30 |
"blame shifting": 0.8, "contradictory statements": 0.75
|
31 |
}
|
32 |
|
33 |
+
# User-facing text for each risk-stage number returned by get_risk_stage().
# Every value is "<headline>\n<one-sentence explanation>"; analyze_composite()
# appends the selected entry verbatim to its report.
RISK_STAGE_LABELS = {
    1: "🌀 Risk Stage: Tension-Building\nThis message reflects rising emotional pressure or subtle control attempts.",
    2: "🔥 Risk Stage: Escalation\nThis message includes direct or aggressive patterns, suggesting active harm.",
    3: "🌧️ Risk Stage: Reconciliation\nThis message reflects a reset attempt—apologies or emotional repair without accountability.",
    4: "🌸 Risk Stage: Calm / Honeymoon\nThis message appears supportive but may follow prior harm, minimizing it.",
}
|
39 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
40 |
ESCALATION_QUESTIONS = [
|
41 |
("Partner has access to firearms or weapons", 4),
|
42 |
("Partner threatened to kill you", 3),
|
|
|
61 |
]
|
62 |
return any(re.search(p, message, flags) for p, flags in patterns)
|
63 |
|
64 |
+
def get_risk_stage(patterns, sentiment):
    """Map detected pattern labels and sentiment to a risk-stage number.

    The checks run in priority order and the first match wins:

    * ``"threat"`` or ``"insults"`` present            -> 2
    * ``"recovery phase"`` present                      -> 3
    * ``"control"`` or ``"guilt tripping"`` present     -> 1
    * sentiment is ``"supportive"`` and ``"projection"``
      or ``"dismissiveness"`` present                   -> 4
    * anything else (including no patterns at all)      -> 1

    Args:
        patterns: Container of pattern label strings detected for a message.
            ``None`` is treated as "no patterns".
        sentiment: Sentiment tag for the message; only the value
            ``"supportive"`` affects the result.

    Returns:
        int: A stage number in ``{1, 2, 3, 4}``; these are the keys of
        ``RISK_STAGE_LABELS``.
    """
    if patterns is None:
        patterns = ()

    def has_any(*labels):
        # True when at least one of the given labels was detected.
        return any(label in patterns for label in labels)

    if has_any("threat", "insults"):
        return 2
    if has_any("recovery phase"):
        return 3
    if has_any("control", "guilt tripping"):
        return 1
    if sentiment == "supportive" and has_any("projection", "dismissiveness"):
        return 4
    # Default stage when nothing more specific matched.
    return 1
|
74 |
+
|
75 |
+
def analyze_single_message(text, thresholds):
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
76 |
motif_hits, matched_phrases = detect_motifs(text)
|
|
|
77 |
result = sst_pipeline(text)[0]
|
78 |
sentiment = "supportive" if result['label'] == "POSITIVE" else "undermining"
|
79 |
sentiment_score = result['score'] if sentiment == "undermining" else 0.0
|
|
|
91 |
outputs = model(**inputs)
|
92 |
scores = torch.sigmoid(outputs.logits.squeeze(0)).numpy()
|
93 |
|
94 |
+
threshold_labels = [
|
95 |
+
label for label, score in zip(LABELS, scores)
|
96 |
+
if score > adjusted_thresholds[label]
|
97 |
+
]
|
|
|
|
|
|
|
98 |
|
99 |
top_patterns = sorted(
|
100 |
[(label, score) for label, score in zip(LABELS, scores)],
|
|
|
102 |
reverse=True
|
103 |
)[:2]
|
104 |
|
105 |
+
weighted_scores = [(PATTERN_WEIGHTS.get(label, 1.0) * score) for label, score in top_patterns]
|
106 |
+
abuse_score = np.mean(weighted_scores) * 100
|
107 |
+
|
108 |
+
stage = get_risk_stage(threshold_labels, sentiment)
|
109 |
+
|
110 |
+
return abuse_score, threshold_labels, top_patterns, result, stage
|
111 |
+
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
112 |
def analyze_composite(msg1, msg2, msg3, *answers_and_none):
    """Build the text report for up to three messages plus the checklist.

    Args:
        msg1, msg2, msg3: Message texts. Blank, whitespace-only, or ``None``
            entries are ignored.
        *answers_and_none: One truthy/falsy answer per entry of
            ``ESCALATION_QUESTIONS`` (in the same order), followed by a final
            flag that, when truthy, forces the escalation score to 0.

    Returns:
        str: A multi-line report, or a prompt asking for input when every
        message is blank.
    """
    responses = answers_and_none[:len(ESCALATION_QUESTIONS)]
    # Guard the [-1] lookup so a call without checklist values does not raise.
    none_selected = answers_and_none[-1] if answers_and_none else False
    if none_selected:
        escalation_score = 0
    else:
        escalation_score = sum(
            weight
            for (_, weight), answered in zip(ESCALATION_QUESTIONS, responses)
            if answered
        )

    # `m and ...` protects against a None value from an untouched textbox.
    active = [m for m in (msg1, msg2, msg3) if m and m.strip()]
    if not active:
        return "Please enter at least one message."

    # Each result is (abuse_score, threshold_labels, top_patterns, result, stage).
    results = [analyze_single_message(m, THRESHOLDS.copy()) for m in active]
    abuse_scores = [r[0] for r in results]
    stages = [r[4] for r in results]

    # Most frequent stage; on a tie the lowest stage number wins. Sorting makes
    # that explicit instead of depending on set iteration order.
    most_common_stage = max(sorted(set(stages)), key=stages.count)
    stage_text = RISK_STAGE_LABELS[most_common_stage]

    # Report the strongest top pattern across all analysed messages rather
    # than only the first message's (max() keeps the earliest on equal scores).
    best_label, best_score = max(
        (r[2][0] for r in results), key=lambda pair: pair[1]
    )
    top_label = f"{best_label} – {int(round(best_score * 100))}%"

    composite_abuse = int(round(sum(abuse_scores) / len(abuse_scores)))

    if composite_abuse >= 85 or escalation_score >= 16:
        risk_level = "high"
    elif composite_abuse >= 60 or escalation_score >= 8:
        risk_level = "moderate"
    else:
        risk_level = "low"

    max_escalation = sum(weight for _, weight in ESCALATION_QUESTIONS)

    # NOTE(review): the "Escalation Potential" label is driven by risk_level,
    # which also depends on composite_abuse -- confirm this is intended.
    out = f"Abuse Intensity: {composite_abuse}%\n"
    out += f"Escalation Potential: {risk_level.capitalize()} ({escalation_score}/{max_escalation})\n"
    out += f"Top Pattern: {top_label}\n"
    out += f"\n{stage_text}"
    return out
|
146 |
|
147 |
textbox_inputs = [gr.Textbox(label=f"Message {i+1}") for i in range(3)]
|
|
|
157 |
)
|
158 |
|
159 |
if __name__ == "__main__":
|
160 |
+
iface.launch()
|