import gradio as gr
import torch
import numpy as np
from transformers import pipeline, AutoTokenizer, AutoModelForSeq2SeqLM
from transformers import RobertaForSequenceClassification, RobertaTokenizer
from motif_tagging import detect_motifs
import re
# --- SST Sentiment Model ---
sst_pipeline = pipeline("sentiment-analysis", model="distilbert-base-uncased-finetuned-sst-2-english")
# --- Abuse Model ---
model_name = "SamanthaStorm/autotrain-jlpi4-mllvp"
model = RobertaForSequenceClassification.from_pretrained(model_name)
tokenizer = RobertaTokenizer.from_pretrained(model_name)
LABELS = [
    "blame shifting", "contradictory statements", "control", "dismissiveness",
    "gaslighting", "guilt tripping", "insults", "obscure language",
    "projection", "recovery phase", "threat"
]
THRESHOLDS = {
    "blame shifting": 0.3, "contradictory statements": 0.32, "control": 0.48, "dismissiveness": 0.45,
    "gaslighting": 0.30, "guilt tripping": 0.20, "insults": 0.34, "obscure language": 0.25,
    "projection": 0.35, "recovery phase": 0.25, "threat": 0.25
}
PATTERN_WEIGHTS = {
    "gaslighting": 1.3, "control": 1.2, "dismissiveness": 0.8,
    "blame shifting": 0.8, "contradictory statements": 0.75
}
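# NOTE: PATTERN_WEIGHTS is defined but not applied anywhere below; it is
# presumably intended to scale per-label scores before thresholding.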
EXPLANATIONS = {
    "blame shifting": "Blame-shifting redirects responsibility to avoid accountability.",
    "contradictory statements": "Flipping positions or denying previous claims.",
    "control": "Attempts to restrict another person’s autonomy.",
    "dismissiveness": "Disregarding or belittling someone’s feelings or needs.",
    "gaslighting": "Manipulating someone into questioning their reality.",
    "guilt tripping": "Using guilt to control or pressure.",
    "insults": "Derogatory or demeaning language.",
    "obscure language": "Vague, superior, or confusing language used manipulatively.",
    "projection": "Accusing someone else of your own behaviors.",
    "recovery phase": "Resetting tension without real change.",
    "threat": "Using fear or harm to control or intimidate."
}
RISK_SNIPPETS = {
    "low": (
        "🟢 Risk Level: Low",
        "The language patterns here do not strongly indicate abuse.",
        "Check in with yourself and monitor for repeated patterns."
    ),
    "moderate": (
        "⚠️ Risk Level: Moderate to High",
        "Language includes control, guilt, or reversal tactics.",
        "These patterns reduce self-trust. Document or talk with someone safe."
    ),
    "high": (
        "🛑 Risk Level: High",
        "Strong indicators of coercive control or threat present.",
        "Consider building a safety plan or contacting support."
    )
}
DARVO_PATTERNS = {
    "blame shifting", "projection", "dismissiveness", "guilt tripping", "contradictory statements"
}
DARVO_MOTIFS = [
    "i guess i’m the bad guy", "after everything i’ve done", "you always twist everything",
    "so now it’s all my fault", "i’m the villain", "i’m always wrong", "you never listen",
    "you’re attacking me", "i’m done trying", "i’m the only one who cares"
]
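# Danger-assessment style questions; each carries a severity weight that
# feeds the escalation score computed in analyze_composite.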
ESCALATION_QUESTIONS = [
    ("Partner has access to firearms or weapons", 4),
    ("Partner threatened to kill you", 3),
    ("Partner threatened you with a weapon", 3),
    ("Partner has ever choked you, even if you considered it consensual at the time", 4),
    ("Partner injured or threatened your pet(s)", 3),
    ("Partner has broken your things, punched or kicked walls, or thrown things", 2),
    ("Partner forced or coerced you into unwanted sexual acts", 3),
    ("Partner threatened to take away your children", 2),
    ("Violence has increased in frequency or severity", 3),
    ("Partner monitors your calls/GPS/social media", 2)
]
def detect_contradiction(message):
    patterns = [
        (r"\b(i love you).{0,15}(i hate you|you ruin everything)", re.IGNORECASE),
        (r"\b(i’m sorry).{0,15}(but you|if you hadn’t)", re.IGNORECASE),
        (r"\b(i’m trying).{0,15}(you never|why do you)", re.IGNORECASE),
        (r"\b(do what you want).{0,15}(you’ll regret it|i always give everything)", re.IGNORECASE),
        (r"\b(i don’t care).{0,15}(you never think of me)", re.IGNORECASE),
        (r"\b(i guess i’m just).{0,15}(the bad guy|worthless|never enough)", re.IGNORECASE)
    ]
    return any(re.search(p, message, flags) for p, flags in patterns)
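# DARVO (Deny, Attack, Reverse Victim and Offender) score: a weighted blend of
# DARVO-associated pattern hits (0.30), positive-to-negative sentiment shift
# (0.30), DARVO motif phrase matches (0.25), and a contradiction flag (0.15),
# capped at 1.0.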
def calculate_darvo_score(patterns, sentiment_before, sentiment_after, motifs_found, contradiction_flag=False):
    pattern_hits = len([p for p in patterns if p in DARVO_PATTERNS])
    pattern_score = pattern_hits / len(DARVO_PATTERNS)
    sentiment_shift_score = max(0.0, sentiment_after - sentiment_before)
    motif_hits = len([m for m in motifs_found if m.lower() in DARVO_MOTIFS])
    motif_score = motif_hits / len(DARVO_MOTIFS)
    contradiction_score = 1.0 if contradiction_flag else 0.0
    return round(min(0.3 * pattern_score + 0.3 * sentiment_shift_score + 0.25 * motif_score + 0.15 * contradiction_score, 1.0), 3)
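# Map the composite abuse score (0-100) onto the RISK_SNIPPETS tiers:
# high >= 85, moderate >= 60, low otherwise.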
def generate_risk_snippet(abuse_score, top_label):
    if abuse_score >= 85:
        risk_level = "high"
    elif abuse_score >= 60:
        risk_level = "moderate"
    else:
        risk_level = "low"
    title, summary, advice = RISK_SNIPPETS[risk_level]
    return f"\n\n{title}\n{summary} (Pattern: **{top_label}**)\n💡 {advice}"
def analyze_single_message(text, thresholds, motif_flags):
    motif_hits, matched_phrases = detect_motifs(text)
    # SST Sentiment
    result = sst_pipeline(text)[0]
    sentiment = "supportive" if result['label'] == "POSITIVE" else "undermining"
    sentiment_score = result['score'] if sentiment == "undermining" else 0.0
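    # A supportive reading slightly raises every abuse threshold, reducing
    # false positives on positive-sounding messages.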
    adjusted_thresholds = {
        k: (v + 0.05) if sentiment == "supportive" else v
        for k, v in thresholds.items()
    }
    contradiction_flag = detect_contradiction(text)
    motifs = [phrase for _, phrase in matched_phrases]
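    # Multi-label classification: sigmoid over the logits yields an
    # independent score per label, each compared to its adjusted threshold.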
    inputs = tokenizer(text, return_tensors="pt", truncation=True, padding=True)
    with torch.no_grad():
        outputs = model(**inputs)
        scores = torch.sigmoid(outputs.logits.squeeze(0)).numpy()
    threshold_labels = [
        label for label, score in zip(LABELS, scores)
        if score > adjusted_thresholds[label]
    ]
    top_patterns = sorted(
        [(label, score) for label, score in zip(LABELS, scores)],
        key=lambda x: x[1],
        reverse=True
    )[:2]
    pattern_labels = threshold_labels + [label for label, _ in matched_phrases]
    darvo_score = calculate_darvo_score(pattern_labels, 0.0, sentiment_score, motifs, contradiction_flag)
    return (
        np.mean([score for _, score in top_patterns]) * 100,
        threshold_labels,
        top_patterns,
        darvo_score,
        {"label": sentiment, "raw_label": result['label'], "score": result['score']}
    )
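# Combine up to three messages with the escalation quiz answers. The trailing
# *answers_and_none arguments arrive in the same order as the Gradio inputs:
# one boolean per ESCALATION_QUESTIONS entry, then "None of the above" last.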
def analyze_composite(msg1, msg2, msg3, *answers_and_none):
    responses = answers_and_none[:len(ESCALATION_QUESTIONS)]
    none_selected = answers_and_none[-1]
    escalation_score = 0 if none_selected else sum(
        w for (_, w), a in zip(ESCALATION_QUESTIONS, responses) if a
    )
    escalation_level = "High" if escalation_score >= 16 else "Moderate" if escalation_score >= 8 else "Low"
    messages = [msg1, msg2, msg3]
    active = [m for m in messages if m.strip()]
    if not active:
        return "Please enter at least one message."
    results = [analyze_single_message(m, THRESHOLDS.copy(), []) for m in active]
    abuse_scores = [r[0] for r in results]
    darvo_scores = [r[3] for r in results]
    # r[2] holds (label, score) pairs; pick the label with the highest score
    # observed across all analyzed messages.
    top_label = max((pat for r in results for pat in r[2]), key=lambda x: x[1])[0]
    composite_abuse = int(round(sum(abuse_scores) / len(abuse_scores)))
    avg_darvo = round(sum(darvo_scores) / len(darvo_scores), 3)
    out = f"Abuse Intensity: {composite_abuse}%\n"
    out += f"Escalation Potential: {escalation_level} ({escalation_score}/{sum(w for _, w in ESCALATION_QUESTIONS)})"
    out += generate_risk_snippet(composite_abuse, top_label)
    if avg_darvo > 0.25:
        level = "moderate" if avg_darvo < 0.65 else "high"
        out += f"\n\nDARVO Score: {avg_darvo} → This indicates a **{level} likelihood** of narrative reversal (DARVO), where the speaker may be denying, attacking, or reversing blame."
    return out
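# --- Gradio UI ---
# The inputs list must stay in the order analyze_composite unpacks: three
# message boxes, the escalation checkboxes, then the "None of the above" box.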
textbox_inputs = [gr.Textbox(label=f"Message {i+1}") for i in range(3)]
quiz_boxes = [gr.Checkbox(label=q) for q, _ in ESCALATION_QUESTIONS]
none_box = gr.Checkbox(label="None of the above")
iface = gr.Interface(
    fn=analyze_composite,
    inputs=textbox_inputs + quiz_boxes + [none_box],
    outputs=gr.Textbox(label="Results"),
    title="Abuse Pattern Detector + Escalation Quiz",
    allow_flagging="manual"
)
if __name__ == "__main__":
    iface.launch()