Spaces:
Running
Running
File size: 6,565 Bytes
d6e219c f1948f2 c6c79a8 b54664e 6153eb8 0ff864f 909b775 2f6ac5d f32b7e3 1dbc865 92b1518 a9d4250 e185e86 1dbc865 e185e86 ca1d104 f32b7e3 e185e86 2f6ac5d f32b7e3 2f6ac5d aeed86a c6c79a8 e185e86 f32b7e3 6139d49 f32b7e3 6139d49 623a77f 6139d49 f32b7e3 6139d49 c6c79a8 9f4751d 909b775 a568262 f32b7e3 6cbeaf8 f32b7e3 a568262 f32b7e3 6cbeaf8 9ab7ab9 9f4751d c6c79a8 ca1d104 6cbeaf8 9f4751d c6c79a8 d315105 623a77f 1dbc865 d315105 1dbc865 cb6b46c c6c79a8 1dbc865 c6c79a8 f32b7e3 c6c79a8 cb6b46c d315105 c6c79a8 d315105 ec5f81e f32b7e3 d315105 a28ef35 ab8c96f 1dbc865 d315105 1dbc865 d315105 1dbc865 ab8c96f 4292d1b 1dbc865 c6c79a8 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 |
import gradio as gr
import torch
import numpy as np
from transformers import pipeline, RobertaForSequenceClassification, RobertaTokenizer
from motif_tagging import detect_motifs
import re
# --- SST Sentiment Model ---
# Sentiment classifier loaded once at import time. analyze_single_message maps
# its "POSITIVE" label to "supportive" and any other label to "undermining".
sst_pipeline = pipeline("sentiment-analysis", model="distilbert-base-uncased-finetuned-sst-2-english")
# --- Abuse Model ---
# Sequence-classification checkpoint and its matching tokenizer, also loaded
# once at import time. analyze_single_message applies a sigmoid to each logit
# and pairs the resulting scores positionally with LABELS.
# NOTE(review): assumes the checkpoint's output order matches LABELS - confirm.
model_name = "SamanthaStorm/autotrain-jlpi4-mllvp"
model = RobertaForSequenceClassification.from_pretrained(model_name)
tokenizer = RobertaTokenizer.from_pretrained(model_name)
# Pattern names, paired positionally with the abuse model's scores via
# zip(LABELS, scores); the order of this list therefore matters.
LABELS = [
    "blame shifting",
    "contradictory statements",
    "control",
    "dismissiveness",
    "gaslighting",
    "guilt tripping",
    "insults",
    "obscure language",
    "projection",
    "recovery phase",
    "threat",
]

# Minimum score a label must exceed to count as detected. One entry per label.
THRESHOLDS = {
    "blame shifting": 0.3,
    "contradictory statements": 0.32,
    "control": 0.48,
    "dismissiveness": 0.45,
    "gaslighting": 0.30,
    "guilt tripping": 0.20,
    "insults": 0.34,
    "obscure language": 0.25,
    "projection": 0.35,
    "recovery phase": 0.25,
    "threat": 0.25,
}

# Multipliers applied to the top pattern scores when computing the abuse
# score; labels not listed here are looked up with a default weight of 1.0.
PATTERN_WEIGHTS = {
    "gaslighting": 1.3,
    "control": 1.2,
    "dismissiveness": 0.8,
    "blame shifting": 0.8,
    "contradictory statements": 0.75,
}
# User-facing description for each risk stage, keyed by the stage number
# returned from get_risk_stage. Each value is a headline line followed by a
# one-sentence explanation.
RISK_STAGE_LABELS = {
    1: (
        "🌀 Risk Stage: Tension-Building\n"
        "This message reflects rising emotional pressure or subtle control attempts."
    ),
    2: (
        "🔥 Risk Stage: Escalation\n"
        "This message includes direct or aggressive patterns, suggesting active harm."
    ),
    3: (
        "🌧️ Risk Stage: Reconciliation\n"
        "This message reflects a reset attempt—apologies or emotional repair without accountability."
    ),
    4: (
        "🌸 Risk Stage: Calm / Honeymoon\n"
        "This message appears supportive but may follow prior harm, minimizing it."
    ),
}

# Escalation quiz: (checkbox label, weight) pairs. The weights of the ticked
# questions are summed into the escalation score.
ESCALATION_QUESTIONS = [
    ("Partner has access to firearms or weapons", 4),
    ("Partner threatened to kill you", 3),
    ("Partner threatened you with a weapon", 3),
    ("Partner has ever choked you, even if you considered it consensual at the time", 4),
    ("Partner injured or threatened your pet(s)", 3),
    ("Partner has broken your things, punched or kicked walls, or thrown things ", 2),
    ("Partner forced or coerced you into unwanted sexual acts", 3),
    ("Partner threatened to take away your children", 2),
    ("Violence has increased in frequency or severity", 3),
    ("Partner monitors your calls/GPS/social media", 2),
]
# Phrase pairs that contradict each other when the second follows the first
# within 15 characters. Apostrophes are written as ['’] so that both the
# straight form typed on most keyboards and the typographic form produced by
# autocorrect / copy-paste are recognised. Compiled once at import time.
_CONTRADICTION_PATTERNS = tuple(
    re.compile(pattern, re.IGNORECASE)
    for pattern in (
        r"\b(i love you).{0,15}(i hate you|you ruin everything)",
        r"\b(i['’]m sorry).{0,15}(but you|if you hadn['’]t)",
        r"\b(i['’]m trying).{0,15}(you never|why do you)",
        r"\b(do what you want).{0,15}(you['’]ll regret it|i always give everything)",
        r"\b(i don['’]t care).{0,15}(you never think of me)",
        r"\b(i guess i['’]m just).{0,15}(the bad guy|worthless|never enough)",
    )
)


def detect_contradiction(message):
    """Return True if *message* contains a known contradictory phrase pair.

    A pair matches when the opening phrase (e.g. "I'm sorry") is followed,
    within at most 15 characters on the same line, by one of its contradicting
    continuations (e.g. "but you"). Matching is case-insensitive and accepts
    either apostrophe style.

    Args:
        message: The text to scan.

    Returns:
        bool: True when any pattern matches, False otherwise (including for
        an empty string).
    """
    return any(pattern.search(message) for pattern in _CONTRADICTION_PATTERNS)
def get_risk_stage(patterns, sentiment):
    """Pick the risk-stage number (a key of RISK_STAGE_LABELS) for a message.

    Rules are checked in priority order:
      * "threat" or "insults" detected            -> 2
      * "recovery phase" detected                 -> 3
      * "control" or "guilt tripping" detected    -> 1
      * supportive sentiment together with
        "projection" or "dismissiveness"          -> 4
      * anything else                             -> 1

    Args:
        patterns: Collection of detected pattern labels.
        sentiment: "supportive" or "undermining".

    Returns:
        int: The stage number.
    """

    def has_any(*names):
        return any(name in patterns for name in names)

    if has_any("threat", "insults"):
        return 2
    if "recovery phase" in patterns:
        return 3
    if has_any("control", "guilt tripping"):
        return 1

    looks_like_honeymoon = sentiment == "supportive" and has_any("projection", "dismissiveness")
    return 4 if looks_like_honeymoon else 1
def analyze_single_message(text, thresholds):
    """Score a single message with the sentiment model and the abuse model.

    Args:
        text: The message to analyze.
        thresholds: Mapping of label -> minimum score the label must exceed to
            count as detected. It is read, not modified.

    Returns:
        A 5-tuple ``(abuse_score, threshold_labels, top_patterns, result, stage)``:
          * abuse_score: mean of the two highest label scores, each multiplied
            by its PATTERN_WEIGHTS entry (default 1.0), times 100. Because
            some weights are above 1 this value can exceed 100.
          * threshold_labels: labels whose score exceeds the (sentiment-
            adjusted) threshold, in LABELS order.
          * top_patterns: the two highest ``(label, score)`` pairs, best first.
          * result: the raw sentiment-pipeline output dict for the message.
          * stage: risk-stage number from get_risk_stage.
    """
    # NOTE(review): motif detection is still invoked, but nothing it returns
    # feeds into scoring yet; the call is kept in case it has side effects.
    detect_motifs(text)

    # truncation=True keeps an over-long message within the sentiment model's
    # maximum input length, the same safeguard the abuse tokenizer call
    # below already applies.
    result = sst_pipeline(text, truncation=True)[0]
    sentiment = "supportive" if result["label"] == "POSITIVE" else "undermining"

    # Positive-sounding messages must clear a slightly higher bar per label.
    threshold_bump = 0.05 if sentiment == "supportive" else 0.0
    adjusted_thresholds = {
        label: cutoff + threshold_bump for label, cutoff in thresholds.items()
    }

    inputs = tokenizer(text, return_tensors="pt", truncation=True, padding=True)
    with torch.no_grad():
        outputs = model(**inputs)
    # Independent sigmoid per logit: each label is scored on its own.
    scores = torch.sigmoid(outputs.logits.squeeze(0)).numpy()
    labeled_scores = list(zip(LABELS, scores))

    threshold_labels = [
        label for label, score in labeled_scores
        if score > adjusted_thresholds[label]
    ]
    top_patterns = sorted(labeled_scores, key=lambda pair: pair[1], reverse=True)[:2]

    weighted_scores = [
        PATTERN_WEIGHTS.get(label, 1.0) * score for label, score in top_patterns
    ]
    abuse_score = np.mean(weighted_scores) * 100

    stage = get_risk_stage(threshold_labels, sentiment)
    return abuse_score, threshold_labels, top_patterns, result, stage
def analyze_composite(msg1, msg2, msg3, *answers_and_none):
    """Analyze up to three messages plus the escalation quiz and build a report.

    Args:
        msg1, msg2, msg3: Message texts; blank or None entries are ignored.
        *answers_and_none: One truthy/falsy answer per ESCALATION_QUESTIONS
            entry (in order), followed by the "None of the above" flag.

    Returns:
        str: A multi-line report with the abuse intensity, escalation
        potential, top pattern and risk-stage description, or a prompt to
        enter a message when none was provided.
    """
    question_count = len(ESCALATION_QUESTIONS)
    responses = answers_and_none[:question_count]
    # The "None of the above" flag only exists when more values than questions
    # were passed; otherwise the last value is a question answer (or there are
    # no values at all) and must not be mistaken for the flag.
    none_selected = (
        answers_and_none[-1] if len(answers_and_none) > question_count else False
    )
    if none_selected:
        escalation_score = 0
    else:
        escalation_score = sum(
            weight
            for (_, weight), answered in zip(ESCALATION_QUESTIONS, responses)
            if answered
        )

    # Guard against None as well as whitespace-only input.
    active = [m for m in (msg1, msg2, msg3) if m and m.strip()]
    if not active:
        return "Please enter at least one message."

    results = [analyze_single_message(m, THRESHOLDS.copy()) for m in active]
    abuse_scores = [r[0] for r in results]
    stages = [r[4] for r in results]

    most_common_stage = max(set(stages), key=stages.count)
    stage_text = RISK_STAGE_LABELS[most_common_stage]

    # Report the strongest top pattern across all analyzed messages rather
    # than only the first message's; ties keep the earliest message.
    best_label, best_score = max(
        (r[2][0] for r in results), key=lambda pair: pair[1]
    )
    top_label = f"{best_label} – {int(round(best_score * 100))}%"

    # Pattern weights above 1 can push the average past 100; cap the
    # percentage so the report never shows more than 100%. Risk levels are
    # unaffected because "high" already starts at 85.
    composite_abuse = min(100, int(round(sum(abuse_scores) / len(abuse_scores))))

    if composite_abuse >= 85 or escalation_score >= 16:
        risk_level = "high"
    elif composite_abuse >= 60 or escalation_score >= 8:
        risk_level = "moderate"
    else:
        risk_level = "low"

    max_escalation = sum(weight for _, weight in ESCALATION_QUESTIONS)
    out = f"Abuse Intensity: {composite_abuse}%\n"
    out += f"Escalation Potential: {risk_level.capitalize()} ({escalation_score}/{max_escalation})\n"
    out += f"Top Pattern: {top_label}\n"
    out += f"\n{stage_text}"
    return out
# --- Gradio UI ---
# Input order must match analyze_composite's signature: three message boxes,
# then one checkbox per escalation question, then "None of the above".
textbox_inputs = [gr.Textbox(label=f"Message {i+1}") for i in range(3)]
quiz_boxes = [gr.Checkbox(label=q) for q, _ in ESCALATION_QUESTIONS]
none_box = gr.Checkbox(label="None of the above")

iface = gr.Interface(
    fn=analyze_composite,
    inputs=textbox_inputs + quiz_boxes + [none_box],
    outputs=gr.Textbox(label="Results"),
    title="Abuse Pattern Detector + Escalation Quiz",
    allow_flagging="manual",
)

# Start the web app only when executed as a script, not on import.
if __name__ == "__main__":
    iface.launch()