# Spaces: Running on Zero
import gradio as gr | |
import torch | |
import numpy as np | |
from transformers import pipeline, RobertaForSequenceClassification, RobertaTokenizer | |
from motif_tagging import detect_motifs | |
import re | |
# --- SST sentiment model ---
# Ready-made sentiment-analysis pipeline backed by the SST-2 fine-tuned
# DistilBERT checkpoint.
sst_pipeline = pipeline(
    "sentiment-analysis",
    model="distilbert-base-uncased-finetuned-sst-2-english",
)

# --- Abuse model ---
# The tokenizer and the sequence classifier are loaded from the same
# checkpoint so that they stay in sync.
model_name = "SamanthaStorm/autotrain-jlpi4-mllvp"
tokenizer = RobertaTokenizer.from_pretrained(model_name)
model = RobertaForSequenceClassification.from_pretrained(model_name)
# Names of the abuse patterns handled by this module. The same eleven strings
# are the keys of THRESHOLDS.
# NOTE(review): presumably ordered to match the classifier's output indices —
# confirm before reordering.
LABELS = [
    "blame shifting",
    "contradictory statements",
    "control",
    "dismissiveness",
    "gaslighting",
    "guilt tripping",
    "insults",
    "obscure language",
    "projection",
    "recovery phase",
    "threat",
]
# Per-label cutoff values, keyed by pattern name.
# NOTE(review): presumably a pattern counts as detected once its score exceeds
# the cutoff — verify against the code that reads this table.
THRESHOLDS = {
    "blame shifting": 0.3,
    "contradictory statements": 0.32,
    "control": 0.48,
    "dismissiveness": 0.45,
    "gaslighting": 0.30,
    "guilt tripping": 0.20,
    "insults": 0.34,
    "obscure language": 0.25,
    "projection": 0.35,
    "recovery phase": 0.25,
    "threat": 0.25,
}
# Multipliers for a subset of the patterns; the remaining labels have no entry.
# NOTE(review): callers presumably fall back to a neutral weight for patterns
# that are not listed — verify at the point of use.
PATTERN_WEIGHTS = {
    "gaslighting": 1.3,
    "control": 1.2,
    "dismissiveness": 0.8,
    "blame shifting": 0.8,
    "contradictory statements": 0.75,
}
# Display text for each risk stage, keyed by stage number (1-4). Every entry is
# a headline line, a newline, and a one-sentence description.
RISK_STAGE_LABELS = {
    1: (
        "🌀 Risk Stage: Tension-Building\n"
        "This message reflects rising emotional pressure or subtle control attempts."
    ),
    2: (
        "🔥 Risk Stage: Escalation\n"
        "This message includes direct or aggressive patterns, suggesting active harm."
    ),
    3: (
        "🌧️ Risk Stage: Reconciliation\n"
        "This message reflects a reset attempt—apologies or emotional repair without accountability."
    ),
    4: (
        "🌸 Risk Stage: Calm / Honeymoon\n"
        "This message appears supportive but may follow prior harm, minimizing it."
    ),
}
# Escalation checklist: each entry is a (question text, integer weight) pair.
# NOTE(review): the weights of the checked items are presumably summed into an
# escalation score — confirm against the code that consumes this list.
ESCALATION_QUESTIONS = [
    ("Partner has access to firearms or weapons", 4),
    ("Partner threatened to kill you", 3),
    ("Partner threatened you with a weapon", 3),
    (
        "Partner has ever choked you, even if you considered it consensual at the time",
        4,
    ),
    ("Partner injured or threatened your pet(s)", 3),
    (
        "Partner has broken your things, punched or kicked walls, or thrown things ",
        2,
    ),
    ("Partner forced or coerced you into unwanted sexual acts", 3),
    ("Partner threatened to take away your children", 2),
    ("Violence has increased in frequency or severity", 3),
    ("Partner monitors your calls/GPS/social media", 2),
]
# Phrase pairs that signal a contradictory statement: an opening phrase
# followed, within at most 15 characters, by a phrase that undercuts it.
# Contractions accept both the straight (') and the typographic (’) apostrophe,
# because typed input normally uses the former while pasted text often carries
# the latter. Compiled once at import time rather than on every call.
_CONTRADICTION_PATTERNS = tuple(
    re.compile(raw, re.IGNORECASE)
    for raw in (
        r"\b(i love you).{0,15}(i hate you|you ruin everything)",
        r"\b(i['’]m sorry).{0,15}(but you|if you hadn['’]t)",
        r"\b(i['’]m trying).{0,15}(you never|why do you)",
        r"\b(do what you want).{0,15}(you['’]ll regret it|i always give everything)",
        r"\b(i don['’]t care).{0,15}(you never think of me)",
        r"\b(i guess i['’]m just).{0,15}(the bad guy|worthless|never enough)",
    )
)


def detect_contradiction(message):
    """Return True when *message* contains a known contradictory phrase pair.

    Matching is case-insensitive. The two halves of a pair must occur on the
    same line with no more than 15 characters between them.

    Args:
        message: Text to inspect. An empty string or ``None`` yields ``False``.

    Returns:
        bool: ``True`` if any pattern matches, otherwise ``False``.
    """
    if not message:
        return False
    return any(pattern.search(message) for pattern in _CONTRADICTION_PATTERNS)
def get_risk_stage(patterns, sentiment):
    """Pick a risk-stage number (1-4) from the detected patterns and sentiment.

    Rules are checked in priority order and the first hit wins:

    1. "threat" or "insults" present            -> 2
    2. "recovery phase" present                 -> 3
    3. "control" or "guilt tripping" present    -> 1
    4. sentiment is "supportive" and "projection" or "dismissiveness"
       is present                               -> 4
    5. anything else                            -> 1

    Args:
        patterns: Container of detected pattern names, tested with ``in``.
        sentiment: Sentiment label; only the value "supportive" is significant.

    Returns:
        int: The stage number. Presumably a key of RISK_STAGE_LABELS — confirm
        at the call site.
    """

    def found(*names):
        # True when at least one of the given pattern names was detected.
        return any(name in patterns for name in names)

    if found("threat", "insults"):
        return 2
    if found("recovery phase"):
        return 3
    if found("control", "guilt tripping"):
        return 1
    if sentiment == "supportive" and found("projection", "dismissiveness"):
        return 4
    return 1
# One-sentence explanation per pattern, keyed by lower-case pattern name.
# "default" is the fallback for any pattern without its own entry. Kept at
# module level so the table is built once, not on every call.
_WHY_FLAGGED = {
    "control": "This message may reflect efforts to restrict someone’s autonomy, even if it's framed as concern or care.",
    "gaslighting": "This message could be manipulating someone into questioning their perception or feelings.",
    "dismissiveness": "This message may include belittling, invalidating, or ignoring the other person’s experience.",
    "insults": "Direct insults often appear in escalating abusive dynamics and can erode emotional safety.",
    "threat": "This message includes threatening language, which is a strong predictor of harm.",
    "blame shifting": "This message may redirect responsibility to avoid accountability, especially during conflict.",
    "guilt tripping": "This message may induce guilt in order to control or manipulate behavior.",
    "recovery phase": "This message may be part of a tension-reset cycle, appearing kind but avoiding change.",
    "projection": "This message may involve attributing the abuser’s own behaviors to the victim.",
    "default": "This message contains language patterns that may affect safety, clarity, or emotional autonomy.",
}

# Sentence appended after the "strong indicators" line, chosen by risk level.
_RISK_LEVEL_NOTES = {
    "high": "The language may reflect patterns of emotional control, even when expressed in soft or caring terms.\n",
    "moderate": "There are signs of emotional pressure or indirect control that may escalate if repeated.\n",
    "low": "The message does not strongly indicate abuse, but it's important to monitor for patterns.\n",
}


def generate_risk_snippet(abuse_score, top_label, escalation_score):
    """Build the markdown risk summary shown for an analysed message.

    The risk level is "high" when ``abuse_score >= 85`` or
    ``escalation_score >= 16``, "moderate" when ``abuse_score >= 60`` or
    ``escalation_score >= 8``, and "low" otherwise.

    Args:
        abuse_score: Numeric abuse score compared against 60 and 85.
        top_label: Top pattern, either ``"<label> – <score>"`` (separated by
            an en dash surrounded by spaces) or a bare ``"<label>"``.
        escalation_score: Numeric escalation score compared against 8 and 16.

    Returns:
        str: Multi-line markdown text with the risk level, a level-specific
        note, the explanation for the pattern, and the detected pattern. The
        score is shown in parentheses only when ``top_label`` carried one.
    """
    if abuse_score >= 85 or escalation_score >= 16:
        risk_level = "high"
    elif abuse_score >= 60 or escalation_score >= 8:
        risk_level = "moderate"
    else:
        risk_level = "low"

    # Split once: the first field is the label, the second (if any) the score.
    fields = top_label.split(" – ")
    pattern_label = fields[0]
    pattern_score = fields[1] if len(fields) > 1 else ""

    explanation = _WHY_FLAGGED.get(
        pattern_label.strip().lower(), _WHY_FLAGGED["default"]
    )

    # Skip the parentheses entirely when no score was supplied, so the output
    # never contains a dangling "()".
    if pattern_score:
        detected = f"{pattern_label} ({pattern_score})"
    else:
        detected = pattern_label

    sections = [
        f"\n🛑 Risk Level: {risk_level.capitalize()}\n",
        f"This message shows strong indicators of **{pattern_label}**. ",
        _RISK_LEVEL_NOTES[risk_level],
        f"\n💡 *Why this might be flagged:*\n{explanation}\n",
        f"\nDetected Pattern: **{detected}**\n",
        "🧠 You can review the pattern in context. This tool highlights possible dynamics—not judgments.",
    ]
    return "".join(sections)