# Hugging Face Spaces status: Running on Zero
import gradio as gr | |
import torch | |
import numpy as np | |
from transformers import pipeline, AutoModelForSequenceClassification, AutoTokenizer | |
from motif_tagging import detect_motifs | |
import re | |
import matplotlib.pyplot as plt | |
import io | |
from PIL import Image | |
from datetime import datetime | |
# ——— Constants & Utilities ——— | |
# Label names, in the positional order they are zipped against the
# classifier's output probabilities in analyze_single_message.
LABELS = [
    "blame shifting",
    "contradictory statements",
    "control",
    "dismissiveness",
    "gaslighting",
    "guilt tripping",
    "insults",
    "obscure language",
    "projection",
    "recovery phase",
    "threat",
]

# Per-label probability cutoff: a label is reported only when its sigmoid
# probability is strictly greater than the value listed here.
THRESHOLDS = {
    "blame shifting":           0.3,
    "contradictory statements": 0.3,
    "control":                  0.35,
    "dismissiveness":           0.4,
    "gaslighting":              0.3,
    "guilt tripping":           0.3,
    "insults":                  0.3,
    "obscure language":         0.4,
    "projection":               0.4,
    "recovery phase":           0.35,
    "threat":                   0.3,
}

# Multipliers applied to label probabilities when computing the weighted
# abuse score; labels not listed here are looked up with a default of 1.0.
PATTERN_WEIGHTS = {
    "gaslighting":              1.3,
    "control":                  1.2,
    "dismissiveness":           0.8,
    "blame shifting":           0.8,
    "contradictory statements": 0.75,
    "threat":                   1.5,
}

# Labels that count toward the pattern component of the DARVO score.
DARVO_PATTERNS = {
    "blame shifting",
    "projection",
    "dismissiveness",
    "guilt tripping",
    "contradictory statements",
}
# Reference phrases used by calculate_darvo_score: a detected motif counts
# as a hit when any of these phrases (case-insensitively) occurs inside it.
# One phrase per line; order is preserved from the original list.
DARVO_MOTIFS = [
    "I never said that.",
    "You’re imagining things.",
    "That never happened.",
    "You’re making a big deal out of nothing.",
    "It was just a joke.",
    "You’re too sensitive.",
    "I don’t know what you’re talking about.",
    "You’re overreacting.",
    "I didn’t mean it that way.",
    "You’re twisting my words.",
    "You’re remembering it wrong.",
    "You’re always looking for something to complain about.",
    "You’re just trying to start a fight.",
    "I was only trying to help.",
    "You’re making things up.",
    "You’re blowing this out of proportion.",
    "You’re being paranoid.",
    "You’re too emotional.",
    "You’re always so dramatic.",
    "You’re just trying to make me look bad.",
    "You’re crazy.",
    "You’re the one with the problem.",
    "You’re always so negative.",
    "You’re just trying to control me.",
    "You’re the abusive one.",
    "You’re trying to ruin my life.",
    "You’re just jealous.",
    "You’re the one who needs help.",
    "You’re always playing the victim.",
    "You’re the one causing all the problems.",
    "You’re just trying to make me feel guilty.",
    "You’re the one who can’t let go of the past.",
    "You’re the one who’s always angry.",
    "You’re the one who’s always complaining.",
    "You’re the one who’s always starting arguments.",
    "You’re the one who’s always making things worse.",
    "You’re the one who’s always making me feel bad.",
    "You’re the one who’s always making me look like the bad guy.",
    "You’re the one who’s always making me feel like a failure.",
    "You’re the one who’s always making me feel like I’m not good enough.",
    "I can’t believe you’re doing this to me.",
    "You’re hurting me.",
    "You’re making me feel like a terrible person.",
    "You’re always blaming me for everything.",
    "You’re the one who’s abusive.",
    "You’re the one who’s controlling.",
    "You’re the one who’s manipulative.",
    "You’re the one who’s toxic.",
    "You’re the one who’s gaslighting me.",
    "You’re the one who’s always putting me down.",
    "You’re the one who’s always making me feel like I’m the problem.",
    "You’re the one who’s always making me feel like the bad guy.",
    "You’re the one who’s always making me feel like the villain.",
    "You’re the one who’s always making me feel like the one who needs to change.",
    "You’re the one who’s always making me feel like the one who’s wrong.",
    "You’re the one who’s always making me feel like the one who’s crazy.",
    "You’re the one who’s always making me feel like the one who’s abusive.",
    "You’re the one who’s always making me feel like the one who’s toxic.",
]
# Human-readable description for each numeric risk stage (1-4).
# Each value is "<headline>\n<explanation>".
RISK_STAGE_LABELS = {
    1: (
        "🌀 Risk Stage: Tension-Building\n"
        "This message reflects rising emotional pressure or subtle control attempts."
    ),
    2: (
        "🔥 Risk Stage: Escalation\n"
        "This message includes direct or aggressive patterns, suggesting active harm."
    ),
    3: (
        "🌧️ Risk Stage: Reconciliation\n"
        "This message reflects a reset attempt—apologies or emotional repair without accountability."
    ),
    4: (
        "🌸 Risk Stage: Calm / Honeymoon\n"
        "This message appears supportive but may follow prior harm, minimizing it."
    ),
}

# Escalation checklist: (question text shown as a checkbox label, weight
# added to the escalation score when the box is ticked).
ESCALATION_QUESTIONS = [
    ("Partner has access to firearms or weapons", 4),
    ("Partner threatened to kill you", 3),
    ("Partner threatened you with a weapon", 3),
    ("Partner has ever choked you", 4),
    ("Partner injured or threatened your pet(s)", 3),
    ("Partner has broken your things, punched walls, or thrown objects", 2),
    ("Partner forced or coerced you into unwanted sexual acts", 3),
    ("Partner threatened to take away your children", 2),
    ("Violence has increased in frequency or severity", 3),
    ("Partner monitors your calls/GPS/social media", 2),
]
# Opening phrase, then at most 15 characters, then a reversing phrase.
# "[’']" accepts both the typographic and the plain ASCII apostrophe, so
# text typed on an ordinary keyboard ("I'm sorry but you ...") matches too.
# Compiled once at import time instead of on every call.
_CONTRADICTION_PATTERNS = tuple(
    re.compile(pattern, re.IGNORECASE)
    for pattern in (
        r"\b(i love you).{0,15}(i hate you|you ruin everything)",
        r"\b(i[’']m sorry).{0,15}(but you|if you hadn[’']t)",
        r"\b(i[’']m trying).{0,15}(you never|why do you)",
        r"\b(do what you want).{0,15}(you[’']ll regret it|i always give everything)",
        r"\b(i don[’']t care).{0,15}(you never think of me)",
        r"\b(i guess i[’']m just).{0,15}(the bad guy|worthless|never enough)",
    )
)


def detect_contradiction(message):
    """Return True if *message* contains a self-contradicting phrase pair.

    A pair is an opening phrase (e.g. "I love you", "I'm sorry") followed
    within 15 characters by a phrase that reverses it (e.g. "I hate you",
    "but you"). Matching is case-insensitive and accepts either apostrophe
    style. The gap does not span newlines.

    Args:
        message: The text to scan. Empty or None input yields False.

    Returns:
        bool: True when at least one pattern matches, else False.
    """
    if not message:
        return False
    return any(pattern.search(message) for pattern in _CONTRADICTION_PATTERNS)
def calculate_darvo_score(patterns, sentiment_before, sentiment_after, motifs_found, contradiction_flag=False):
    """Combine four signals into a DARVO score in the range [0, 1].

    Components and their weights:
      * 0.30 - fraction of DARVO_PATTERNS present in *patterns*
      * 0.30 - positive sentiment shift (after minus before, floored at 0)
      * 0.25 - fraction of DARVO_MOTIFS-matching entries in *motifs_found*
      * 0.15 - 1.0 when *contradiction_flag* is set, else 0.0

    Args:
        patterns: Iterable of detected label names.
        sentiment_before: Baseline sentiment value.
        sentiment_after: Sentiment value for the message.
        motifs_found: Iterable of matched motif strings (each must support
            ``.lower()``).
        contradiction_flag: Whether a contradiction was detected.

    Returns:
        float: The weighted sum, capped at 1.0 and rounded to 3 decimals.
    """
    # Share of the known DARVO pattern labels that were detected.
    pattern_hits = sum(1 for name in patterns if name in DARVO_PATTERNS)
    pattern_component = pattern_hits / len(DARVO_PATTERNS)

    # Only an increase in sentiment contributes; decreases are ignored.
    shift_component = max(0.0, sentiment_after - sentiment_before)

    # A found motif counts when any reference phrase is a substring of it.
    motif_hits = 0
    for found in motifs_found:
        if any(phrase.lower() in found.lower() for phrase in DARVO_MOTIFS):
            motif_hits += 1
    motif_component = motif_hits / len(DARVO_MOTIFS)

    contradiction_component = 1.0 if contradiction_flag else 0.0

    weighted = (
        0.3 * pattern_component
        + 0.3 * shift_component
        + 0.25 * motif_component
        + 0.15 * contradiction_component
    )
    return round(min(weighted, 1.0), 3)
def generate_risk_snippet(abuse_score, top_label, escalation_score, stage):
    """Build the short risk-level paragraph shown in the results text.

    Args:
        abuse_score: Abuse intensity percentage.
        top_label: Leading label, optionally followed by " – <score>%";
            only the part before the separator is used.
        escalation_score: Numeric escalation checklist score.
        stage: Accepted for interface compatibility; not used here.

    Returns:
        str: A block starting with two newlines that states the risk level
        (High / Moderate / Low), the label, and a one-line explanation.
    """
    explanations = {
        "control": "This message may reflect efforts to restrict someone’s autonomy.",
        "gaslighting": "This message could be manipulating perception.",
        "dismissiveness": "This message may include belittling, invalidating, or ignoring the other person’s experience.",
        "insults": "Direct insults can erode emotional safety.",
        "threat": "This message includes threatening language—a strong predictor of harm.",
        "blame shifting": "This message may redirect responsibility to avoid accountability.",
        "guilt tripping": "This message may induce guilt to control behavior.",
        "recovery phase": "This message may be a tension-reset without change.",
        "projection": "This message may attribute the speaker’s faults to the target.",
    }
    fallback = "This message contains language patterns that may affect safety."

    # Strip any trailing " – NN%" suffix to recover the bare label.
    label, _sep, _rest = top_label.partition(" – ")
    why = explanations[label] if label in explanations else fallback

    # Start at the lowest level and promote as thresholds are crossed.
    level = "Low"
    if abuse_score >= 60 or escalation_score >= 8:
        level = "Moderate"
    if abuse_score >= 85 or escalation_score >= 16:
        level = "High"

    return f"\n\n🛑 Risk Level: {level}\nThis message shows **{label}**.\n💡 *Why:* {why}\n"
def detect_weapon_language(text):
    """Return True if *text* contains any weapon/violence keyword.

    The check is a case-insensitive substring search, so a keyword also
    matches inside longer words (e.g. "gun" inside "shotgun").
    """
    lowered = text.lower()
    for keyword in ("knife", "gun", "bomb", "kill you", "shoot", "explode"):
        if keyword in lowered:
            return True
    return False
def get_risk_stage(patterns, sentiment):
    """Map detected patterns and sentiment to a risk stage number (1-4).

    Rules are evaluated in priority order; the first match wins:
      2 - "threat" or "insults" present
      1 - "control" or "guilt tripping" present
      3 - "recovery phase" present
      4 - sentiment is "supportive" and "projection" or "dismissiveness" present
      1 - otherwise (default)

    Args:
        patterns: Container of detected label names.
        sentiment: Sentiment tag string, e.g. "supportive".

    Returns:
        int: The stage number.
    """
    def has_any(*names):
        return any(name in patterns for name in names)

    if has_any("threat", "insults"):
        stage = 2
    elif has_any("control", "guilt tripping"):
        stage = 1
    elif has_any("recovery phase"):
        stage = 3
    elif sentiment == "supportive" and has_any("projection", "dismissiveness"):
        stage = 4
    else:
        stage = 1
    return stage
# --- Visualization --- | |
def generate_abuse_score_chart(dates, scores, labels):
    """Render abuse scores as a line chart and return it as a PIL image.

    Args:
        dates: One entry per score. If every entry parses as "YYYY-MM-DD"
            the x-axis uses real dates; otherwise (any entry malformed or
            not a string) the x-axis falls back to positions 0..n-1.
        scores: Abuse scores in percent, one per message.
        labels: Text annotation for each point, one per message.

    Returns:
        PIL.Image.Image: The chart rendered as a PNG.
    """
    try:
        x_values = [datetime.strptime(d, "%Y-%m-%d") for d in dates]
    except (ValueError, TypeError):
        # Malformed or missing date: plot by position instead.
        x_values = list(range(len(dates)))

    fig, ax = plt.subplots(figsize=(8, 3))
    try:
        ax.plot(x_values, scores, marker='o', linestyle='-', color='darkred', linewidth=2)
        # Annotate each point slightly above the marker.
        for x, y, label in zip(x_values, scores, labels):
            ax.text(x, y + 2, f"{label}\n{int(y)}%", ha='center', fontsize=8)
        ax.set_title("Abuse Intensity Over Time")
        ax.set_xlabel("Date")
        ax.set_ylabel("Abuse Score (%)")
        ax.set_ylim(0, 105)
        ax.grid(True)
        fig.tight_layout()

        buf = io.BytesIO()
        # Save this specific figure rather than pyplot's "current" one.
        fig.savefig(buf, format='png')
    finally:
        # pyplot keeps every figure alive until it is closed; without this
        # each request would leak a figure in a long-running server.
        plt.close(fig)

    buf.seek(0)
    return Image.open(buf)
# --- Load Models & Pipelines --- | |
# Multi-label abuse-pattern classifier and its tokenizer.
model_name = "SamanthaStorm/tether-multilabel-v2"
model = AutoModelForSequenceClassification.from_pretrained(model_name)
tokenizer = AutoTokenizer.from_pretrained(model_name, use_fast=False)

# SST-2 sentiment classifier. "sentiment-analysis" is an alias of the
# "text-classification" pipeline task, and both names previously loaded the
# same checkpoint, so a single pipeline instance is shared under both names
# instead of holding two copies of the model in memory.
healthy_detector = pipeline("text-classification", model="distilbert-base-uncased-finetuned-sst-2-english")
sst_pipeline = healthy_detector
# --- Single-Message Analysis --- | |
def analyze_single_message(text):
    """Score one message for abuse patterns, sentiment, stage and DARVO.

    Args:
        text: The message to analyze.

    Returns:
        dict with keys:
            "abuse_score"  - int percentage (0-100), weighted by PATTERN_WEIGHTS
            "labels"       - label names whose probability exceeds THRESHOLDS
            "sentiment"    - "supportive" or "undermining"
            "stage"        - risk stage number from get_risk_stage
            "darvo_score"  - float from calculate_darvo_score
            "top_patterns" - up to two (label, probability) pairs, highest
                             first; empty when the healthy bypass triggers
    """
    # Healthy bypass: a confidently positive message is reported as clean.
    # truncation=True keeps messages longer than the model's maximum input
    # length from raising inside the pipeline.
    healthy = healthy_detector(text, truncation=True)[0]
    if healthy['label'] == "POSITIVE" and healthy['score'] > 0.9:
        return {"abuse_score": 0, "labels": [], "sentiment": "supportive", "stage": 4, "darvo_score": 0.0, "top_patterns": []}

    # Multi-label model scoring: independent sigmoid probability per label.
    inputs = tokenizer(text, return_tensors='pt', truncation=True, padding=True)
    with torch.no_grad():
        logits = model(**inputs).logits.squeeze(0)
    probs = torch.sigmoid(logits).numpy()
    labels = [lab for lab, p in zip(LABELS, probs) if p > THRESHOLDS[lab]]

    # Weighted mean of all label probabilities, expressed as a percentage.
    total_w = sum(PATTERN_WEIGHTS.get(lab, 1.0) for lab in LABELS)
    weighted_sum = sum(p * PATTERN_WEIGHTS.get(lab, 1.0) for lab, p in zip(LABELS, probs))
    abuse_score = int(round(weighted_sum / total_w * 100))

    # Sentiment: only a negative classification contributes a shift value.
    sst = sst_pipeline(text, truncation=True)[0]
    sentiment = 'supportive' if sst['label'] == 'POSITIVE' else 'undermining'
    sent_score = sst['score'] if sentiment == 'undermining' else 0.0

    # DARVO. detect_motifs is imported from motif_tagging; its second return
    # value is what gets passed on as the matched motifs (the first is unused).
    _, matched = detect_motifs(text)
    contradiction = detect_contradiction(text)
    darvo_score = calculate_darvo_score(labels, 0.0, sent_score, matched, contradiction)

    # Stage, with a weapon-language override: bump the score by 25 points
    # (capped at 100) and force the stage to at least 2.
    stage = get_risk_stage(labels, sentiment)
    if detect_weapon_language(text):
        abuse_score = min(abuse_score + 25, 100)
        stage = max(stage, 2)

    # Two highest-probability labels regardless of threshold.
    top_patterns = sorted(zip(LABELS, probs), key=lambda x: x[1], reverse=True)[:2]
    return {"abuse_score": abuse_score, "labels": labels, "sentiment": sentiment, "stage": stage, "darvo_score": darvo_score, "top_patterns": top_patterns}
# --- Composite Analysis --- | |
def analyze_composite(msg1, date1, msg2, date2, msg3, date3, *answers_and_none):
    """Analyze up to three messages plus the escalation checklist.

    Args:
        msg1, msg2, msg3: Message texts; blank or None entries are skipped.
        date1, date2, date3: Optional date strings paired with the messages.
        *answers_and_none: One boolean per entry in ESCALATION_QUESTIONS,
            followed by a final boolean for "None of the above".

    Returns:
        tuple: ``(text, image)`` matching the interface's two outputs.
        ``text`` is the summary report and ``image`` the abuse-score chart.
        When no message is provided the result is
        ``("Please enter at least one message.", None)``.
    """
    # Validate input first so an empty form never touches the models, and
    # always return two values because the interface declares two outputs.
    active = [
        (m, d)
        for m, d in zip((msg1, msg2, msg3), (date1, date2, date3))
        if m and m.strip()
    ]
    if not active:
        return "Please enter at least one message.", None

    # Escalation checklist: the last positional value is "None of the above".
    answers = answers_and_none[:-1]
    none_checked = bool(answers_and_none) and answers_and_none[-1]
    # NOTE(review): ticking only "None of the above" is reported as
    # "Unknown (Checklist not completed)", while ticking nothing at all
    # scores 0 / Low. This mirrors the original logic - confirm it is the
    # intended behavior.
    if none_checked and not any(answers):
        esc_score = None
        risk_level = 'unknown'
    else:
        esc_score = sum(w for (_, w), a in zip(ESCALATION_QUESTIONS, answers) if a)
        risk_level = 'High' if esc_score >= 16 else 'Moderate' if esc_score >= 8 else 'Low'

    # Per-message analysis. Results are kept as plain dicts (dates live in
    # a parallel list) so each field is read directly from the dict.
    analyses = [analyze_single_message(m) for m, _ in active]
    dates_used = [d or 'Undated' for _, d in active]

    abuse_scores = [a['abuse_score'] for a in analyses]
    top_labels = [a['top_patterns'][0][0] if a['top_patterns'] else 'None' for a in analyses]

    # Most frequent stage across the analyzed messages.
    stage_list = [a['stage'] for a in analyses]
    most_common_stage = max(set(stage_list), key=stage_list.count)
    composite_abuse = int(round(sum(abuse_scores) / len(abuse_scores)))

    # DARVO summary: only reported when the average exceeds 0.25.
    darvo_vals = [a['darvo_score'] for a in analyses]
    avg_darvo = round(sum(darvo_vals) / len(darvo_vals), 3)
    darvo_blurb = ''
    if avg_darvo > 0.25:
        lvl = 'moderate' if avg_darvo < 0.65 else 'high'
        darvo_blurb = f"\n🎭 DARVO Score: {avg_darvo} ({lvl})"

    # Report text.
    out = f"Abuse Intensity: {composite_abuse}%\n"
    if esc_score is None:
        out += "Escalation Potential: Unknown (Checklist not completed)\n"
    else:
        max_score = sum(w for _, w in ESCALATION_QUESTIONS)
        out += f"Escalation Potential: {risk_level} ({esc_score}/{max_score})\n"

    # The risk snippet is based on the first message's top pattern.
    first_top = analyses[0]['top_patterns']
    if first_top:
        pattern_score = f"{top_labels[0]} – {int(first_top[0][1] * 100)}%"
    else:
        pattern_score = top_labels[0]
    out += generate_risk_snippet(composite_abuse, pattern_score, esc_score or 0, most_common_stage)
    out += darvo_blurb

    img = generate_abuse_score_chart(dates_used, abuse_scores, top_labels)
    return out, img
# --- Gradio Interface --- | |
# Three (message, optional date) textbox pairs, numbered from 1.
message_date_pairs = []
for slot in range(1, 4):
    message_box = gr.Textbox(label=f"Message {slot}")
    date_box = gr.Textbox(label=f"Date {slot} (optional)", placeholder="YYYY-MM-DD")
    message_date_pairs.append((message_box, date_box))

# Flatten to [msg1, date1, msg2, date2, msg3, date3], matching the leading
# positional parameters of analyze_composite.
textbox_inputs = []
for pair in message_date_pairs:
    textbox_inputs.extend(pair)

# One checkbox per escalation question, then the "None of the above" box,
# which analyze_composite reads as the final positional argument.
quiz_boxes = []
for question, _weight in ESCALATION_QUESTIONS:
    quiz_boxes.append(gr.Checkbox(label=question))
none_box = gr.Checkbox(label="None of the above")

iface = gr.Interface(
    fn=analyze_composite,
    inputs=[*textbox_inputs, *quiz_boxes, none_box],
    outputs=[
        gr.Textbox(label="Results"),
        gr.Image(label="Risk Stage Timeline", type="pil"),
    ],
    title="Tether Abuse Pattern Detector v2",
    allow_flagging="manual",
)

if __name__ == "__main__":
    iface.launch()