# Hugging Face Space — page status banner: "Running on Zero"
import gradio as gr | |
import torch | |
import numpy as np | |
from transformers import pipeline, AutoModelForSequenceClassification, AutoTokenizer | |
from motif_tagging import detect_motifs | |
import re | |
import matplotlib.pyplot as plt | |
import io | |
from PIL import Image | |
from datetime import datetime | |
# ——— Constants ——— | |
# Pattern names, positionally aligned with the classifier's outputs:
# analyze_single_message zips this list against the per-class probabilities,
# so the order here must not change.
LABELS = [
    "blame shifting",
    "contradictory statements",
    "control",
    "dismissiveness",
    "gaslighting",
    "guilt tripping",
    "insults",
    "obscure language",
    "projection",
    "recovery phase",
    "threat",
]
# Per-label probability cut-offs. A label is reported only when its
# probability is strictly greater than the value listed here.
# NOTE (author's reminder): restore the exact tuned thresholds here if these
# values were ever changed.
THRESHOLDS = {
    "blame shifting": 0.3,
    "contradictory statements": 0.3,
    "control": 0.35,
    "dismissiveness": 0.4,
    "gaslighting": 0.3,
    "guilt tripping": 0.3,
    "insults": 0.3,
    "obscure language": 0.4,
    "projection": 0.4,
    "recovery phase": 0.35,
    "threat": 0.3,
}
# Relative weight of each pattern in the abuse score. Labels that are not
# listed are looked up with a default weight of 1.0 by the scoring code.
PATTERN_WEIGHTS = {
    "gaslighting": 1.3,
    "control": 1.2,
    "dismissiveness": 0.8,
    "blame shifting": 0.8,
    "contradictory statements": 0.75,
    "threat": 1.5,
}
# Labels that count towards the pattern component of the DARVO score
# (calculate_darvo_score divides the number of hits by the size of this set).
DARVO_PATTERNS = {
    "blame shifting",
    "projection",
    "dismissiveness",
    "guilt tripping",
    "contradictory statements",
}
# Stock phrases compared (case-insensitively, as substrings) against detected
# motifs when computing the DARVO score.
# NOTE: two phrases appear twice. They are kept on purpose because
# len(DARVO_MOTIFS) is used as the denominator of the motif component, so
# de-duplicating would shift every score.
DARVO_MOTIFS = [
    "I never said that.",
    "You’re imagining things.",
    "That never happened.",
    "You’re making a big deal out of nothing.",
    "It was just a joke.",
    "You’re too sensitive.",
    "I don’t know what you’re talking about.",
    "You’re overreacting.",
    "I didn’t mean it that way.",
    "You’re twisting my words.",
    "You’re remembering it wrong.",
    "You’re always looking for something to complain about.",
    "You’re just trying to start a fight.",
    "I was only trying to help.",
    "You’re making things up.",
    "You’re blowing this out of proportion.",
    "You’re being paranoid.",
    "You’re too emotional.",
    "You’re always so dramatic.",
    "You’re just trying to make me look bad.",
    "You’re crazy.",
    "You’re the one with the problem.",
    "You’re always so negative.",
    "You’re just trying to control me.",
    "You’re the abusive one.",
    "You’re trying to ruin my life.",
    "You’re just jealous.",
    "You’re the one who needs help.",
    "You’re always playing the victim.",
    "You’re the one causing all the problems.",
    "You’re just trying to make me feel guilty.",
    "You’re the one who can’t let go of the past.",
    "You’re the one who’s always angry.",
    "You’re the one who’s always complaining.",
    "You’re the one who’s always starting arguments.",
    "You’re the one who’s always making things worse.",
    "You’re the one who’s always making me feel bad.",
    "You’re the one who’s always making me look like the bad guy.",
    "You’re the one who’s always making me feel like a failure.",
    "You’re the one who’s always making me feel like I’m not good enough.",
    "I can’t believe you’re doing this to me.",
    "You’re hurting me.",
    "You’re making me feel like a terrible person.",
    "You’re always blaming me for everything.",
    "You’re the one who’s abusive.",
    "You’re the one who’s controlling.",
    "You’re the one who’s manipulative.",
    "You’re the one who’s toxic.",
    "You’re the one who’s gaslighting me.",
    "You’re the one who’s always putting me down.",
    "You’re the one who’s always making me feel bad.",  # duplicate, kept
    "You’re the one who’s always making me feel like I’m not good enough.",  # duplicate, kept
    "You’re the one who’s always making me feel like I’m the problem.",
    "You’re the one who’s always making me feel like I’m the bad guy.",
    "You’re the one who’s always making me feel like I’m the villain.",
    "You’re the one who’s always making me feel like I’m the one who needs to change.",
    "You’re the one who’s always making me feel like I’m the one who’s wrong.",
    "You’re the one who’s always making me feel like I’m the one who’s crazy.",
    "You’re the one who’s always making me feel like I’m the one who’s abusive.",
    "You’re the one who’s always making me feel like I’m the one who’s toxic.",
]
# Human-readable description for each risk stage number (1-4).
# Each value is a headline and an explanation separated by a newline.
RISK_STAGE_LABELS = {
    1: (
        "🌀 Risk Stage: Tension-Building\n"
        "This message reflects rising emotional pressure or subtle control attempts."
    ),
    2: (
        "🔥 Risk Stage: Escalation\n"
        "This message includes direct or aggressive patterns, suggesting active harm."
    ),
    3: (
        "🌧️ Risk Stage: Reconciliation\n"
        "This message reflects a reset attempt—apologies or emotional repair without accountability."
    ),
    4: (
        "🌸 Risk Stage: Calm / Honeymoon\n"
        "This message appears supportive but may follow prior harm, minimizing it."
    ),
}
# Escalation checklist as (question text, weight) pairs. The UI renders one
# checkbox per entry in this order, and the weights of ticked entries are
# summed into the escalation score (maximum 29).
ESCALATION_QUESTIONS = [
    ("Partner has access to firearms or weapons", 4),
    ("Partner threatened to kill you", 3),
    ("Partner threatened you with a weapon", 3),
    ("Partner has ever choked you", 4),
    ("Partner injured or threatened your pet(s)", 3),
    ("Partner has broken your things, punched walls, or thrown objects", 2),
    ("Partner forced or coerced you into unwanted sexual acts", 3),
    ("Partner threatened to take away your children", 2),
    ("Violence has increased in frequency or severity", 3),
    ("Partner monitors your calls, GPS, or social media", 2),
]
# ——— Helper Functions ——— | |
# Each pattern is an opening phrase followed, within 15 characters, by a
# phrase that reverses it. ``[’']`` accepts both the typographic and the plain
# ASCII apostrophe, so messages typed on an ordinary keyboard also match.
# Compiled once at import time rather than on every call.
_CONTRADICTION_PATTERNS = tuple(
    re.compile(pattern, re.IGNORECASE)
    for pattern in (
        r"\b(i love you).{0,15}(i hate you|you ruin everything)",
        r"\b(i[’']m sorry).{0,15}(but you|if you hadn[’']t)",
        r"\b(i[’']m trying).{0,15}(you never|why do you)",
        r"\b(do what you want).{0,15}(you[’']ll regret it|i always give everything)",
        r"\b(i don[’']t care).{0,15}(you never think of me)",
        r"\b(i guess i[’']m just).{0,15}(the bad guy|worthless|never enough)",
    )
)


def detect_contradiction(message):
    """Return True if *message* contains a known self-contradicting phrase pair.

    Matching is case-insensitive. The gap between the two halves of a pair
    may be at most 15 characters and cannot span a line break.

    Args:
        message: Text to scan. ``None`` or an empty string yields ``False``.

    Returns:
        bool: whether any contradiction pattern was found.
    """
    if not message:
        return False
    return any(pattern.search(message) for pattern in _CONTRADICTION_PATTERNS)
def calculate_darvo_score(patterns, sentiment_before, sentiment_after, motifs_found, contradiction_flag=False):
    """Blend four signals into a single DARVO score between 0 and 1.

    Components and weights:
      * 0.30 - share of DARVO_PATTERNS present in ``patterns``
      * 0.30 - sentiment shift (``sentiment_after - sentiment_before``,
               floored at 0)
      * 0.25 - motifs containing a DARVO_MOTIFS phrase, divided by
               len(DARVO_MOTIFS)
      * 0.15 - 1.0 when ``contradiction_flag`` is truthy, else 0.0

    Args:
        patterns: Iterable of detected pattern labels.
        sentiment_before: Baseline sentiment value.
        sentiment_after: Sentiment value to compare against the baseline.
        motifs_found: Iterable of motif strings (each must support ``.lower()``).
        contradiction_flag: Whether a contradiction was detected.

    Returns:
        float: the weighted sum, capped at 1.0 and rounded to 3 decimals.
    """
    pattern_hits = sum(1 for label in patterns if label in DARVO_PATTERNS)
    p_score = pattern_hits / len(DARVO_PATTERNS)

    # Only a positive shift contributes to the score.
    s_shift = max(0.0, sentiment_after - sentiment_before)

    # A motif counts when any stock phrase occurs inside it, ignoring case.
    lowered_phrases = [phrase.lower() for phrase in DARVO_MOTIFS]
    motif_hits = 0
    for motif in motifs_found:
        motif_lower = motif.lower()
        if any(phrase in motif_lower for phrase in lowered_phrases):
            motif_hits += 1
    m_score = motif_hits / len(DARVO_MOTIFS)

    c_score = 1.0 if contradiction_flag else 0.0

    raw = 0.3*p_score + 0.3*s_shift + 0.25*m_score + 0.15*c_score
    return round(min(raw, 1.0), 3)
def generate_risk_snippet(abuse_score, top_label, escalation_score, stage):
    """Build the risk-level paragraph appended to the analysis output.

    Args:
        abuse_score: Abuse intensity percentage.
        top_label: Leading pattern, optionally suffixed with " – <score>%".
            Only the part before the " – " separator is used.
        escalation_score: Weighted checklist score.
        stage: Accepted for interface compatibility; not used in the text.

    Returns:
        str: a block starting with two newlines that names the risk level
        (High / Moderate / Low), the pattern, and a short explanation.
    """
    explanations = {
        "control": "efforts to restrict autonomy.",
        "gaslighting": "manipulating perception.",
        "dismissiveness": "invalidating experience.",
        "insults": "direct insults erode safety.",
        "threat": "threatening language predicts harm.",
        "blame shifting": "avoiding accountability.",
        "guilt tripping": "inducing guilt to control behavior.",
        "recovery phase": "tension-reset without change.",
        "projection": "attributing faults to the other person.",
    }
    label = top_label.split(" – ")[0]
    why = explanations.get(label, "This message contains concerning patterns.")

    # Tiers are checked from most to least severe; either score reaching its
    # floor is enough to qualify for the tier.
    level = "Low"
    for tier, abuse_floor, escalation_floor in (("High", 85, 16), ("Moderate", 60, 8)):
        if abuse_score >= abuse_floor or escalation_score >= escalation_floor:
            level = tier
            break

    return f"\n\n🛑 Risk Level: {level}\nThis message shows **{label}**.\n💡 Why: {why}\n"
# Weapon / lethal-violence keywords, anchored at the start of a word so that
# unrelated words merely containing a keyword ("begun", "troubleshoot",
# "skill you") no longer trigger. Inflected forms ("guns", "shooting",
# "exploded") still match because only the word start is anchored, and a few
# common compounds ("handgun", "shotgun", "pocketknife", "firebomb") are
# listed explicitly.
_WEAPON_RE = re.compile(
    r"\b(?:(?:hand|shot|machine)?gun|(?:pocket)?knife|(?:fire)?bomb"
    r"|kill you|shoot|explode)",
    re.IGNORECASE,
)


def detect_weapon_language(text):
    """Return True if *text* mentions a weapon or lethal-violence keyword.

    Args:
        text: Message to scan. ``None`` or an empty string yields ``False``.

    Returns:
        bool: whether a keyword was found, case-insensitively, at the start
        of a word.
    """
    if not text:
        return False
    return _WEAPON_RE.search(text) is not None
def get_risk_stage(patterns, sentiment):
    """Map detected patterns and sentiment to a risk stage number (1-4).

    Rules are evaluated in priority order; the first match wins:
      2 - "threat" or "insults" present
      1 - "control" or "guilt tripping" present
      3 - "recovery phase" present
      4 - sentiment is "supportive" and "projection" or "dismissiveness" present
      1 - fallback when nothing above applies

    Args:
        patterns: Container of detected pattern labels.
        sentiment: Sentiment tag; only the value "supportive" is significant.

    Returns:
        int: the stage number.
    """
    def has_any(*names):
        return any(name in patterns for name in names)

    if has_any("threat", "insults"):
        return 2
    if has_any("control", "guilt tripping"):
        return 1
    if has_any("recovery phase"):
        return 3
    if sentiment == "supportive" and has_any("projection", "dismissiveness"):
        return 4
    return 1
def generate_abuse_score_chart(dates, scores, labels):
    """Render the per-message abuse scores as a line chart.

    Args:
        dates: Date strings in ``YYYY-MM-DD`` form. If any entry fails to
            parse, all points are plotted against their positional index
            instead.
        scores: Abuse score for each message (numeric).
        labels: Text annotation for each point, shown above it together with
            the integer score.

    Returns:
        PIL.Image.Image: the chart, opened from an in-memory PNG.
    """
    try:
        x_values = [datetime.strptime(d, "%Y-%m-%d") for d in dates]
    except (ValueError, TypeError):
        # Unparseable or missing date: fall back to 0..n-1 for every point.
        x_values = list(range(len(dates)))

    fig, ax = plt.subplots(figsize=(8, 3))
    try:
        ax.plot(x_values, scores, marker='o', linestyle='-', color='darkred', linewidth=2)
        for x, y, label in zip(x_values, scores, labels):
            ax.text(x, y + 2, f"{label}\n{int(y)}%", ha='center', fontsize=8)
        ax.set(title="Abuse Intensity Over Time", xlabel="Date", ylabel="Abuse Score (%)")
        ax.set_ylim(0, 105)
        ax.grid(True)
        # Use the figure's own methods rather than pyplot's global
        # "current figure", which is shared state.
        fig.tight_layout()
        buf = io.BytesIO()
        fig.savefig(buf, format='png')
    finally:
        # Figures created through pyplot stay registered until closed;
        # without this every request would leak one figure.
        plt.close(fig)

    buf.seek(0)
    return Image.open(buf)
# ——— Load Models & Pipelines ——— | |
# Multilabel abuse-pattern classifier and its tokenizer.
model_name = "SamanthaStorm/tether-multilabel-v2"
model = AutoModelForSequenceClassification.from_pretrained(model_name)
tokenizer = AutoTokenizer.from_pretrained(model_name, use_fast=False)

# SST-2 sentiment classifier. It serves both as the "healthy message" gate and
# as the sentiment signal. "sentiment-analysis" is an alias of the
# "text-classification" task, so the original two pipeline() calls loaded the
# same checkpoint twice; one instance is now shared under both names.
_SST_MODEL_NAME = "distilbert-base-uncased-finetuned-sst-2-english"
healthy_detector = pipeline("text-classification", model=_SST_MODEL_NAME)
sst_pipeline = healthy_detector
# ——— Single-Message Analysis ——— | |
def _healthy_result():
    """Return a fresh zero-abuse result (new lists on every call)."""
    return {
        "abuse_score": 0,
        "labels": [],
        "sentiment": "supportive",
        "stage": 4,
        "darvo_score": 0.0,
        "top_patterns": [],
    }


def analyze_single_message(text):
    """Analyze one message and summarise the abuse patterns found in it.

    Steps:
      1. Healthy bypass: a strongly positive (> 0.8) or only mildly negative
         (< 0.6) SST-2 verdict returns a zero-abuse result immediately.
      2. The multilabel classifier is run and labels above their
         THRESHOLDS entry are kept; if none remain, the zero-abuse result
         is returned.
      3. A weighted abuse score, sentiment, DARVO score and risk stage are
         derived; weapon language adds 25 points (capped at 100) and raises
         the stage to at least 2.

    Args:
        text: The message to analyze.

    Returns:
        dict with keys ``abuse_score`` (int 0-100), ``labels`` (list of
        label names above threshold), ``sentiment`` ("supportive" or
        "undermining"), ``stage`` (int), ``darvo_score`` (float) and
        ``top_patterns`` (up to two ``(label, probability)`` pairs, highest
        probability first; empty for the zero-abuse result).
    """
    # 1) healthy bypass
    h = healthy_detector(text)[0]
    strongly_positive = h['label'] == "POSITIVE" and h['score'] > 0.8
    mildly_negative = h['label'] == "NEGATIVE" and h['score'] < 0.6
    if strongly_positive or mildly_negative:
        return _healthy_result()

    # 2) multilabel classification
    inputs = tokenizer(text, return_tensors='pt', truncation=True, padding=True)
    with torch.no_grad():
        logits = model(**inputs).logits.squeeze(0)
    probs = torch.sigmoid(logits).numpy()

    labels = [lab for lab, p in zip(LABELS, probs) if p > THRESHOLDS[lab]]
    if not labels:
        # No pattern cleared its threshold: treat the message as healthy.
        return _healthy_result()

    # 3) abuse score: weighted mean of ALL class probabilities (not only the
    # ones above threshold), expressed as a percentage.
    total_w = sum(PATTERN_WEIGHTS.get(l, 1.0) for l in LABELS)
    weighted = sum(p * PATTERN_WEIGHTS.get(l, 1.0) for l, p in zip(LABELS, probs))
    abuse_score = int(round(weighted / total_w * 100))

    # sentiment: only an "undermining" verdict contributes a shift to DARVO
    sst = sst_pipeline(text)[0]
    sentiment = 'supportive' if sst['label'] == 'POSITIVE' else 'undermining'
    sent_score = sst['score'] if sentiment == 'undermining' else 0.0

    # DARVO (the first value returned by detect_motifs is not needed here)
    _, matched = detect_motifs(text)
    contradiction = detect_contradiction(text)
    darvo_score = calculate_darvo_score(labels, 0.0, sent_score, matched, contradiction)

    # stage + weapon escalation
    stage = get_risk_stage(labels, sentiment)
    if detect_weapon_language(text):
        abuse_score = min(abuse_score + 25, 100)
        stage = max(stage, 2)

    # two most probable patterns, regardless of threshold
    top_patterns = sorted(zip(LABELS, probs), key=lambda x: x[1], reverse=True)[:2]

    return {
        "abuse_score": abuse_score,
        "labels": labels,
        "sentiment": sentiment,
        "stage": stage,
        "darvo_score": darvo_score,
        "top_patterns": top_patterns,
    }
# ——— Composite Analysis & UI ——— | |
def analyze_composite(m1, d1, m2, d2, m3, d3, *answers):
    """Analyze up to three messages plus the escalation checklist.

    Args:
        m1, m2, m3: Message texts; blank or ``None`` entries are skipped.
        d1, d2, d3: Optional date strings paired with the messages.
        *answers: Checkbox values, one per ESCALATION_QUESTIONS entry,
            followed by the "None of the above" checkbox as the last value.

    Returns:
        tuple: ``(report_text, chart_image)``. When no message was entered
        the text is a prompt and the image is ``None``, so both declared
        UI outputs always receive a value.
    """
    # Collect only non-empty messages (None-safe).
    active = [
        (m, d)
        for m, d in zip((m1, m2, m3), (d1, d2, d3))
        if m and m.strip()
    ]
    if not active:
        return "Please enter at least one message.", None

    # "None of the above" ticked and nothing else -> escalation is reported
    # as unknown.
    # NOTE(review): ticking "None of the above" yields "Checklist not
    # completed", while ticking nothing at all yields "Low (0/29)". This may
    # be inverted; behavior is kept as-is pending confirmation.
    none_sel = bool(answers) and bool(answers[-1]) and not any(answers[:-1])
    if none_sel:
        esc_score = None
        risk_level = "unknown"
    else:
        esc_score = sum(w for (_, w), a in zip(ESCALATION_QUESTIONS, answers[:-1]) if a)
        risk_level = "High" if esc_score >= 16 else "Moderate" if esc_score >= 8 else "Low"

    # Analyze each message.
    results = [(analyze_single_message(m), d) for m, d in active]

    abuse_scores = [res["abuse_score"] for res, _ in results]
    top_labels = [res["top_patterns"][0][0] if res["top_patterns"] else "None" for res, _ in results]
    dates_used = [d or "Undated" for _, d in results]
    stages = [res["stage"] for res, _ in results]

    # Overall risk stage (most frequent) and mean abuse score.
    most_common_stage = max(set(stages), key=stages.count)
    composite_abuse = int(round(sum(abuse_scores) / len(abuse_scores)))

    out = f"Abuse Intensity: {composite_abuse}%\n"
    if esc_score is None:
        out += "Escalation Potential: Unknown (Checklist not completed)\n"
    else:
        total_possible = sum(w for _, w in ESCALATION_QUESTIONS)
        out += f"Escalation Potential: {risk_level} ({esc_score}/{total_possible})\n"

    img = generate_abuse_score_chart(dates_used, abuse_scores, top_labels)

    # With zero abuse, skip the risk snippet and DARVO summary.
    if composite_abuse == 0:
        return out, img

    darvos = [res["darvo_score"] for res, _ in results]
    avg_darvo = round(sum(darvos) / len(darvos), 3)
    darvo_blurb = (
        f"\n🎭 DARVO Score: {avg_darvo} ({'high' if avg_darvo >= 0.65 else 'moderate'})"
        if avg_darvo > 0.25 else ""
    )

    # Risk snippet: describe the first message that actually has patterns.
    # Always using message 1 printed "**None**" whenever the first message
    # was healthy but a later one was not.
    snippet_source = next((res for res, _ in results if res["top_patterns"]), None)
    if snippet_source is not None:
        first_pattern, first_prob = snippet_source["top_patterns"][0]
        first_score = int(first_prob * 100)
    else:
        first_pattern, first_score = "None", 0
    pattern_score = f"{first_pattern} – {first_score}%"

    out += generate_risk_snippet(composite_abuse, pattern_score, esc_score or 0, most_common_stage)
    out += darvo_blurb
    return out, img
# ——— Gradio Interface ——— | |
def _message_date_pair(number):
    """Create the (message, optional date) textbox pair for slot *number*."""
    message_box = gr.Textbox(label=f"Message {number}")
    date_box = gr.Textbox(label=f"Date {number} (optional)", placeholder="YYYY-MM-DD")
    return message_box, date_box


# Three message/date slots, one checkbox per escalation question, and a
# trailing "None of the above" checkbox.
message_date_pairs = [_message_date_pair(number) for number in range(1, 4)]
quiz_boxes = [gr.Checkbox(label=question) for question, _weight in ESCALATION_QUESTIONS]
none_box = gr.Checkbox(label="None of the above")

# Input order must match analyze_composite's signature:
# m1, d1, m2, d2, m3, d3, then the checklist answers, then "None of the above".
iface = gr.Interface(
    fn=analyze_composite,
    inputs=[
        *(box for pair in message_date_pairs for box in pair),
        *quiz_boxes,
        none_box,
    ],
    outputs=[
        gr.Textbox(label="Results"),
        gr.Image(label="Risk Stage Timeline", type="pil"),
    ],
    title="Tether Abuse Pattern Detector v2",
    allow_flagging="manual",
)

if __name__ == "__main__":
    iface.launch()