# Hugging Face Space — status: Running on Zero
import gradio as gr | |
import torch | |
import numpy as np | |
from transformers import pipeline, AutoModelForSequenceClassification, AutoTokenizer | |
from motif_tagging import detect_motifs | |
import re | |
import matplotlib.pyplot as plt | |
import io | |
from PIL import Image | |
from datetime import datetime | |
# ——— DARVO & Risk Utilities ——— | |
# Classifier labels that count toward the pattern component of the DARVO score.
DARVO_PATTERNS = {
    "blame shifting",
    "projection",
    "dismissiveness",
    "guilt tripping",
    "contradictory statements",
}
# Reference phrases used for the motif component of the DARVO score.
# NOTE(review): two phrases appear twice ("...always making me feel bad." and
# "...always making me feel like I’m not good enough."). They are kept as-is
# because the list length is used as a denominator when scoring.
DARVO_MOTIFS = [
    # Denial / minimising
    "I never said that.",
    "You’re imagining things.",
    "That never happened.",
    "You’re making a big deal out of nothing.",
    "It was just a joke.",
    "You’re too sensitive.",
    "I don’t know what you’re talking about.",
    "You’re overreacting.",
    "I didn’t mean it that way.",
    "You’re twisting my words.",
    "You’re remembering it wrong.",
    "You’re always looking for something to complain about.",
    "You’re just trying to start a fight.",
    "I was only trying to help.",
    "You’re making things up.",
    "You’re blowing this out of proportion.",
    "You’re being paranoid.",
    "You’re too emotional.",
    "You’re always so dramatic.",
    "You’re just trying to make me look bad.",
    # Attack
    "You’re crazy.",
    "You’re the one with the problem.",
    "You’re always so negative.",
    "You’re just trying to control me.",
    "You’re the abusive one.",
    "You’re trying to ruin my life.",
    "You’re just jealous.",
    "You’re the one who needs help.",
    "You’re always playing the victim.",
    "You’re the one causing all the problems.",
    "You’re just trying to make me feel guilty.",
    "You’re the one who can’t let go of the past.",
    "You’re the one who’s always angry.",
    "You’re the one who’s always complaining.",
    "You’re the one who’s always starting arguments.",
    "You’re the one who’s always making things worse.",
    "You’re the one who’s always making me feel bad.",
    "You’re the one who’s always making me look like the bad guy.",
    "You’re the one who’s always making me feel like a failure.",
    "You’re the one who’s always making me feel like I’m not good enough.",
    # Reversing victim and offender
    "I can’t believe you’re doing this to me.",
    "You’re hurting me.",
    "You’re making me feel like a terrible person.",
    "You’re always blaming me for everything.",
    "You’re the one who’s abusive.",
    "You’re the one who’s controlling.",
    "You’re the one who’s manipulative.",
    "You’re the one who’s toxic.",
    "You’re the one who’s gaslighting me.",
    "You’re the one who’s always putting me down.",
    "You’re the one who’s always making me feel bad.",
    "You’re the one who’s always making me feel like I’m not good enough.",
    "You’re the one who’s always making me feel like I’m the problem.",
    "You’re the one who’s always making me feel like I’m the bad guy.",
    "You’re the one who’s always making me feel like I’m the villain.",
    "You’re the one who’s always making me feel like I’m the one who needs to change.",
    "You’re the one who’s always making me feel like I’m the one who’s wrong.",
    "You’re the one who’s always making me feel like I’m the one who’s crazy.",
    "You’re the one who’s always making me feel like I’m the one who’s abusive.",
    "You’re the one who’s always making me feel like I’m the one who’s toxic.",
]
# Per-label multipliers for the weighted abuse score. Labels that are not
# listed here are looked up with a default weight of 1.0 by the scoring code.
PATTERN_WEIGHTS = {
    # weighted above 1.0
    "gaslighting": 1.3,
    "control": 1.2,
    # weighted below 1.0
    "dismissiveness": 0.8,
    "blame shifting": 0.8,
    "contradictory statements": 0.75,
    # highest weight
    "threat": 1.5,
}
# Human-readable description for each risk stage number (1-4).
# Each value is "<headline>\n<one-sentence explanation>".
RISK_STAGE_LABELS = {
    1: (
        "🌀 Risk Stage: Tension-Building\n"
        "This message reflects rising emotional pressure or subtle control attempts."
    ),
    2: (
        "🔥 Risk Stage: Escalation\n"
        "This message includes direct or aggressive patterns, suggesting active harm."
    ),
    3: (
        "🌧️ Risk Stage: Reconciliation\n"
        "This message reflects a reset attempt—apologies or emotional repair without accountability."
    ),
    4: (
        "🌸 Risk Stage: Calm / Honeymoon\n"
        "This message appears supportive but may follow prior harm, minimizing it."
    ),
}
# Escalation checklist: (question text shown as a checkbox label, weight).
# The weights of the ticked questions are summed into the escalation score.
ESCALATION_QUESTIONS = [
    # weight 4
    ("Partner has access to firearms or weapons", 4),
    # weight 3
    ("Partner threatened to kill you", 3),
    ("Partner threatened you with a weapon", 3),
    # weight 4
    ("Partner has ever choked you", 4),
    # weight 3
    ("Partner injured or threatened your pet(s)", 3),
    # weight 2
    ("Partner has broken your things, punched walls, or thrown objects", 2),
    # weight 3
    ("Partner forced or coerced you into unwanted sexual acts", 3),
    # weight 2
    ("Partner threatened to take away your children", 2),
    # weight 3
    ("Violence has increased in frequency or severity", 3),
    # weight 2
    ("Partner monitors your calls/GPS/social media", 2),
]
# Compiled once at import time. Each regex pairs an affectionate or apologetic
# opener with a hostile / blame-shifting follow-up at most 15 characters later.
# Both the typographic (’) and the plain (') apostrophe are accepted so that
# text typed on an ordinary keyboard matches as well.
_CONTRADICTION_PATTERNS = [
    re.compile(r"\b(i love you).{0,15}(i hate you|you ruin everything)", re.IGNORECASE),
    re.compile(r"\b(i[’']m sorry).{0,15}(but you|if you hadn[’']t)", re.IGNORECASE),
    # ... other patterns ...
]


def detect_contradiction(message):
    """Return True if *message* contains a known self-contradicting phrase pair.

    Args:
        message: Text to scan. Empty or ``None`` input is treated as
            "no contradiction".

    Returns:
        bool: True when any pattern in ``_CONTRADICTION_PATTERNS`` matches
        anywhere in the message (case-insensitive), otherwise False.
    """
    if not message:
        return False
    return any(pattern.search(message) for pattern in _CONTRADICTION_PATTERNS)
def calculate_darvo_score(patterns, sentiment_before, sentiment_after, motifs_found, contradiction_flag=False):
    """Combine four signals into a DARVO score between 0 and 1.

    Components and their weights:
      * 0.30 - fraction of ``DARVO_PATTERNS`` present in ``patterns``
      * 0.30 - positive part of ``sentiment_after - sentiment_before``
      * 0.25 - number of ``motifs_found`` entries that contain any
        ``DARVO_MOTIFS`` phrase (case-insensitive), divided by
        ``len(DARVO_MOTIFS)``
      * 0.15 - 1.0 when ``contradiction_flag`` is truthy, else 0.0

    Returns:
        float: the weighted sum, capped at 1.0 and rounded to 3 decimals.
    """
    # Pattern component.
    pattern_hits = sum(1 for name in patterns if name in DARVO_PATTERNS)
    pattern_component = pattern_hits / len(DARVO_PATTERNS)

    # Sentiment component: only a rise counts, a drop contributes nothing.
    sentiment_component = max(0.0, sentiment_after - sentiment_before)

    # Motif component.
    motif_hits = 0
    for found in motifs_found:
        if any(phrase.lower() in found.lower() for phrase in DARVO_MOTIFS):
            motif_hits += 1
    motif_component = motif_hits / len(DARVO_MOTIFS)

    # Contradiction component.
    contradiction_component = 1.0 if contradiction_flag else 0.0

    raw = 0.3 * pattern_component + 0.3 * sentiment_component + 0.25 * motif_component + 0.15 * contradiction_component
    capped = min(raw, 1.0)
    return round(capped, 3)
def generate_risk_snippet(abuse_score, top_label, escalation_score, stage):
    """Build the short "Risk Level" paragraph appended to the results text.

    Args:
        abuse_score: Abuse intensity, compared against 60 and 85.
        top_label: Label text, optionally followed by " – <extra>"; only the
            part before the first " – " is used.
        escalation_score: Checklist score, compared against 8 and 16.
        stage: Accepted for interface compatibility; not used here.

    Returns:
        str: Text block with the risk level, the label and a one-line reason.
    """
    label, _, _ = top_label.partition(" – ")

    explanations = {
        "control": "This message may reflect efforts to restrict someone’s autonomy.",
        "gaslighting": "This message could be manipulating perception.",
        # ... other explanations ...
    }
    fallback = "This message contains language patterns that may affect safety."
    why = explanations.get(label, fallback)

    # Either signal on its own is enough to raise the level.
    if abuse_score >= 85 or escalation_score >= 16:
        level = "High"
    elif abuse_score >= 60 or escalation_score >= 8:
        level = "Moderate"
    else:
        level = "Low"

    return f"\n\n🛑 Risk Level: {level}\nThis message shows **{label}**.\n💡 Why: {why}\n"
def detect_weapon_language(text):
    """Return True if *text* contains any weapon/violence keyword.

    Matching is a case-insensitive substring test, so a keyword embedded in a
    longer word also counts.
    """
    lowered = text.lower()
    for keyword in ("knife", "gun", "bomb", "kill you", "shoot", "explode"):
        if keyword in lowered:
            return True
    return False
def get_risk_stage(patterns, sentiment):
    """Map detected pattern labels and sentiment to a risk stage number (1-4).

    Rules are checked in order and the first match wins:
      2 - "threat" or "insults" present
      1 - "control" or "guilt tripping" present
      3 - "recovery phase" present
      4 - sentiment is "supportive" and "projection" or "dismissiveness" present
      1 - fallback when nothing above applies
    """

    def has_any(*names):
        return any(name in patterns for name in names)

    if has_any("threat", "insults"):
        return 2
    if has_any("control", "guilt tripping"):
        return 1
    if has_any("recovery phase"):
        return 3
    if sentiment == "supportive" and has_any("projection", "dismissiveness"):
        return 4
    return 1
# --- Timeline Visualization --- | |
def generate_abuse_score_chart(dates, scores, labels):
    """Render abuse scores as a line chart and return it as a PIL image.

    Args:
        dates: One entry per score. If every entry parses as ``YYYY-MM-DD``
            the x-axis uses those dates; otherwise the x-axis falls back to
            the positions 0..n-1.
        scores: Abuse scores plotted on a 0-105 y-axis.
        labels: One text label per score, drawn above each point together
            with the integer score.

    Returns:
        PIL.Image.Image: The chart, opened from an in-memory PNG.
    """
    try:
        x_values = [datetime.strptime(d, "%Y-%m-%d") for d in dates]
    except (ValueError, TypeError):
        # Unparseable or missing date: plot by position instead.
        x_values = list(range(len(dates)))

    fig, ax = plt.subplots(figsize=(8, 3))
    try:
        ax.plot(x_values, scores, marker="o", linestyle="-", color="darkred", linewidth=2)
        for x, y, label in zip(x_values, scores, labels):
            ax.text(x, y + 2, f"{label}\n{int(y)}%", ha="center", fontsize=8)
        ax.set(title="Abuse Intensity Over Time", xlabel="Date", ylabel="Abuse Score (%)")
        ax.set_ylim(0, 105)
        ax.grid(True)
        fig.tight_layout()

        buf = io.BytesIO()
        fig.savefig(buf, format="png")
    finally:
        # pyplot keeps every figure alive until it is closed; without this a
        # long-running server accumulates one figure per request.
        plt.close(fig)

    buf.seek(0)
    return Image.open(buf)
# --- Load and initialize models --- | |
# Multi-label abuse-pattern classifier and its tokenizer.
model_name = "SamanthaStorm/tether-multilabel-v2"
model = AutoModelForSequenceClassification.from_pretrained(model_name)
tokenizer = AutoTokenizer.from_pretrained(model_name, use_fast=False)

# Two pipelines over the same SST-2 checkpoint: one gates "healthy" messages,
# the other supplies the sentiment label used later in the analysis.
healthy_detector = pipeline(
    "text-classification",
    model="distilbert-base-uncased-finetuned-sst-2-english",
)
sst_pipeline = pipeline(
    "sentiment-analysis",
    model="distilbert-base-uncased-finetuned-sst-2-english",
)
# Output labels of the multi-label classifier, in the order of its logits.
LABELS = [
    "blame shifting", "contradictory statements", "control", "dismissiveness",
    "gaslighting", "guilt tripping", "insults", "obscure language",
    "projection", "recovery phase", "threat",
]

# Probability a label must exceed to be reported. The previous definition read
# THRESHOLDS while defining it, which raises NameError at import time; the
# per-label table below keeps the same "0.3 unless overridden" behaviour.
_DEFAULT_THRESHOLD = 0.3
_THRESHOLD_OVERRIDES = {}
THRESHOLDS = {label: _THRESHOLD_OVERRIDES.get(label, _DEFAULT_THRESHOLD) for label in LABELS}
# --- Single Message Analysis --- | |
def analyze_single_message(text):
    """Analyze one message and return its abuse/DARVO/stage summary.

    Returns:
        dict with keys:
          * ``abuse_score`` - int 0-100, weighted mean of label probabilities
          * ``labels`` - labels whose probability exceeds their threshold
          * ``sentiment`` - "supportive" or "undermining"
          * ``stage`` - risk stage number from ``get_risk_stage``
          * ``darvo_score`` - value from ``calculate_darvo_score``
          * ``top_patterns`` - up to two ``(label, probability)`` pairs,
            highest probability first (empty for the early "healthy" return)
    """
    # Run the gate model once; a confident POSITIVE short-circuits the analysis.
    healthy = healthy_detector(text)[0]
    if healthy['label'] == "POSITIVE" and healthy['score'] > 0.9:
        return {"abuse_score": 0, "labels": [], "sentiment": "supportive",
                "stage": 4, "darvo_score": 0.0, "top_patterns": []}

    inputs = tokenizer(text, return_tensors='pt', padding=True, truncation=True)
    with torch.no_grad():
        outputs = model(**inputs).logits.squeeze(0)
    probs = torch.sigmoid(outputs).numpy()
    labels = [lab for lab, p in zip(LABELS, probs) if p > THRESHOLDS[lab]]

    # Weighted score: every label contributes, not only those over threshold.
    total_w = sum(PATTERN_WEIGHTS.get(l, 1.0) for l in LABELS)
    weighted = sum(probs[i] * PATTERN_WEIGHTS.get(l, 1.0) for i, l in enumerate(LABELS))
    abuse_score = int(round(weighted / total_w * 100))

    # Sentiment: only a non-POSITIVE result feeds a score into DARVO.
    sst = sst_pipeline(text)[0]
    sentiment = 'supportive' if sst['label'] == 'POSITIVE' else 'undermining'
    sent_score = sst['score'] if sentiment == 'undermining' else 0.0

    # DARVO (only the second value returned by detect_motifs is used here).
    _, matched = detect_motifs(text)
    contradiction = detect_contradiction(text)
    darvo = calculate_darvo_score(labels, 0.0, sent_score, matched, contradiction)

    # Stage, then weapon language: +25 points (capped at 100), stage at least 2.
    stage = get_risk_stage(labels, sentiment)
    if detect_weapon_language(text):
        abuse_score = min(abuse_score + 25, 100)
        stage = max(stage, 2)

    # Two most probable labels regardless of threshold.
    top_patterns = sorted(zip(LABELS, probs), key=lambda x: x[1], reverse=True)[:2]
    return {"abuse_score": abuse_score, "labels": labels, "sentiment": sentiment,
            "stage": stage, "darvo_score": darvo, "top_patterns": top_patterns}
# --- Composite Analysis --- | |
def analyze_composite(m1, d1, m2, d2, m3, d3, *answers):
    """Analyze up to three dated messages plus the escalation checklist.

    Args:
        m1, m2, m3: Message texts; blank or ``None`` messages are skipped.
        d1, d2, d3: Optional date strings matching the messages.
        *answers: Checkbox values, one per ``ESCALATION_QUESTIONS`` entry,
            followed by the "None of the above" checkbox as the last value.

    Returns:
        tuple: ``(report_text, chart_image)``. When no message was entered the
        text is a prompt to enter one and the image is ``None``, so both
        interface outputs always receive a value.
    """
    checklist = answers[:-1]
    none_sel = bool(answers) and answers[-1] and not any(checklist)
    # NOTE(review): "None of the above" alone is reported as an uncompleted
    # checklist, while an untouched checklist scores 0 - confirm this is intended.
    if none_sel:
        esc = None
        risk = 'unknown'
    else:
        esc = sum(w for (_, w), a in zip(ESCALATION_QUESTIONS, checklist) if a)
        risk = 'High' if esc >= 16 else 'Moderate' if esc >= 8 else 'Low'

    msgs = [m1, m2, m3]
    dates = [d1, d2, d3]
    active = [(m, d) for m, d in zip(msgs, dates) if m and m.strip()]
    if not active:
        return "Please enter at least one message.", None

    # Each result is (analysis_dict, date).
    results = [(analyze_single_message(m), d) for m, d in active]
    analyses = [analysis for analysis, _ in results]

    abuse_scores = [a['abuse_score'] for a in analyses]
    top_lbls = [a['top_patterns'][0][0] if a['top_patterns'] else 'None' for a in analyses]
    dates_used = [d or 'Undated' for _, d in results]
    stage_list = [a['stage'] for a in analyses]
    most_common_stage = max(set(stage_list), key=stage_list.count)
    composite_abuse = int(round(sum(abuse_scores) / len(abuse_scores)))

    out = f"Abuse Intensity: {composite_abuse}%\n"
    if esc is None:
        out += "Escalation Potential: Unknown (Checklist not completed)\n"
    else:
        out += f"Escalation Potential: {risk} ({esc}/{sum(w for _, w in ESCALATION_QUESTIONS)})\n"

    # DARVO summary, shown only above 0.25.
    darvos = [a['darvo_score'] for a in analyses]
    avg_d = sum(darvos) / len(darvos)
    if avg_d > 0.25:
        lvl = 'moderate' if avg_d < 0.65 else 'high'
        out += f"\n🎭 DARVO Score: {round(avg_d, 3)} ({lvl})\n"

    # The risk snippet describes the first message's strongest pattern; a
    # message that took the early "healthy" return has no patterns at all.
    first_top = analyses[0]['top_patterns']
    top_pct = int(first_top[0][1] * 100) if first_top else 0
    out += generate_risk_snippet(composite_abuse, f"{top_lbls[0]} – {top_pct}%", esc or 0, most_common_stage)

    img = generate_abuse_score_chart(dates_used, abuse_scores, top_lbls)
    return out, img
# --- UI --- | |
# Three (message, optional date) textbox pairs.
message_date_pairs = [
    (
        gr.Textbox(label=f"Message {i+1}"),
        gr.Textbox(label=f"Date {i+1} (optional)", placeholder="YYYY-MM-DD"),
    )
    for i in range(3)
]

# One checkbox per escalation question, plus the trailing "None of the above".
quiz_boxes = [gr.Checkbox(label=question) for question, _ in ESCALATION_QUESTIONS]
none_box = gr.Checkbox(label="None of the above")

# Input order must match analyze_composite(m1, d1, m2, d2, m3, d3, *answers).
interface_inputs = [component for pair in message_date_pairs for component in pair]
interface_inputs = interface_inputs + quiz_boxes + [none_box]
interface_outputs = [
    gr.Textbox(label="Results"),
    gr.Image(label="Risk Stage Timeline", type="pil"),
]

iface = gr.Interface(
    fn=analyze_composite,
    inputs=interface_inputs,
    outputs=interface_outputs,
    title="Tether Abuse Pattern Detector v2",
    allow_flagging="manual",
)

if __name__ == "__main__":
    iface.launch()