Spaces:

SamanthaStorm
/

Tether

Running

File size: 8,700 Bytes

d6e219c
f1948f2
 
909b775
e032990
b54664e
6153eb8
0ff864f
909b775
 
2f6ac5d
f32b7e3
1dbc865
92b1518
 
a9d4250
e185e86
1dbc865
 
 
e185e86
 
 
f32b7e3
 
 
e185e86
 
2f6ac5d
f32b7e3
 
2f6ac5d
aeed86a
e185e86
f32b7e3
 
 
 
 
 
 
 
 
 
 
e185e86
 
d33c30b
1dbc865
 
 
f32b7e3
1dbc865
 
 
f32b7e3
 
1dbc865
 
 
f32b7e3
 
1dbc865
d33c30b
 
6139d49
 
 
 
 
 
 
 
 
f32b7e3
 
 
 
 
 
 
 
 
 
 
 
 
6139d49
f32b7e3
6139d49
 
 
 
 
623a77f
6139d49
f32b7e3
6139d49
 
f32b7e3
6139d49
 
f32b7e3
6139d49
 
f32b7e3
6139d49
6cbeaf8
 
 
 
 
 
 
 
 
1e3558a
9ab7ab9
9f4751d
a568262
909b775
 
 
 
a568262
f32b7e3
6cbeaf8
f32b7e3
 
a568262
f32b7e3
6cbeaf8
 
9ab7ab9
 
 
 
9f4751d
6cbeaf8
 
 
 
 
 
 
 
 
9f4751d
6cbeaf8
9f4751d
 
6cbeaf8
 
 
 
 
909b775
6cbeaf8
f32b7e3
d315105
 
 
623a77f
 
b883fe8
1dbc865
d315105
 
1dbc865
cb6b46c
623a77f
1dbc865
623a77f
f32b7e3
 
 
cb6b46c
d315105
f32b7e3
 
6139d49
623a77f
 
d315105
ec5f81e
f32b7e3
d315105
 
a28ef35
ab8c96f
1dbc865
d315105
1dbc865
d315105
1dbc865
ab8c96f
4292d1b
1dbc865
909b775

import gradio as gr
import torch
import numpy as np
from transformers import pipeline, AutoTokenizer, AutoModelForSeq2SeqLM
from transformers import RobertaForSequenceClassification, RobertaTokenizer
from motif_tagging import detect_motifs
import re

# --- SST Sentiment Model ---
# Binary sentiment classifier, loaded once at import time. analyze_single_message
# maps its "POSITIVE" label to "supportive" and any other label to "undermining".
sst_pipeline = pipeline("sentiment-analysis", model="distilbert-base-uncased-finetuned-sst-2-english")

# --- Abuse Model ---
# Sequence classifier and its tokenizer, also loaded at import time. Its logits
# are passed through a sigmoid in analyze_single_message and paired
# position-by-position with LABELS, so each label gets its own independent score.
model_name = "SamanthaStorm/autotrain-jlpi4-mllvp"
model = RobertaForSequenceClassification.from_pretrained(model_name)
tokenizer = RobertaTokenizer.from_pretrained(model_name)

# Pattern names, zipped positionally with the abuse model's sigmoid scores in
# analyze_single_message. NOTE(review): the order presumably has to match the
# model's output head order -- confirm against the model config before editing.
LABELS = [
    "blame shifting", "contradictory statements", "control", "dismissiveness",
    "gaslighting", "guilt tripping", "insults", "obscure language",
    "projection", "recovery phase", "threat"
]

# Per-label cutoffs: a label is reported only when its score is strictly
# greater than its threshold. analyze_single_message raises every threshold by
# 0.05 when the message sentiment is classified as "supportive".
THRESHOLDS = {
    "blame shifting": 0.3, "contradictory statements": 0.32, "control": 0.48, "dismissiveness": 0.45,
    "gaslighting": 0.30, "guilt tripping": 0.20, "insults": 0.34, "obscure language": 0.25,
    "projection": 0.35, "recovery phase": 0.25, "threat": 0.25
}

# Per-pattern multipliers. NOTE(review): not referenced anywhere in the visible
# code -- either unused or consumed elsewhere; confirm before relying on it.
PATTERN_WEIGHTS = {
    "gaslighting": 1.3, "control": 1.2, "dismissiveness": 0.8,
    "blame shifting": 0.8, "contradictory statements": 0.75
}

# One-sentence description for each label in LABELS.
# NOTE(review): not referenced anywhere in the visible code.
EXPLANATIONS = {
    "blame shifting": "Blame-shifting redirects responsibility to avoid accountability.",
    "contradictory statements": "Flipping positions or denying previous claims.",
    "control": "Attempts to restrict another person’s autonomy.",
    "dismissiveness": "Disregarding or belittling someone’s feelings or needs.",
    "gaslighting": "Manipulating someone into questioning their reality.",
    "guilt tripping": "Using guilt to control or pressure.",
    "insults": "Derogatory or demeaning language.",
    "obscure language": "Vague, superior, or confusing language used manipulatively.",
    "projection": "Accusing someone else of your own behaviors.",
    "recovery phase": "Resetting tension without real change.",
    "threat": "Using fear or harm to control or intimidate."
}

# Report text per risk tier, stored as (title, summary, advice) tuples.
# generate_risk_snippet selects the tier from the abuse score:
# >= 85 -> "high", >= 60 -> "moderate", otherwise "low".
RISK_SNIPPETS = {
    "low": (
        "🟢 Risk Level: Low",
        "The language patterns here do not strongly indicate abuse.",
        "Check in with yourself and monitor for repeated patterns."
    ),
    "moderate": (
        "⚠️ Risk Level: Moderate to High",
        "Language includes control, guilt, or reversal tactics.",
        "These patterns reduce self-trust. Document or talk with someone safe."
    ),
    "high": (
        "🛑 Risk Level: High",
        "Strong indicators of coercive control or threat present.",
        "Consider building a safety plan or contacting support."
    )
}

# Labels that count toward the pattern component of calculate_darvo_score.
DARVO_PATTERNS = {
    "blame shifting", "projection", "dismissiveness", "guilt tripping", "contradictory statements"
}
# Phrases that count toward the motif component of calculate_darvo_score.
# Matching is an exact comparison against the lowercased motif, and these
# entries use the typographic apostrophe (’), so a motif written with a
# straight apostrophe (') will not match.
DARVO_MOTIFS = [
    "i guess i’m the bad guy", "after everything i’ve done", "you always twist everything",
    "so now it’s all my fault", "i’m the villain", "i’m always wrong", "you never listen",
    "you’re attacking me", "i’m done trying", "i’m the only one who cares"
]

# Escalation checklist as (checkbox label, weight) pairs. analyze_composite sums
# the weights of the ticked items (maximum 29) and reports "High" at >= 16,
# "Moderate" at >= 8, otherwise "Low". The labels are also used verbatim as the
# Gradio checkbox labels.
ESCALATION_QUESTIONS = [
    ("Partner has access to firearms or weapons", 4),
    ("Partner threatened to kill you", 3),
    ("Partner threatened you with a weapon", 3),
    ("Partner has ever choked you, even if you considered it consensual at the time", 4),
    ("Partner injured or threatened your pet(s)", 3),
    ("Partner has broken your things, punched or kicked walls, or thrown things ", 2),
    ("Partner forced or coerced you into unwanted sexual acts", 3),
    ("Partner threatened to take away your children", 2),
    ("Violence has increased in frequency or severity", 3),
    ("Partner monitors your calls/GPS/social media", 2)
]

# Compiled once at import time. Each pattern is an opening phrase followed,
# within 15 characters on the same line, by a reversal phrase. The character
# class [’'] accepts both the typographic and the straight apostrophe, so text
# typed on a keyboard ("I'm") matches as well as pasted/mobile text ("I’m").
_CONTRADICTION_PATTERNS = tuple(
    re.compile(pattern, re.IGNORECASE)
    for pattern in (
        r"\b(i love you).{0,15}(i hate you|you ruin everything)",
        r"\b(i[’']m sorry).{0,15}(but you|if you hadn[’']t)",
        r"\b(i[’']m trying).{0,15}(you never|why do you)",
        r"\b(do what you want).{0,15}(you[’']ll regret it|i always give everything)",
        r"\b(i don[’']t care).{0,15}(you never think of me)",
        r"\b(i guess i[’']m just).{0,15}(the bad guy|worthless|never enough)",
    )
)


def detect_contradiction(message):
    """Return True if *message* contains a known self-contradicting phrase pair.

    Matching is case-insensitive and accepts either apostrophe style.

    Args:
        message: Text to scan. Empty or ``None`` input yields ``False``.

    Returns:
        bool: ``True`` when any contradiction pattern matches, else ``False``.
    """
    if not message:
        return False
    return any(pattern.search(message) for pattern in _CONTRADICTION_PATTERNS)

def calculate_darvo_score(patterns, sentiment_before, sentiment_after, motifs_found, contradiction_flag=False):
    """Combine four signals into a DARVO likelihood, rounded to 3 decimals and capped at 1.0.

    Weighted components:
        * 0.30 -- share of DARVO_PATTERNS entries hit by *patterns*
          (each occurrence in *patterns* counts).
        * 0.30 -- increase from *sentiment_before* to *sentiment_after*
          (a decrease contributes nothing).
        * 0.25 -- share of DARVO_MOTIFS hit by *motifs_found*, compared
          after lowercasing each motif.
        * 0.15 -- added in full when *contradiction_flag* is truthy.

    Args:
        patterns: Iterable of pattern label strings.
        sentiment_before: Baseline sentiment score.
        sentiment_after: Sentiment score to compare against the baseline.
        motifs_found: Iterable of motif phrase strings.
        contradiction_flag: Whether a contradiction was detected.

    Returns:
        The weighted score, rounded to three decimal places.
    """
    darvo_pattern_count = sum(1 for name in patterns if name in DARVO_PATTERNS)
    pattern_part = 0.3 * (darvo_pattern_count / len(DARVO_PATTERNS))

    shift_part = 0.3 * max(0.0, sentiment_after - sentiment_before)

    darvo_motif_count = sum(1 for phrase in motifs_found if phrase.lower() in DARVO_MOTIFS)
    motif_part = 0.25 * (darvo_motif_count / len(DARVO_MOTIFS))

    if contradiction_flag:
        contradiction_part = 0.15 * 1.0
    else:
        contradiction_part = 0.15 * 0.0

    combined = pattern_part + shift_part + motif_part + contradiction_part
    return round(min(combined, 1.0), 3)

def generate_risk_snippet(abuse_score, top_label):
    """Build the risk-level paragraph appended to the analysis report.

    Args:
        abuse_score: Abuse intensity on a 0-100 scale; >= 85 selects the
            "high" tier, >= 60 the "moderate" tier, anything else "low".
        top_label: The pattern to name in the summary line; rendered with
            ``str()``.

    Returns:
        A string starting with two newlines, holding the tier's title,
        summary (with the pattern in bold markdown) and advice.
    """
    tier = "high" if abuse_score >= 85 else "moderate" if abuse_score >= 60 else "low"
    heading, blurb, tip = RISK_SNIPPETS[tier]
    return f"\n\n{heading}\n{blurb} (Pattern: **{top_label!s}**)\n💡 {tip}"

def analyze_single_message(text, thresholds, motif_flags):
    """Score one message for abuse patterns, sentiment, and DARVO likelihood.

    Args:
        text: The message to analyze.
        thresholds: Mapping of label -> minimum score for that label to be
            reported. Every threshold is raised by 0.05 when the message
            sentiment is classified as supportive.
        motif_flags: Unused; kept so existing callers keep working.

    Returns:
        A 5-tuple ``(abuse_score, threshold_labels, top_patterns,
        darvo_score, sentiment)`` where:

        * ``abuse_score`` is the mean of the two highest label scores, times 100.
        * ``threshold_labels`` lists the labels whose score exceeds the
          adjusted threshold, in LABELS order.
        * ``top_patterns`` holds the two highest ``(label, score)`` pairs,
          highest first.
        * ``darvo_score`` is the result of ``calculate_darvo_score``.
        * ``sentiment`` is a dict with keys ``label`` ("supportive" or
          "undermining"), ``raw_label`` and ``score`` from the sentiment model.
    """
    # detect_motifs returns a pair; only the second element is used here and it
    # is iterated as (label, phrase) pairs below.
    _, matched_phrases = detect_motifs(text)

    # SST sentiment. Truncate like the abuse-model call below so an over-long
    # message is clipped instead of failing inside the sentiment model.
    result = sst_pipeline(text, truncation=True)[0]
    sentiment = "supportive" if result['label'] == "POSITIVE" else "undermining"
    # Only negative sentiment feeds the DARVO sentiment-shift component.
    sentiment_score = result['score'] if sentiment == "undermining" else 0.0

    # Demand slightly stronger evidence before flagging a supportive message.
    margin = 0.05 if sentiment == "supportive" else 0.0
    adjusted_thresholds = {label: cutoff + margin for label, cutoff in thresholds.items()}

    contradiction_flag = detect_contradiction(text)
    motifs = [phrase for _, phrase in matched_phrases]

    inputs = tokenizer(text, return_tensors="pt", truncation=True, padding=True)
    with torch.no_grad():
        outputs = model(**inputs)
    # Independent per-label scores (sigmoid, not softmax), paired with LABELS by position.
    scores = torch.sigmoid(outputs.logits.squeeze(0)).numpy()
    scored_labels = list(zip(LABELS, scores))

    threshold_labels = [
        label for label, score in scored_labels
        if score > adjusted_thresholds[label]
    ]
    top_patterns = sorted(scored_labels, key=lambda pair: pair[1], reverse=True)[:2]

    # Labels from the model and labels attached to matched motifs both count
    # toward the DARVO pattern component.
    pattern_labels = threshold_labels + [label for label, _ in matched_phrases]
    darvo_score = calculate_darvo_score(pattern_labels, 0.0, sentiment_score, motifs, contradiction_flag)

    return (
        np.mean([score for _, score in top_patterns]) * 100,
        threshold_labels,
        top_patterns,
        darvo_score,
        {"label": sentiment, "raw_label": result['label'], "score": result['score']}
    )

def analyze_composite(msg1, msg2, msg3, *answers_and_none):
    """Analyze up to three messages plus the escalation checklist and build the report.

    Args:
        msg1: First message text; blank or ``None`` is skipped.
        msg2: Second message text; blank or ``None`` is skipped.
        msg3: Third message text; blank or ``None`` is skipped.
        *answers_and_none: One truthy/falsy answer per ESCALATION_QUESTIONS
            entry, in order, followed by the "None of the above" flag. When
            that flag is truthy the escalation score is forced to 0.

    Returns:
        The report text, or a prompt asking for input when every message is
        blank.
    """
    # Validate first so an empty submission returns before any scoring work.
    active = [m for m in (msg1, msg2, msg3) if m and m.strip()]
    if not active:
        return "Please enter at least one message."

    question_count = len(ESCALATION_QUESTIONS)
    responses = answers_and_none[:question_count]
    # The "none" flag is whatever follows the per-question answers; if it was
    # not supplied, treat it as unchecked instead of misreading an answer.
    trailing = answers_and_none[question_count:]
    none_selected = bool(trailing) and bool(trailing[-1])

    if none_selected:
        escalation_score = 0
    else:
        escalation_score = sum(
            weight
            for (_, weight), answer in zip(ESCALATION_QUESTIONS, responses)
            if answer
        )
    max_escalation = sum(weight for _, weight in ESCALATION_QUESTIONS)
    if escalation_score >= 16:
        escalation_level = "High"
    elif escalation_score >= 8:
        escalation_level = "Moderate"
    else:
        escalation_level = "Low"

    results = [analyze_single_message(m, THRESHOLDS.copy(), []) for m in active]
    abuse_scores = [r[0] for r in results]
    darvo_scores = [r[3] for r in results]

    # r[2] holds (label, score) pairs; report the label with the single highest
    # score across all analyzed messages.
    top_label, _ = max(
        (pair for r in results for pair in r[2]),
        key=lambda pair: pair[1],
    )
    composite_abuse = int(round(sum(abuse_scores) / len(abuse_scores)))
    avg_darvo = round(sum(darvo_scores) / len(darvo_scores), 3)

    out = f"Abuse Intensity: {composite_abuse}%\n"
    out += f"Escalation Potential: {escalation_level} ({escalation_score}/{max_escalation})"
    out += generate_risk_snippet(composite_abuse, top_label)
    if avg_darvo > 0.25:
        level = "moderate" if avg_darvo < 0.65 else "high"
        out += f"\n\nDARVO Score: {avg_darvo} → This indicates a **{level} likelihood** of narrative reversal (DARVO), where the speaker may be denying, attacking, or reversing blame."
    return out

# --- Gradio UI ---
# Three free-text message fields, labelled "Message 1" through "Message 3".
textbox_inputs = [gr.Textbox(label=f"Message {slot}") for slot in range(1, 4)]
# One checkbox per escalation question; the weights are applied in analyze_composite.
quiz_boxes = [gr.Checkbox(label=question) for question, _weight in ESCALATION_QUESTIONS]
none_box = gr.Checkbox(label="None of the above")

# Input order must match analyze_composite's signature: the three messages,
# then the checklist answers, then the "None of the above" flag last.
iface = gr.Interface(
    fn=analyze_composite,
    inputs=[*textbox_inputs, *quiz_boxes, none_box],
    outputs=gr.Textbox(label="Results"),
    title="Abuse Pattern Detector + Escalation Quiz",
    allow_flagging="manual",
)

if __name__ == "__main__":
    iface.launch()