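"""Tether Abuse Pattern Detector.

Gradio app that analyzes a message with two Hugging Face models: a fine-tuned
sentiment classifier and a multi-label RoBERTa abuse-pattern model. It reports
an overall abuse risk score, the most likely abuse patterns with short
explanations, danger flags, and sentiment, taking self-reported contextual
risk factors into account.
"""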
import gradio as gr
import torch
import numpy as np
from transformers import AutoModelForSequenceClassification, AutoTokenizer
from transformers import RobertaForSequenceClassification, RobertaTokenizer

# Load fine-tuned sentiment model from Hugging Face
sentiment_model = AutoModelForSequenceClassification.from_pretrained("SamanthaStorm/tether-sentiment")
sentiment_tokenizer = AutoTokenizer.from_pretrained("SamanthaStorm/tether-sentiment")

# Load abuse pattern model
model_name = "SamanthaStorm/Tether"
model = RobertaForSequenceClassification.from_pretrained(model_name, trust_remote_code=True)
tokenizer = RobertaTokenizer.from_pretrained(model_name, trust_remote_code=True)

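# 18 output labels: the first 15 are abuse-pattern labels (including non_abusive),
# the last 3 are danger flags.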
LABELS = [
    "gaslighting", "mockery", "dismissiveness", "control", "guilt_tripping", "apology_baiting", "blame_shifting", "projection",
    "contradictory_statements", "manipulation", "deflection", "insults", "obscure_formal", "recovery_phase", "non_abusive",
    "suicidal_threat", "physical_threat", "extreme_control"
]

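# Per-label decision thresholds applied to the sigmoid scores below.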
THRESHOLDS = {
    "gaslighting": 0.25, "mockery": 0.15, "dismissiveness": 0.30, "control": 0.43, "guilt_tripping": 0.19,
    "apology_baiting": 0.45, "blame_shifting": 0.23, "projection": 0.50, "contradictory_statements": 0.25,
    "manipulation": 0.25, "deflection": 0.30, "insults": 0.34, "obscure_formal": 0.25, "recovery_phase": 0.25,
    "non_abusive": 2.0, "suicidal_threat": 0.45, "physical_threat": 0.10, "extreme_control": 0.36
}

PATTERN_LABELS = LABELS[:15]
DANGER_LABELS = LABELS[15:18]

EXPLANATIONS = {
    "gaslighting": "Gaslighting involves making someone question their own reality or perceptions...",
    "blame_shifting": "Blame-shifting is when one person redirects the responsibility...",
    "projection": "Projection involves accusing the victim of behaviors the abuser exhibits.",
    "dismissiveness": "Dismissiveness is belittling or disregarding another person’s feelings.",
    "mockery": "Mockery ridicules someone in a hurtful, humiliating way.",
    "recovery_phase": "Recovery phase dismisses someone's emotional healing process.",
    "insults": "Insults are derogatory remarks aimed at degrading someone.",
    "apology_baiting": "Apology-baiting manipulates victims into apologizing for abuser's behavior.",
    "deflection": "Deflection avoids accountability by redirecting blame.",
    "control": "Control restricts autonomy through manipulation or coercion.",
    "extreme_control": "Extreme control dominates decisions and behaviors entirely.",
    "physical_threat": "Physical threats signal risk of bodily harm.",
    "suicidal_threat": "Suicidal threats manipulate others using self-harm threats.",
    "guilt_tripping": "Guilt-tripping uses guilt to manipulate someone’s actions.",
    "manipulation": "Manipulation deceives to influence or control outcomes.",
    "non_abusive": "Non-abusive language is respectful and free of coercion.",
    "obscure_formal": "Obscure/formal language manipulates through confusion or superiority."
}

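# Run the fine-tuned sentiment model and return its top label with a softmax confidence score.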
def custom_sentiment(text):
    inputs = sentiment_tokenizer(text, return_tensors="pt", truncation=True, padding=True)
    with torch.no_grad():
        outputs = sentiment_model(**inputs)
    probs = torch.nn.functional.softmax(outputs.logits, dim=1)
    label_idx = torch.argmax(probs).item()
    label = sentiment_model.config.id2label[label_idx]
    score = probs[0][label_idx].item()
    return {"label": label, "score": score}

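# Overall abuse level: mean of the label scores that exceed their thresholds, as a percentage (0.0 if none trigger).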
def calculate_abuse_level(scores, thresholds):
    triggered_scores = [score for label, score in zip(LABELS, scores) if score > thresholds[label]]
    return round(np.mean(triggered_scores) * 100, 2) if triggered_scores else 0.0

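# Map the numeric abuse level to a human-readable risk band.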
def interpret_abuse_level(score):
    if score > 80: return "Extreme / High Risk"
    elif score > 60: return "Severe / Harmful Pattern Present"
    elif score > 40: return "Likely Abuse"
    elif score > 20: return "Mild Concern"
    return "Very Low / Likely Safe"

def analyze_messages(input_text, risk_flags):
    input_text = input_text.strip()
    if not input_text:
        return "Please enter a message for analysis."

    sentiment = custom_sentiment(input_text)
    sentiment_label = sentiment['label']
    sentiment_score = sentiment['score']

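    # Lower every threshold by 20% for negative-sentiment messages, making detection more sensitive.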
    adjusted_thresholds = {k: v * 0.8 for k, v in THRESHOLDS.items()} if sentiment_label == "NEGATIVE" else THRESHOLDS.copy()

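    # Score the message with the abuse-pattern model; sigmoid gives an independent probability per label.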
    inputs = tokenizer(input_text, return_tensors="pt", truncation=True, padding=True)
    with torch.no_grad():
        outputs = model(**inputs)
    scores = torch.sigmoid(outputs.logits.squeeze(0)).numpy()

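    # Count labels whose scores exceed their (possibly adjusted) thresholds, separately for
    # patterns and danger flags (pattern_count is currently not surfaced in the report).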
    pattern_count = sum(score > adjusted_thresholds[label] for label, score in zip(PATTERN_LABELS, scores[:15]))
    danger_flag_count = sum(score > adjusted_thresholds[label] for label, score in zip(DANGER_LABELS, scores[15:18]))

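    # Self-reported context: two or more checked risk flags escalate the danger count by one.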
    contextual_flags = risk_flags if risk_flags else []
    if len(contextual_flags) >= 2:
        danger_flag_count += 1

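    # Flags that, if checked, indicate a high-risk situation regardless of the model's scores.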
    critical_flags = ["They've threatened harm", "They monitor/follow me", "I feel unsafe when alone with them"]
    high_risk_context = any(flag in contextual_flags for flag in critical_flags)

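    # Short-circuit if the message scores as clearly non-abusive (with non_abusive
    # thresholded at 2.0, a sigmoid score in [0, 1] can never exceed it).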
    non_abusive_score = scores[LABELS.index('non_abusive')]
    if non_abusive_score > adjusted_thresholds['non_abusive']:
        return "This message is classified as non-abusive."

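    # Note: the overall abuse level is computed against the base THRESHOLDS, not the sentiment-adjusted ones.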
    abuse_level = calculate_abuse_level(scores, THRESHOLDS)
    abuse_description = interpret_abuse_level(abuse_level)

    if danger_flag_count >= 2:
        resources = "Immediate assistance recommended. Please seek professional help or contact emergency services."
    else:
        resources = "For more information on abuse patterns, consider reaching out to support groups or professional counselors."

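    # Surface the two highest-scoring abuse patterns with their explanations.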
    scored_patterns = [(label, score) for label, score in zip(PATTERN_LABELS, scores[:15])]
    top_patterns = sorted(scored_patterns, key=lambda x: x[1], reverse=True)[:2]

    top_pattern_explanations = "\n".join([
        f"\u2022 {label.replace('_', ' ').title()}: {EXPLANATIONS.get(label, 'No explanation available.')}"
        for label, _ in top_patterns
    ])

    result = (
        f"Abuse Risk Score: {abuse_level}% – {abuse_description}\n\n"
        f"Most Likely Patterns:\n{top_pattern_explanations}\n\n"
        f"⚠️ Critical Danger Flags Detected: {danger_flag_count} of 3\n"
        "The Danger Assessment is a validated tool that helps identify serious risk in intimate partner violence.\n\n"
        f"Resources: {resources}\n\n"
        f"Sentiment: {sentiment_label} (Confidence: {sentiment_score*100:.2f}%)"
    )

    if contextual_flags:
        result += "\n\n⚠️ You indicated the following:\n" + "\n".join([f"• {flag}" for flag in contextual_flags])
    if high_risk_context:
        result += "\n\n🚨 These responses suggest a high-risk situation. Consider seeking immediate help or safety planning resources."

    return result

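# Gradio UI: a free-text message box plus contextual risk checkboxes; output is the text report from analyze_messages.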
iface = gr.Interface(
    fn=analyze_messages,
    inputs=[
        gr.Textbox(lines=10, placeholder="Enter message here..."),
        gr.CheckboxGroup(label="Do any of these apply to your situation?", choices=[
            "They've threatened harm", "They isolate me", "I’ve changed my behavior out of fear",
            "They monitor/follow me", "I feel unsafe when alone with them"
        ])
    ],
    outputs=[gr.Textbox(label="Analysis Result")],
    title="Abuse Pattern Detector"
)

if __name__ == "__main__":
    iface.launch()