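"""Abuse Pattern Detector (Gradio app).

Scores a pasted message with a multi-label RoBERTa classifier, adjusts the
per-label thresholds when a general sentiment model reads the text as negative,
and reports an overall abuse-risk score, the most likely patterns, critical
danger flags, and support resources.
"""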
import gradio as gr
import torch
from transformers import RobertaForSequenceClassification, RobertaTokenizer
import numpy as np
from transformers import pipeline

# Load sentiment analysis model
sentiment_analyzer = pipeline("sentiment-analysis")

# Load model and tokenizer
model_name = "SamanthaStorm/abuse-pattern-detector-v2"
model = RobertaForSequenceClassification.from_pretrained(model_name, trust_remote_code=True)
tokenizer = RobertaTokenizer.from_pretrained(model_name, trust_remote_code=True)

# Model output labels, in the order the classifier emits them.
LABELS = [
    "gaslighting", "mockery", "dismissiveness", "control",
    "guilt_tripping", "apology_baiting", "blame_shifting", "projection",
    "contradictory_statements", "manipulation", "deflection", "insults",
    "obscure_formal", "recovery_phase", "non_abusive",
    "suicidal_threat", "physical_threat", "extreme_control"
]

# Per-label probability thresholds, applied to the sigmoid scores.
THRESHOLDS = {
    "gaslighting": 0.25, "mockery": 0.15, "dismissiveness": 0.30,
    "control": 0.43, "guilt_tripping": 0.19, "apology_baiting": 0.45,
    "blame_shifting": 0.23, "projection": 0.50, "contradictory_statements": 0.25,
    "manipulation": 0.25, "deflection": 0.30, "insults": 0.34,
    "obscure_formal": 0.25, "recovery_phase": 0.25, "non_abusive": 0.70,
    "suicidal_threat": 0.45, "physical_threat": 0.20, "extreme_control": 0.36
}

# The first 15 labels are behavioral patterns; the last 3 are critical danger flags.
PATTERN_LABELS = LABELS[:15]
DANGER_LABELS = LABELS[15:]

EXPLANATIONS = {
    "gaslighting": "Gaslighting involves making someone question their own reality or perceptions.",
    "blame_shifting": "Blame-shifting is when one person redirects responsibility onto someone else.",
    "projection": "Projection accuses the victim of behaviors the abuser exhibits themselves.",
    "dismissiveness": "Dismissiveness belittles or ignores another person’s thoughts or feelings.",
    "mockery": "Mockery involves ridicule or sarcasm meant to humiliate.",
    "recovery_phase": "Recovery phase invalidates someone’s healing process or needs.",
    "insults": "Insults are derogatory remarks meant to degrade or attack.",
    "apology_baiting": "Apology-baiting manipulates someone into apologizing for the abuser’s actions.",
    "deflection": "Deflection shifts responsibility or changes the subject to avoid blame.",
    "control": "Control includes behavior that limits another’s autonomy or freedom.",
    "extreme_control": "Extreme control is highly manipulative dominance over another’s choices.",
    "physical_threat": "Physical threats suggest or state a risk of bodily harm.",
    "suicidal_threat": "Suicidal threats use self-harm as a way to manipulate others.",
    "guilt_tripping": "Guilt-tripping makes someone feel guilty for things they didn’t cause.",
    "manipulation": "Manipulation influences someone’s behavior through deceptive emotional tactics.",
    "non_abusive": "Non-abusive language is respectful, supportive, and healthy."
}

def calculate_abuse_level(scores, thresholds):
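    """Return the mean of all scores that exceed their thresholds, as a percentage."""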
    triggered_scores = [score for label, score in zip(LABELS, scores) if score > thresholds[label]]
    if not triggered_scores:
        return 0.0
    return round(np.mean(triggered_scores) * 100, 2)

def interpret_abuse_level(score):
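    """Map a 0-100 abuse score to a human-readable risk description."""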
    if score > 80:
        return "Extreme / High Risk"
    elif score > 60:
        return "Severe / Harmful Pattern Present"
    elif score > 40:
        return "Likely Abuse"
    elif score > 20:
        return "Mild Concern"
    else:
        return "Very Low / Likely Safe"

def analyze_messages(input_text, context_flags):
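    """Run sentiment and abuse-pattern analysis on the input text and return a formatted report."""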
    input_text = input_text.strip()
    if not input_text:
        return "Please enter a message for analysis."

    # Sentiment
    sentiment = sentiment_analyzer(input_text)[0]
    sentiment_label = sentiment['label']
    sentiment_score = sentiment['score']

    # Lower the abuse-pattern thresholds for negative tone so subtler patterns are
    # flagged; leave the non-abusive override threshold unchanged.
    adjusted_thresholds = THRESHOLDS.copy()
    if sentiment_label == "NEGATIVE":
        adjusted_thresholds = {
            key: (val if key == "non_abusive" else val * 0.8)
            for key, val in THRESHOLDS.items()
        }

    # Tokenization and prediction
    inputs = tokenizer(input_text, return_tensors="pt", truncation=True, padding=True)
    with torch.no_grad():
        outputs = model(**inputs)
    scores = torch.sigmoid(outputs.logits.squeeze(0)).numpy()

    # Pattern and danger flags from model
    pattern_count = sum(score > adjusted_thresholds[label] for label, score in zip(PATTERN_LABELS, scores[:15]))
    danger_flag_count = sum(score > adjusted_thresholds[label] for label, score in zip(DANGER_LABELS, scores[15:]))

    # Add contextual danger from checkboxes
    if context_flags and len(context_flags) >= 2:
        danger_flag_count += 1

    # Non-abusive override
    non_abusive_score = scores[LABELS.index("non_abusive")]
    if non_abusive_score > adjusted_thresholds["non_abusive"]:
        return "This message is classified as non-abusive."

    # Abuse level, computed against the base (unadjusted) thresholds
    abuse_level = calculate_abuse_level(scores, THRESHOLDS)
    abuse_description = interpret_abuse_level(abuse_level)

    # Resources
    if danger_flag_count >= 2:
        resources = "⚠️ Your responses indicate elevated danger. Please consider seeking support immediately through a domestic violence hotline or trusted professional."
    else:
        resources = "For more information on abuse patterns, consider reaching out to support groups or professional counselors."

    # Top patterns with definitions
    scored_patterns = [(label, score) for label, score in zip(PATTERN_LABELS, scores[:15])]
    top_patterns = sorted(scored_patterns, key=lambda x: x[1], reverse=True)[:2]
    top_pattern_explanations = "\n".join([
        f"• {label.replace('_', ' ').title()}: {EXPLANATIONS.get(label, 'No explanation available.')}"
        for label, _ in top_patterns
    ])

    result = (
        f"Abuse Risk Score: {abuse_level}% – {abuse_description}\n\n"
        f"Most Likely Patterns:\n{top_pattern_explanations}\n\n"
        f"⚠️ Critical Danger Flags Detected: {min(danger_flag_count, 3)} of 3\n"
        "The Danger Assessment is a validated tool that helps identify serious risk in intimate partner violence. "
        "It flags communication patterns associated with increased risk of severe harm.\n\n"
        f"Resources: {resources}\n\n"
        f"Sentiment: {sentiment_label} (Confidence: {sentiment_score*100:.2f}%)"
    )

    return result

# Build the Gradio interface
iface = gr.Interface(
    fn=analyze_messages,
    inputs=[
        gr.Textbox(lines=10, placeholder="Enter message here..."),
        gr.CheckboxGroup(
            label="Do any of these apply to your situation?",
            choices=[
                "They’ve threatened harm",
                "They isolate me",
                "I’ve changed my behavior out of fear",
                "They monitor/follow me",
                "I feel unsafe when alone with them"
            ]
        )
    ],
    outputs=gr.Textbox(label="Analysis Result"),
    title="Abuse Pattern Detector",
)

if __name__ == "__main__":
    iface.launch()