import gradio as gr
import torch
import numpy as np
from transformers import RobertaForSequenceClassification, RobertaTokenizer, pipeline

# Load sentiment analysis model
sentiment_analyzer = pipeline("sentiment-analysis")
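# No model specified, so transformers falls back to its default English sentiment
# model (a DistilBERT fine-tuned on SST-2), which returns POSITIVE/NEGATIVE labels.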

# Load the abuse-pattern classifier and its tokenizer
model_name = "SamanthaStorm/abuse-pattern-detector-v2"
model = RobertaForSequenceClassification.from_pretrained(model_name, trust_remote_code=True)
tokenizer = RobertaTokenizer.from_pretrained(model_name, trust_remote_code=True)
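# trust_remote_code is only needed if the repo ships custom model/tokenizer code;
# it has no effect for a standard RoBERTa checkpoint.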

# Define labels (18 total)
LABELS = [
    "gaslighting", "mockery", "dismissiveness", "control",
    "guilt_tripping", "apology_baiting", "blame_shifting", "projection",
    "contradictory_statements", "manipulation", "deflection", "insults",
    "obscure_formal", "recovery_phase", "non_abusive", "suicidal_threat", "physical_threat",
    "extreme_control"
]
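# Label order must match the model's 18-way classification head.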

# Custom thresholds for each label
THRESHOLDS = {
    "gaslighting": 0.25,
    "mockery": 0.15,
    "dismissiveness": 0.30,
    "control": 0.43,
    "guilt_tripping": 0.19,
    "apology_baiting": 0.45,
    "blame_shifting": 0.23,
    "projection": 0.50,
    "contradictory_statements": 0.25,
    "manipulation": 0.25,
    "deflection": 0.30,
    "insults": 0.34,
    "obscure_formal": 0.25,
    "recovery_phase": 0.25,
    "non_abusive": 0.99,
    "suicidal_threat": 0.45,
    "physical_threat": 0.20,
    "extreme_control": 0.36
}

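# The first 15 labels (including "non_abusive") describe behavioural patterns;
# the last 3 are critical danger indicators.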
PATTERN_LABELS = LABELS[:15]
DANGER_LABELS = LABELS[15:18]

EXPLANATIONS = {
    "gaslighting": "Gaslighting involves making someone question their own reality or perceptions, often causing them to feel confused or insecure.",
    "blame_shifting": "Blame-shifting is when one person redirects the responsibility for an issue onto someone else, avoiding accountability.",
    "projection": "Projection involves accusing the victim of behaviors or characteristics that the abuser themselves exhibit.",
    "dismissiveness": "Dismissiveness is the act of belittling or disregarding another person's thoughts, feelings, or experiences.",
    "mockery": "Mockery involves ridiculing or making fun of someone in a hurtful way, often with the intent to humiliate them.",
    "recovery_phase": "Recovery phase refers to dismissing or invalidating someone’s process of emotional healing, or ignoring their need for support.",
    "insults": "Insults are derogatory remarks aimed at degrading or humiliating someone, often targeting their personal traits or character.",
    "apology_baiting": "Apology-baiting is when the abuser manipulates the victim into apologizing for something the abuser caused or did wrong.",
    "deflection": "Deflection is the act of avoiding responsibility or shifting focus away from one's own actions, often to avoid accountability.",
    "control": "Control tactics are behaviors that restrict or limit someone's autonomy, often involving domination, manipulation, or coercion.",
    "extreme_control": "Extreme control involves excessive manipulation or domination over someone’s actions, decisions, or behaviors.",
    "physical_threat": "Physical threats involve any indication or direct mention of harm to someone’s physical well-being, often used to intimidate or control.",
    "suicidal_threat": "Suicidal threats are statements made to manipulate or control someone by making them feel responsible for the abuser’s well-being.",
    "guilt_tripping": "Guilt-tripping involves making someone feel guilty or responsible for things they didn’t do, often to manipulate their behavior.",
    "emotional_manipulation": "Emotional manipulation is using guilt, fear, or emotional dependency to control another person’s thoughts, feelings, or actions.",
    "manipulation": "Manipulation refers to using deceptive tactics to control or influence someone’s emotions, decisions, or behavior to serve the manipulator’s own interests.",
    "non_abusive": "Non-abusive language is communication that is respectful, empathetic, and free of harmful behaviors or manipulation.",
    "obscure_formal": "Obscure or overly formal language used manipulatively to create confusion, avoid responsibility, or assert superiority."
}
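# Note: "contradictory_statements" has no entry here (it falls back to the default
# text below), and "emotional_manipulation" is defined but is not one of the 18 model labels.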


def calculate_abuse_level(scores, thresholds):
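    """Return the mean score of all labels that exceed their threshold, scaled to 0-100 (0.0 if none triggered)."""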
    triggered_scores = [score for label, score in zip(LABELS, scores) if score > thresholds[label]]
    if not triggered_scores:
        return 0.0
    return round(np.mean(triggered_scores) * 100, 2)


def interpret_abuse_level(score):
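    """Map a 0-100 abuse score onto a human-readable risk band."""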
    if score > 80:
        return "Extreme / High Risk"
    elif score > 60:
        return "Severe / Harmful Pattern Present"
    elif score > 40:
        return "Likely Abuse"
    elif score > 20:
        return "Mild Concern"
    else:
        return "Very Low / Likely Safe"


def analyze_messages(input_text, risk_flags):
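    """Classify a message for abusive patterns, factor in self-reported risk flags, and return a formatted report."""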
    input_text = input_text.strip()
    if not input_text:
        return "Please enter a message for analysis."

    sentiment = sentiment_analyzer(input_text)[0]
    sentiment_label = sentiment['label']
    sentiment_score = sentiment['score']

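    # When overall sentiment is negative, relax every threshold by 20% so
    # borderline scores are more likely to trigger their labels.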
    adjusted_thresholds = THRESHOLDS.copy()
    if sentiment_label == "NEGATIVE":
        adjusted_thresholds = {key: val * 0.8 for key, val in THRESHOLDS.items()}

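    # Tokenize and score the message; sigmoid turns the logits into
    # independent per-label probabilities (multi-label classification).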
    inputs = tokenizer(input_text, return_tensors="pt", truncation=True, padding=True)
    with torch.no_grad():
        outputs = model(**inputs)
    scores = torch.sigmoid(outputs.logits.squeeze(0)).numpy()

    pattern_count = sum(score > adjusted_thresholds[label] for label, score in zip(PATTERN_LABELS, scores[:15]))
    danger_flag_count = sum(score > adjusted_thresholds[label] for label, score in zip(DANGER_LABELS, scores[15:18]))

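    # Self-reported situational flags from the checkboxes; two or more count
    # as one additional danger flag.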
    contextual_flags = risk_flags if risk_flags else []
    contextual_risk_score = len(contextual_flags)
    if contextual_risk_score >= 2:
        danger_flag_count += 1

    critical_flags = ["They've threatened harm", "They monitor/follow me", "I feel unsafe when alone with them"]
    high_risk_context = any(flag in contextual_flags for flag in critical_flags)

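    # Short-circuit when the model is confident the message is non-abusive.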
    non_abusive_score = scores[LABELS.index('non_abusive')]
    if non_abusive_score > adjusted_thresholds['non_abusive']:
        return "This message is classified as non-abusive."

    # Use the same (possibly sentiment-adjusted) thresholds as the checks above.
    abuse_level = calculate_abuse_level(scores, adjusted_thresholds)
    abuse_description = interpret_abuse_level(abuse_level)

    if danger_flag_count >= 2:
        resources = "Immediate assistance recommended. Please seek professional help or contact emergency services."
    else:
        resources = "For more information on abuse patterns, consider reaching out to support groups or professional counselors."

    scored_patterns = [(label, score) for label, score in zip(PATTERN_LABELS, scores[:15])]
    top_patterns = sorted(scored_patterns, key=lambda x: x[1], reverse=True)[:2]

    top_pattern_explanations = "\n".join([
        f"\u2022 {label.replace('_', ' ').title()}: {EXPLANATIONS.get(label, 'No explanation available.')}"
        for label, _ in top_patterns
    ])

    result = (
        f"Abuse Risk Score: {abuse_level}% – {abuse_description}\n\n"
        f"Most Likely Patterns:\n{top_pattern_explanations}\n\n"
        f"⚠️ Critical Danger Flags Detected: {danger_flag_count} of 3\n"
        "The Danger Assessment is a validated tool that helps identify serious risk in intimate partner violence. "
        "It flags communication patterns associated with increased risk of severe harm. "
        "For more info, consider reaching out to support groups or professionals.\n\n"
        f"Resources: {resources} \n\n"
        f"Sentiment: {sentiment_label} (Confidence: {sentiment_score*100:.2f}%)"
    )

    if contextual_flags:
        result += "\n\n⚠️ You indicated the following:\n" + "\n".join([f"• {flag}" for flag in contextual_flags])
    if high_risk_context:
        result += "\n\n🚨 These responses suggest a high-risk situation. Consider seeking immediate help or safety planning resources."

    return result


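# Gradio UI: a free-text message box plus situational risk checkboxes.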
iface = gr.Interface(
    fn=analyze_messages,
    inputs=[
        gr.Textbox(lines=10, placeholder="Enter message here..."),
        gr.CheckboxGroup(label="Do any of these apply to your situation?", choices=[
            "They've threatened harm",
            "They isolate me",
            "I’ve changed my behavior out of fear",
            "They monitor/follow me",
            "I feel unsafe when alone with them"
        ])
    ],
    outputs=[
        gr.Textbox(label="Analysis Result"),
    ],
    title="Abuse Pattern Detector"
)

if __name__ == "__main__":
    iface.launch()