import gradio as gr
import numpy as np
import torch
from transformers import RobertaForSequenceClassification, RobertaTokenizer, pipeline

# Load a general-purpose sentiment analysis pipeline (falls back to the library
# default, distilbert-base-uncased-finetuned-sst-2-english, when no model is given)
sentiment_analyzer = pipeline("sentiment-analysis")
# Load the abuse-pattern classifier and its tokenizer
model_name = "SamanthaStorm/abuse-pattern-detector-v2"
model = RobertaForSequenceClassification.from_pretrained(model_name, trust_remote_code=True)
tokenizer = RobertaTokenizer.from_pretrained(model_name, trust_remote_code=True)
LABELS = [
"gaslighting", "mockery", "dismissiveness", "control",
"guilt_tripping", "apology_baiting", "blame_shifting", "projection",
"contradictory_statements", "manipulation", "deflection", "insults",
"obscure_formal", "recovery_phase", "non_abusive",
"suicidal_threat", "physical_threat", "extreme_control"
]
THRESHOLDS = {
"gaslighting": 0.25, "mockery": 0.15, "dismissiveness": 0.30,
"control": 0.43, "guilt_tripping": 0.19, "apology_baiting": 0.45,
"blame_shifting": 0.23, "projection": 0.50, "contradictory_statements": 0.25,
"manipulation": 0.25, "deflection": 0.30, "insults": 0.34,
"obscure_formal": 0.25, "recovery_phase": 0.25, "non_abusive": 0.70,
"suicidal_threat": 0.45, "physical_threat": 0.20, "extreme_control": 0.36
}
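# Per-label decision cutoffs applied to the sigmoid scores. Note the deliberately
# high bar for "non_abusive", which gates the early-return override in analyze_messages().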
# Split: the first 15 labels are behavioral abuse patterns, the last 3 are acute danger flags
PATTERN_LABELS = LABELS[:15]
DANGER_LABELS = LABELS[15:]
EXPLANATIONS = {
"gaslighting": "Gaslighting involves making someone question their own reality or perceptions.",
"blame_shifting": "Blame-shifting is when one person redirects responsibility onto someone else.",
"projection": "Projection accuses the victim of behaviors the abuser exhibits themselves.",
"dismissiveness": "Dismissiveness belittles or ignores another person’s thoughts or feelings.",
"mockery": "Mockery involves ridicule or sarcasm meant to humiliate.",
"recovery_phase": "Recovery phase invalidates someone’s healing process or needs.",
"insults": "Insults are derogatory remarks meant to degrade or attack.",
"apology_baiting": "Apology-baiting manipulates someone into apologizing for the abuser’s actions.",
"deflection": "Deflection shifts responsibility or changes the subject to avoid blame.",
"control": "Control includes behavior that limits another’s autonomy or freedom.",
"extreme_control": "Extreme control is highly manipulative dominance over another’s choices.",
"physical_threat": "Physical threats suggest or state a risk of bodily harm.",
"suicidal_threat": "Suicidal threats use self-harm as a way to manipulate others.",
"guilt_tripping": "Guilt-tripping makes someone feel guilty for things they didn’t cause.",
"manipulation": "Manipulation influences someone’s behavior through deceptive emotional tactics.",
"non_abusive": "Non-abusive language is respectful, supportive, and healthy."
}
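# Labels without an entry here (e.g. "contradictory_statements", "obscure_formal")
# fall back to "No explanation available." in the report.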
def calculate_abuse_level(scores, thresholds):
    """Average the scores of every label that clears its threshold, as a percentage."""
    triggered_scores = [score for label, score in zip(LABELS, scores) if score > thresholds[label]]
    if not triggered_scores:
        return 0.0
    return round(np.mean(triggered_scores) * 100, 2)
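# Worked example with hypothetical scores: if only "gaslighting" fires (0.9 > 0.25),
# calculate_abuse_level([0.9] + [0.0] * 17, THRESHOLDS) returns 90.0.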
def interpret_abuse_level(score):
    """Map a 0-100 abuse score onto a human-readable risk band."""
    if score > 80:
        return "Extreme / High Risk"
    elif score > 60:
        return "Severe / Harmful Pattern Present"
    elif score > 40:
        return "Likely Abuse"
    elif score > 20:
        return "Mild Concern"
    else:
        return "Very Low / Likely Safe"
def analyze_messages(input_text, context_flags):
    """Run sentiment analysis and multi-label abuse classification on a message."""
    input_text = input_text.strip()
    if not input_text:
        return "Please enter a message for analysis."
# Sentiment
sentiment = sentiment_analyzer(input_text)[0]
sentiment_label = sentiment['label']
sentiment_score = sentiment['score']
    # Lower every threshold by 20% when the overall tone is negative,
    # making the detector more sensitive in hostile contexts
    adjusted_thresholds = THRESHOLDS.copy()
    if sentiment_label == "NEGATIVE":
        adjusted_thresholds = {key: val * 0.8 for key, val in THRESHOLDS.items()}
    # Tokenize and classify; sigmoid yields an independent 0-1 score per label (multi-label output)
    inputs = tokenizer(input_text, return_tensors="pt", truncation=True, padding=True)
    with torch.no_grad():
        outputs = model(**inputs)
    scores = torch.sigmoid(outputs.logits.squeeze(0)).numpy()
    # Count labels whose scores clear their (possibly lowered) thresholds;
    # only danger_flag_count currently feeds the report below
    pattern_count = sum(score > adjusted_thresholds[label] for label, score in zip(PATTERN_LABELS, scores[:15]))
    danger_flag_count = sum(score > adjusted_thresholds[label] for label, score in zip(DANGER_LABELS, scores[15:]))
    # Treat two or more checked context boxes as one additional danger flag
    if context_flags and len(context_flags) >= 2:
        danger_flag_count += 1
    # Non-abusive override: skip the full report when the model is confident the message is healthy
    non_abusive_score = scores[LABELS.index('non_abusive')]
    if non_abusive_score > adjusted_thresholds['non_abusive']:
        return "This message is classified as non-abusive."
    # Abuse level, scored with the same sentiment-adjusted thresholds as the flag counts
    abuse_level = calculate_abuse_level(scores, adjusted_thresholds)
    abuse_description = interpret_abuse_level(abuse_level)
# Resources
if danger_flag_count >= 2:
resources = "⚠️ Your responses indicate elevated danger. Please consider seeking support immediately through a domestic violence hotline or trusted professional."
else:
resources = "For more information on abuse patterns, consider reaching out to support groups or professional counselors."
# Top patterns with definitions
scored_patterns = [(label, score) for label, score in zip(PATTERN_LABELS, scores[:15])]
top_patterns = sorted(scored_patterns, key=lambda x: x[1], reverse=True)[:2]
top_pattern_explanations = "\n".join([
f"• {label.replace('_', ' ').title()}: {EXPLANATIONS.get(label, 'No explanation available.')}"
for label, _ in top_patterns
])
result = (
f"Abuse Risk Score: {abuse_level}% – {abuse_description}\n\n"
f"Most Likely Patterns:\n{top_pattern_explanations}\n\n"
f"⚠️ Critical Danger Flags Detected: {danger_flag_count} of 3\n"
"The Danger Assessment is a validated tool that helps identify serious risk in intimate partner violence. "
"It flags communication patterns associated with increased risk of severe harm.\n\n"
f"Resources: {resources}\n\n"
f"Sentiment: {sentiment_label} (Confidence: {sentiment_score*100:.2f}%)"
)
return result
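# Quick smoke test without the UI (hypothetical input):
# print(analyze_messages("You never listen and it's all your fault.", []))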
# Launch interface
iface = gr.Interface(
fn=analyze_messages,
inputs=[
        gr.Textbox(lines=10, label="Message(s)", placeholder="Enter message here..."),
gr.CheckboxGroup(
label="Do any of these apply to your situation?",
choices=[
"They’ve threatened harm",
"They isolate me",
"I’ve changed my behavior out of fear",
"They monitor/follow me",
"I feel unsafe when alone with them"
]
)
],
outputs=gr.Textbox(label="Analysis Result"),
title="Abuse Pattern Detector",
)
if __name__ == "__main__":
iface.launch() |
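# On Hugging Face Spaces the app is served automatically; to run it locally,
# execute this file with Python, or pass share=True to iface.launch() for a
# temporary public link.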