import gradio as gr
import torch
import numpy as np
from transformers import AutoModelForSequenceClassification, AutoTokenizer
from motif_tagging import detect_motifs
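# detect_motifs(text) is assumed to return (motif_flags, matched_phrases), where
# matched_phrases is a list of (label, phrase) pairs (see analyze_single_message below).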
# Load models
# Sentiment model: classifies a message as supportive vs. undermining
sentiment_model = AutoModelForSequenceClassification.from_pretrained("SamanthaStorm/tether-sentiment")
sentiment_tokenizer = AutoTokenizer.from_pretrained("SamanthaStorm/tether-sentiment")
# Multi-label abuse-pattern classifier
model_name = "SamanthaStorm/autotrain-c1un8-p8vzo"
model = AutoModelForSequenceClassification.from_pretrained(model_name)
tokenizer = AutoTokenizer.from_pretrained(model_name)
LABELS = [
    "gaslighting", "mockery", "dismissiveness", "control", "guilt_tripping", "apology_baiting", "blame_shifting", "projection",
    "contradictory_statements", "manipulation", "deflection", "insults", "obscure_formal", "recovery_phase", "non_abusive",
    "suicidal_threat", "physical_threat", "extreme_control"
]
# Per-label decision thresholds. Sigmoid scores range 0-1, so "non_abusive" at 2.0 never fires.
THRESHOLDS = {
    "gaslighting": 0.25, "mockery": 0.15, "dismissiveness": 0.45, "control": 0.43, "guilt_tripping": 0.15,
    "apology_baiting": 0.2, "blame_shifting": 0.23, "projection": 0.50, "contradictory_statements": 0.25,
    "manipulation": 0.25, "deflection": 0.30, "insults": 0.34, "obscure_formal": 0.25, "recovery_phase": 0.25,
    "non_abusive": 2.0, "suicidal_threat": 0.45, "physical_threat": 0.02, "extreme_control": 0.30
}
EXPLANATIONS = {
    "gaslighting": "Gaslighting involves making someone question their own reality or perceptions...",
    "blame_shifting": "Redirecting responsibility to the victim...",
    "projection": "Accusing the victim of behaviors the abuser exhibits...",
    "dismissiveness": "Belittling or disregarding someone's feelings...",
    "mockery": "Ridiculing someone in a hurtful, humiliating way...",
    "recovery_phase": "Dismissing someone's emotional healing...",
    "insults": "Derogatory remarks aimed at degrading someone...",
    "apology_baiting": "Manipulating victims into apologizing for abuse...",
    "deflection": "Redirecting blame to avoid accountability...",
    "control": "Restricting autonomy through manipulation...",
    "extreme_control": "Dominating decisions and behaviors entirely...",
    "physical_threat": "Signals risk of bodily harm...",
    "suicidal_threat": "Manipulates others using self-harm threats...",
    "guilt_tripping": "Uses guilt to manipulate someone's actions...",
    "manipulation": "Deceives to influence or control outcomes...",
    "non_abusive": "Respectful and free of coercion...",
    "obscure_formal": "Uses confusion/superiority to manipulate..."
}
DANGER_LABELS = LABELS[15:18]   # suicidal_threat, physical_threat, extreme_control
PATTERN_LABELS = LABELS[:15]    # all pattern labels up to and including non_abusive
# Weights applied to label scores when computing the overall abuse level
PATTERN_WEIGHTS = {
    "physical_threat": 1.5, "suicidal_threat": 1.4, "extreme_control": 1.5,
    "gaslighting": 1.3, "control": 1.2, "dismissiveness": 0.8,
    "non_abusive": 0.0
}
def custom_sentiment(text):
    """Classify a message as supportive or undermining using the sentiment model."""
    inputs = sentiment_tokenizer(text, return_tensors="pt", truncation=True, padding=True)
    with torch.no_grad():
        outputs = sentiment_model(**inputs)
    probs = torch.nn.functional.softmax(outputs.logits, dim=1)
    label_idx = torch.argmax(probs).item()
    return {"label": "supportive" if label_idx == 0 else "undermining", "score": probs[0][label_idx].item()}
def calculate_abuse_level(scores, thresholds, motif_hits=None):
    """Weighted mean (0-100) of the label scores that exceed their per-label thresholds."""
    weighted_scores = [score * PATTERN_WEIGHTS.get(label, 1.0) for label, score in zip(LABELS, scores) if score > thresholds[label]]
    base_score = round(np.mean(weighted_scores) * 100, 2) if weighted_scores else 0.0
    # A motif hit on any danger label forces the score to at least 75%
    if any(label in (motif_hits or []) for label in DANGER_LABELS):
        base_score = max(base_score, 75.0)
    return base_score
def interpret_abuse_level(score):
    """Map a 0-100 abuse score to a human-readable risk band."""
    if score > 80: return "Extreme / High Risk"
    if score > 60: return "Severe / Harmful Pattern Present"
    if score > 40: return "Likely Abuse"
    if score > 20: return "Mild Concern"
    return "Very Low / Likely Safe"
def analyze_single_message(text, thresholds, context_flags):
    """Score one message: motif matching, sentiment check, then the multi-label pattern model."""
    motif_flags, matched_phrases = detect_motifs(text)
    sentiment = custom_sentiment(text)
    # An "undermining" sentiment lowers every threshold by 20% so patterns trigger more readily
    thresholds = {k: v * 0.8 for k, v in thresholds.items()} if sentiment['label'] == "undermining" else thresholds.copy()
    inputs = tokenizer(text, return_tensors="pt", truncation=True, padding=True)
    with torch.no_grad():
        scores = torch.sigmoid(model(**inputs).logits.squeeze(0)).numpy()
    # Labels flagged either by the model (over threshold) or by a matched motif phrase
    labels_used = list(set([l for l, s in zip(PATTERN_LABELS, scores[:15]) if s > thresholds[l]] + [l for l, _ in matched_phrases]))
    abuse_level = calculate_abuse_level(scores, thresholds, motif_hits=[l for l, _ in matched_phrases])
    abuse_description = interpret_abuse_level(abuse_level)
    danger_count = sum(scores[LABELS.index(lbl)] > thresholds[lbl] for lbl in DANGER_LABELS)  # currently informational only
    output = f"Score: {abuse_level}% – {abuse_description}\nLabels: {', '.join(labels_used)}"
    return output, abuse_level
def analyze_composite(msg1, msg2, msg3, flags):
    """Analyze each non-empty message and report the average abuse score across them."""
    thresholds = THRESHOLDS.copy()
    results = [analyze_single_message(t, thresholds, flags) if t.strip() else None for t in [msg1, msg2, msg3]]
    scores = [r[1] for r in results if r is not None]
    composite_score = round(np.mean(scores), 2) if scores else 0.0
    # Keep one entry per message so the return length always matches the four output textboxes
    result_texts = [r[0] if r is not None else "(no message provided)" for r in results]
    result_texts.append(f"\nComposite Abuse Score: {composite_score}%")
    return tuple(result_texts)
iface = gr.Interface(
    fn=analyze_composite,
    inputs=[
        gr.Textbox(lines=3, label="Message 1"),
        gr.Textbox(lines=3, label="Message 2"),
        gr.Textbox(lines=3, label="Message 3"),
        gr.CheckboxGroup(label="Contextual Flags", choices=[
            "They've threatened harm", "They isolate me", "I've changed my behavior out of fear",
            "They monitor/follow me", "I feel unsafe when alone with them"
        ])
    ],
    outputs=[
        gr.Textbox(label="Message 1 Result"),
        gr.Textbox(label="Message 2 Result"),
        gr.Textbox(label="Message 3 Result"),
        gr.Textbox(label="Composite Score")
    ],
    title="Abuse Pattern Detector (Multi-Message)",
    flagging_mode="manual"
)
if __name__ == "__main__":
    iface.launch()