Update app.py
app.py (CHANGED)
@@ -2,18 +2,15 @@ import gradio as gr
 import torch
 import numpy as np
 from transformers import AutoModelForSequenceClassification, AutoTokenizer
-from transformers import RobertaForSequenceClassification, RobertaTokenizer
 from motif_tagging import detect_motifs
-from abuse_type_mapping import determine_abuse_type
 
-#
+# Load models
 sentiment_model = AutoModelForSequenceClassification.from_pretrained("SamanthaStorm/tether-sentiment")
 sentiment_tokenizer = AutoTokenizer.from_pretrained("SamanthaStorm/tether-sentiment")
 
-# Load abuse pattern model
 model_name = "SamanthaStorm/autotrain-c1un8-p8vzo"
-model =
-tokenizer =
+model = AutoModelForSequenceClassification.from_pretrained(model_name)
+tokenizer = AutoTokenizer.from_pretrained(model_name)
 
 LABELS = [
     "gaslighting", "mockery", "dismissiveness", "control", "guilt_tripping", "apology_baiting", "blame_shifting", "projection",
@@ -28,36 +25,32 @@ THRESHOLDS = {
     "non_abusive": 2.0, "suicidal_threat": 0.45, "physical_threat": 0.02, "extreme_control": 0.30
 }
 
-PATTERN_LABELS = LABELS[:15]
-DANGER_LABELS = LABELS[15:18]
-
 EXPLANATIONS = {
     "gaslighting": "Gaslighting involves making someone question their own reality or perceptions...",
-    "blame_shifting": "
-    "projection": "
-    "dismissiveness": "
-    "mockery": "
-    "recovery_phase": "
-    "insults": "
-    "apology_baiting": "
-    "deflection": "
-    "control": "
-    "extreme_control": "
-    "physical_threat": "
-    "suicidal_threat": "
-    "guilt_tripping": "
-    "manipulation": "
-    "non_abusive": "
-    "obscure_formal": "
+    "blame_shifting": "Redirecting responsibility to the victim...",
+    "projection": "Accusing the victim of behaviors the abuser exhibits...",
+    "dismissiveness": "Belittling or disregarding someone's feelings...",
+    "mockery": "Ridiculing someone in a hurtful, humiliating way...",
+    "recovery_phase": "Dismissing someone's emotional healing...",
+    "insults": "Derogatory remarks aimed at degrading someone...",
+    "apology_baiting": "Manipulating victims into apologizing for abuse...",
+    "deflection": "Redirecting blame to avoid accountability...",
+    "control": "Restricting autonomy through manipulation...",
+    "extreme_control": "Dominating decisions and behaviors entirely...",
+    "physical_threat": "Signals risk of bodily harm...",
+    "suicidal_threat": "Manipulates others using self-harm threats...",
+    "guilt_tripping": "Uses guilt to manipulate someone's actions...",
+    "manipulation": "Deceives to influence or control outcomes...",
+    "non_abusive": "Respectful and free of coercion...",
+    "obscure_formal": "Uses confusion/superiority to manipulate..."
 }
 
+DANGER_LABELS = LABELS[15:18]
+PATTERN_LABELS = LABELS[:15]
+
 PATTERN_WEIGHTS = {
-    "physical_threat": 1.5,
-    "
-    "extreme_control": 1.5,
-    "gaslighting": 1.3,
-    "control": 1.2,
-    "dismissiveness": 0.8,
+    "physical_threat": 1.5, "suicidal_threat": 1.4, "extreme_control": 1.5,
+    "gaslighting": 1.3, "control": 1.2, "dismissiveness": 0.8,
     "non_abusive": 0.0
 }
 
@@ -67,73 +60,50 @@ def custom_sentiment(text):
     outputs = sentiment_model(**inputs)
     probs = torch.nn.functional.softmax(outputs.logits, dim=1)
     label_idx = torch.argmax(probs).item()
-
-    label = label_map[label_idx]
-    score = probs[0][label_idx].item()
-    return {"label": label, "score": score}
+    return {"label": "supportive" if label_idx == 0 else "undermining", "score": probs[0][label_idx].item()}
 
 def calculate_abuse_level(scores, thresholds, motif_hits=None):
-    weighted_scores = []
-    for label, score in zip(LABELS, scores):
-        if score > thresholds[label]:
-            weight = PATTERN_WEIGHTS.get(label, 1.0)
-            weighted_scores.append(score * weight)
+    weighted_scores = [score * PATTERN_WEIGHTS.get(label, 1.0) for label, score in zip(LABELS, scores) if score > thresholds[label]]
     base_score = round(np.mean(weighted_scores) * 100, 2) if weighted_scores else 0.0
-
-    if any(label in motif_hits for label in {"physical_threat", "suicidal_threat", "extreme_control"}):
+    if any(label in (motif_hits or []) for label in DANGER_LABELS):
         base_score = max(base_score, 75.0)
     return base_score
 
 def interpret_abuse_level(score):
-    if score > 80:
-
-
-
-    elif score > 40:
-        return "Likely Abuse"
-    elif score > 20:
-        return "Mild Concern"
+    if score > 80: return "Extreme / High Risk"
+    if score > 60: return "Severe / Harmful Pattern Present"
+    if score > 40: return "Likely Abuse"
+    if score > 20: return "Mild Concern"
     return "Very Low / Likely Safe"
 
-def analyze_single_message(text,
+def analyze_single_message(text, thresholds, context_flags):
     motif_flags, matched_phrases = detect_motifs(text)
-
-
-    sentiment_label = sentiment_result["label"]
-    sentiment_score = sentiment_result["score"]
-    thresholds = {k: v * 0.8 for k, v in THRESHOLDS.items()} if sentiment_label == "undermining" else THRESHOLDS.copy()
+    sentiment = custom_sentiment(text)
+    thresholds = {k: v * 0.8 for k, v in thresholds.items()} if sentiment['label'] == "undermining" else thresholds.copy()
     inputs = tokenizer(text, return_tensors="pt", truncation=True, padding=True)
     with torch.no_grad():
-
-
-
-    phrase_labels = [label for label, _ in matched_phrases]
-    pattern_labels_used = list(set(threshold_labels + phrase_labels))
-    abuse_level = calculate_abuse_level(scores, thresholds, motif_hits=[label for label, _ in matched_phrases])
+        scores = torch.sigmoid(model(**inputs).logits.squeeze(0)).numpy()
+    labels_used = list(set([l for l, s in zip(PATTERN_LABELS, scores[:15]) if s > thresholds[l]] + [l for l, _ in matched_phrases]))
+    abuse_level = calculate_abuse_level(scores, thresholds, motif_hits=[l for l, _ in matched_phrases])
     abuse_description = interpret_abuse_level(abuse_level)
-
-
-
-        "summary": abuse_description,
-        "sentiment": f"{sentiment_label} ({sentiment_score*100:.2f}%)",
-        "top_labels": pattern_labels_used[:2],
-        "matched_phrases": matched_phrases,
-        "flags": contextual_flags
-    }
+    danger_count = sum(scores[LABELS.index(lbl)] > thresholds[lbl] for lbl in DANGER_LABELS)
+    output = f"Score: {abuse_level}% – {abuse_description}\nLabels: {', '.join(labels_used)}"
+    return output, abuse_level
 
 def analyze_composite(msg1, msg2, msg3, flags):
-
-
-
-
-
+    thresholds = THRESHOLDS.copy()
+    results = [analyze_single_message(t, thresholds, flags) for t in [msg1, msg2, msg3] if t.strip()]
+    result_texts = [r[0] for r in results]
+    composite_score = round(np.mean([r[1] for r in results]), 2) if results else 0.0
+    result_texts.append(f"\nComposite Abuse Score: {composite_score}%")
+    return tuple(result_texts)
 
 iface = gr.Interface(
     fn=analyze_composite,
     inputs=[
-        gr.Textbox(label="Message 1"),
-        gr.Textbox(label="Message 2"),
-        gr.Textbox(label="Message 3"),
+        gr.Textbox(lines=3, label="Message 1"),
+        gr.Textbox(lines=3, label="Message 2"),
+        gr.Textbox(lines=3, label="Message 3"),
         gr.CheckboxGroup(label="Contextual Flags", choices=[
             "They've threatened harm", "They isolate me", "I’ve changed my behavior out of fear",
            "They monitor/follow me", "I feel unsafe when alone with them"
@@ -150,4 +120,4 @@ iface = gr.Interface(
 )
 
 if __name__ == "__main__":
-    iface.
+    iface.launch()
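For local verification, a minimal smoke-test sketch of the updated pipeline is shown below. It is not part of this commit: it assumes the Space repository is checked out alongside this app.py with its dependencies installed (gradio, torch, transformers, numpy, motif_tagging) and that the SamanthaStorm models can be downloaded from the Hub. The example messages and the test_app.py name are illustrative only.

# test_app.py – hypothetical smoke test, not included in this commit.
# Calls analyze_composite() directly, bypassing the Gradio UI.
from app import analyze_composite

messages = [
    "You always twist my words; that never happened.",  # illustrative sample text
    "If you leave, I'll hurt myself.",                   # illustrative sample text
    "",                                                  # blank entries are skipped
]
flags = ["They monitor/follow me"]

# Returns one formatted block per non-empty message plus a final
# "Composite Abuse Score" entry, mirroring what the Space displays.
results = analyze_composite(messages[0], messages[1], messages[2], flags)
for block in results:
    print(block)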