Spaces:
Running
on
Zero
Running
on
Zero
Update app.py
Browse files
app.py
CHANGED
@@ -4,9 +4,8 @@ import numpy as np
|
|
4 |
from transformers import AutoModelForSequenceClassification, AutoTokenizer
|
5 |
from transformers import RobertaForSequenceClassification, RobertaTokenizer
|
6 |
from motif_tagging import detect_motifs
|
7 |
-
from abuse_type_mapping import determine_abuse_type
|
8 |
|
9 |
-
#
|
10 |
sentiment_model = AutoModelForSequenceClassification.from_pretrained("SamanthaStorm/tether-sentiment")
|
11 |
sentiment_tokenizer = AutoTokenizer.from_pretrained("SamanthaStorm/tether-sentiment")
|
12 |
|
@@ -29,7 +28,6 @@ THRESHOLDS = {
|
|
29 |
}
|
30 |
|
31 |
PATTERN_LABELS = LABELS[:15]
|
32 |
-
DANGER_LABELS = LABELS[15:18]
|
33 |
|
34 |
EXPLANATIONS = {
|
35 |
"gaslighting": "Gaslighting involves making someone question their own reality or perceptions...",
|
@@ -42,23 +40,12 @@ EXPLANATIONS = {
|
|
42 |
"apology_baiting": "Apology-baiting manipulates victims into apologizing for abuser's behavior.",
|
43 |
"deflection": "Deflection avoids accountability by redirecting blame.",
|
44 |
"control": "Control restricts autonomy through manipulation or coercion.",
|
45 |
-
"extreme_control": "Extreme control dominates decisions and behaviors entirely.",
|
46 |
-
"physical_threat": "Physical threats signal risk of bodily harm.",
|
47 |
-
"suicidal_threat": "Suicidal threats manipulate others using self-harm threats.",
|
48 |
-
"guilt_tripping": "Guilt-tripping uses guilt to manipulate someone’s actions.",
|
49 |
"manipulation": "Manipulation deceives to influence or control outcomes.",
|
50 |
-
"non_abusive": "Non-abusive language is respectful and free of coercion.",
|
51 |
"obscure_formal": "Obscure/formal language manipulates through confusion or superiority."
|
52 |
}
|
53 |
|
54 |
PATTERN_WEIGHTS = {
|
55 |
-
"
|
56 |
-
"suicidal_threat": 1.4,
|
57 |
-
"extreme_control": 1.5,
|
58 |
-
"gaslighting": 1.3,
|
59 |
-
"control": 1.2,
|
60 |
-
"dismissiveness": 0.8,
|
61 |
-
"non_abusive": 0.0
|
62 |
}
|
63 |
|
64 |
def custom_sentiment(text):
|
@@ -73,9 +60,6 @@ def custom_sentiment(text):
|
|
73 |
def calculate_abuse_level(scores, thresholds, motif_hits=None):
|
74 |
weighted_scores = [score * PATTERN_WEIGHTS.get(label, 1.0) for label, score in zip(LABELS, scores) if score > thresholds[label]]
|
75 |
base_score = round(np.mean(weighted_scores) * 100, 2) if weighted_scores else 0.0
|
76 |
-
motif_hits = motif_hits or []
|
77 |
-
if any(label in motif_hits for label in {"physical_threat", "suicidal_threat", "extreme_control"}):
|
78 |
-
base_score = max(base_score, 75.0)
|
79 |
return base_score
|
80 |
|
81 |
def interpret_abuse_level(score):
|
@@ -97,28 +81,22 @@ def analyze_single_message(text, thresholds, motif_flags):
|
|
97 |
with torch.no_grad():
|
98 |
outputs = model(**inputs)
|
99 |
scores = torch.sigmoid(outputs.logits.squeeze(0)).numpy()
|
100 |
-
threshold_labels = [label for label, score in zip(PATTERN_LABELS, scores[:15]) if score > adjusted_thresholds[label]]
|
101 |
-
phrase_labels = [label for label, _ in matched_phrases]
|
102 |
-
pattern_labels_used = list(set(threshold_labels + phrase_labels))
|
103 |
abuse_level = calculate_abuse_level(scores, adjusted_thresholds, motif_hits)
|
104 |
-
|
105 |
-
|
106 |
-
pattern_expl = "\n".join([f"• {label.replace('_', ' ').title()}: {EXPLANATIONS.get(label)}" for label, _ in top_patterns])
|
107 |
-
return abuse_level, abuse_description, pattern_expl
|
108 |
|
109 |
def analyze_composite(msg1, msg2, msg3, flags):
|
110 |
thresholds = THRESHOLDS
|
111 |
-
|
|
|
112 |
if not results:
|
113 |
return "Please enter at least one message."
|
114 |
-
|
115 |
-
|
116 |
-
|
117 |
-
|
118 |
-
|
119 |
-
composite
|
120 |
-
result_lines.append(f"\nComposite Abuse Score: {composite}%")
|
121 |
-
return "\n\n".join(result_lines)
|
122 |
|
123 |
txt_inputs = [
|
124 |
gr.Textbox(label="Message 1"),
|
@@ -140,4 +118,4 @@ iface = gr.Interface(
|
|
140 |
)
|
141 |
|
142 |
if __name__ == "__main__":
|
143 |
-
iface.launch()
|
|
|
4 |
from transformers import AutoModelForSequenceClassification, AutoTokenizer
|
5 |
from transformers import RobertaForSequenceClassification, RobertaTokenizer
|
6 |
from motif_tagging import detect_motifs
|
|
|
7 |
|
8 |
+
# Load sentiment model
|
9 |
sentiment_model = AutoModelForSequenceClassification.from_pretrained("SamanthaStorm/tether-sentiment")
|
10 |
sentiment_tokenizer = AutoTokenizer.from_pretrained("SamanthaStorm/tether-sentiment")
|
11 |
|
|
|
28 |
}
|
29 |
|
30 |
PATTERN_LABELS = LABELS[:15]
|
|
|
31 |
|
32 |
EXPLANATIONS = {
|
33 |
"gaslighting": "Gaslighting involves making someone question their own reality or perceptions...",
|
|
|
40 |
"apology_baiting": "Apology-baiting manipulates victims into apologizing for abuser's behavior.",
|
41 |
"deflection": "Deflection avoids accountability by redirecting blame.",
|
42 |
"control": "Control restricts autonomy through manipulation or coercion.",
|
|
|
|
|
|
|
|
|
43 |
"manipulation": "Manipulation deceives to influence or control outcomes.",
|
|
|
44 |
"obscure_formal": "Obscure/formal language manipulates through confusion or superiority."
|
45 |
}
|
46 |
|
47 |
PATTERN_WEIGHTS = {
|
48 |
+
"gaslighting": 1.3, "control": 1.2, "dismissiveness": 0.8, "non_abusive": 0.0
|
|
|
|
|
|
|
|
|
|
|
|
|
49 |
}
|
50 |
|
51 |
def custom_sentiment(text):
|
|
|
60 |
def calculate_abuse_level(scores, thresholds, motif_hits=None):
|
61 |
weighted_scores = [score * PATTERN_WEIGHTS.get(label, 1.0) for label, score in zip(LABELS, scores) if score > thresholds[label]]
|
62 |
base_score = round(np.mean(weighted_scores) * 100, 2) if weighted_scores else 0.0
|
|
|
|
|
|
|
63 |
return base_score
|
64 |
|
65 |
def interpret_abuse_level(score):
|
|
|
81 |
with torch.no_grad():
|
82 |
outputs = model(**inputs)
|
83 |
scores = torch.sigmoid(outputs.logits.squeeze(0)).numpy()
|
|
|
|
|
|
|
84 |
abuse_level = calculate_abuse_level(scores, adjusted_thresholds, motif_hits)
|
85 |
+
top_label = LABELS[np.argmax(scores)]
|
86 |
+
return abuse_level, top_label
|
|
|
|
|
87 |
|
88 |
def analyze_composite(msg1, msg2, msg3, flags):
|
89 |
thresholds = THRESHOLDS
|
90 |
+
messages = [m for m in [msg1, msg2, msg3] if m.strip()]
|
91 |
+
results = [analyze_single_message(m, thresholds, flags) for m in messages]
|
92 |
if not results:
|
93 |
return "Please enter at least one message."
|
94 |
+
abuse_scores = [score for score, _ in results]
|
95 |
+
labels = [label.replace("_", " ") for _, label in results]
|
96 |
+
composite = round(np.mean(abuse_scores), 2)
|
97 |
+
abuse_desc = interpret_abuse_level(composite)
|
98 |
+
top_labels = ", ".join(labels)
|
99 |
+
return f"The messages analyzed show signs of {top_labels}, indicating a {composite}% likelihood of abusive communication ({abuse_desc})."
|
|
|
|
|
100 |
|
101 |
txt_inputs = [
|
102 |
gr.Textbox(label="Message 1"),
|
|
|
118 |
)
|
119 |
|
120 |
if __name__ == "__main__":
|
121 |
+
iface.launch()
|