Update app.py
app.py CHANGED
@@ -4,152 +4,164 @@ import numpy as np

Removed lines (previous version; the content of most removed lines is not preserved in this view):

-sentiment_model = AutoModelForSequenceClassification.from_pretrained("SamanthaStorm/tether-sentiment")
-sentiment_tokenizer = AutoTokenizer.from_pretrained("SamanthaStorm/tether-sentiment")
-    score = probs[0][label_idx].item()
-    return {"label": label, "score": score}
-            if label != "non_abusive"
-        ]
-        top_patterns = sorted(scored_patterns, key=lambda x: x[1], reverse=True)[:2]

Updated version (lines 4-167):

from transformers import AutoModelForSequenceClassification, AutoTokenizer
from transformers import RobertaForSequenceClassification, RobertaTokenizer

# Load custom fine-tuned sentiment model
sentiment_model = AutoModelForSequenceClassification.from_pretrained("SamanthaStorm/tether-sentiment")
sentiment_tokenizer = AutoTokenizer.from_pretrained("SamanthaStorm/tether-sentiment")

# Load abuse pattern model
model_name = "SamanthaStorm/abuse-pattern-detector-v2"
model = RobertaForSequenceClassification.from_pretrained(model_name, trust_remote_code=True)
tokenizer = RobertaTokenizer.from_pretrained(model_name, trust_remote_code=True)
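
For a quick manual check of the sentiment model once it is loaded, the same objects can be wrapped in the transformers pipeline API. This is an illustrative sketch, not part of the committed app.py; the sample sentence is made up, and the raw label names it prints depend on the model's id2label config.

# Illustrative only (not in app.py): wrap the loaded sentiment model in a pipeline.
from transformers import pipeline

sentiment_pipe = pipeline("text-classification", model=sentiment_model, tokenizer=sentiment_tokenizer)
print(sentiment_pipe("I guess everything is always my fault."))  # e.g. [{'label': 'LABEL_1', 'score': 0.93}]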

LABELS = [
    "gaslighting", "mockery", "dismissiveness", "control", "guilt_tripping", "apology_baiting", "blame_shifting", "projection",
    "contradictory_statements", "manipulation", "deflection", "insults", "obscure_formal", "recovery_phase", "non_abusive",
    "suicidal_threat", "physical_threat", "extreme_control"
]

THRESHOLDS = {
    "gaslighting": 0.25, "mockery": 0.15, "dismissiveness": 0.30, "control": 0.43, "guilt_tripping": 0.19,
    "apology_baiting": 0.45, "blame_shifting": 0.23, "projection": 0.50, "contradictory_statements": 0.25,
    "manipulation": 0.25, "deflection": 0.30, "insults": 0.34, "obscure_formal": 0.25, "recovery_phase": 0.25,
    "non_abusive": 2.0, "suicidal_threat": 0.45, "physical_threat": 0.02, "extreme_control": 0.36
}

PATTERN_LABELS = LABELS[:15]
DANGER_LABELS = LABELS[15:18]
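
The label list and the threshold table have to stay in sync: every label needs a threshold, and the 15/3 slices above assume exactly 18 labels in this order. A small sanity check along these lines (a suggested addition, not in the committed file) makes that assumption explicit:

# Suggested sanity check (not in the original app.py).
assert set(LABELS) == set(THRESHOLDS), "every label needs exactly one threshold"
assert len(PATTERN_LABELS) == 15 and len(DANGER_LABELS) == 3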

EXPLANATIONS = {
    "gaslighting": "Gaslighting involves making someone question their own reality or perceptions…",
    "blame_shifting": "Blame-shifting is when one person redirects the responsibility…",
    "projection": "Projection involves accusing the victim of behaviors the abuser exhibits.",
    "dismissiveness": "Dismissiveness is belittling or disregarding another person's feelings.",
    "mockery": "Mockery ridicules someone in a hurtful, humiliating way.",
    "recovery_phase": "Recovery phase dismisses someone's emotional healing process.",
    "insults": "Insults are derogatory remarks aimed at degrading someone.",
    "apology_baiting": "Apology-baiting manipulates victims into apologizing for abuser's behavior.",
    "deflection": "Deflection avoids accountability by redirecting blame.",
    "control": "Control restricts autonomy through manipulation or coercion.",
    "extreme_control": "Extreme control dominates decisions and behaviors entirely.",
    "physical_threat": "Physical threats signal risk of bodily harm.",
    "suicidal_threat": "Suicidal threats manipulate others using self-harm threats.",
    "guilt_tripping": "Guilt-tripping uses guilt to manipulate someone's actions.",
    "manipulation": "Manipulation deceives to influence or control outcomes.",
    "non_abusive": "Non-abusive language is respectful and free of coercion.",
    "obscure_formal": "Obscure/formal language manipulates through confusion or superiority."
}

def custom_sentiment(text):
    inputs = sentiment_tokenizer(text, return_tensors="pt", truncation=True, padding=True)
    with torch.no_grad():
        outputs = sentiment_model(**inputs)
    probs = torch.nn.functional.softmax(outputs.logits, dim=1)
    label_idx = torch.argmax(probs).item()
    label_map = {0: "supportive", 1: "undermining"}
    label = label_map[label_idx]
    score = probs[0][label_idx].item()
    return {"label": label, "score": score}
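
Assuming the fine-tuned model's two classes map to supportive/undermining as in label_map above, a direct call looks like this; the example text and the printed score are hypothetical:

# Hypothetical quick check; returns a dict like {"label": ..., "score": ...}.
print(custom_sentiment("If you really cared, you wouldn't keep asking questions."))
# e.g. {"label": "undermining", "score": 0.87}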

def calculate_abuse_level(scores, thresholds):
    triggered_scores = [score for label, score in zip(LABELS, scores) if score > thresholds[label]]
    return round(np.mean(triggered_scores) * 100, 2) if triggered_scores else 0.0

def interpret_abuse_level(score):
    if score > 80: return "Extreme / High Risk"
    elif score > 60: return "Severe / Harmful Pattern Present"
    elif score > 40: return "Likely Abuse"
    elif score > 20: return "Mild Concern"
    return "Very Low / Likely Safe"
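
To see how the two helpers combine, here is a small worked example with invented scores (not real model output): only gaslighting (0.40 > 0.25) and insults (0.50 > 0.34) clear their thresholds, so the result is mean(0.40, 0.50) * 100 = 45.0, which interpret_abuse_level reads as "Likely Abuse".

# Worked example with invented scores; every other label is left at 0.0.
demo_scores = [0.40 if l == "gaslighting" else 0.50 if l == "insults" else 0.0 for l in LABELS]
level = calculate_abuse_level(demo_scores, THRESHOLDS)
print(level, interpret_abuse_level(level))  # 45.0 Likely Abuse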

def analyze_messages(input_text, risk_flags):
    input_text = input_text.strip()
    if not input_text:
        return "Please enter a message for analysis."

    # Sentiment pass: an "undermining" reading lowers every abuse threshold by 20%.
    sentiment = custom_sentiment(input_text)
    sentiment_label = sentiment['label']
    sentiment_score = sentiment['score']

    adjusted_thresholds = {k: v * 0.8 for k, v in THRESHOLDS.items()} if sentiment_label == "undermining" else THRESHOLDS.copy()

    # Abuse-pattern pass: multi-label sigmoid scores for all 18 labels.
    inputs = tokenizer(input_text, return_tensors="pt", truncation=True, padding=True)
    with torch.no_grad():
        outputs = model(**inputs)
    scores = torch.sigmoid(outputs.logits.squeeze(0)).numpy()

    pattern_count = sum(score > adjusted_thresholds[label] for label, score in zip(PATTERN_LABELS, scores[:15]))
    danger_flag_count = sum(score > adjusted_thresholds[label] for label, score in zip(DANGER_LABELS, scores[15:18]))

    contextual_flags = risk_flags if risk_flags else []
    if len(contextual_flags) >= 2:
        danger_flag_count += 1

    critical_flags = ["They've threatened harm", "They monitor/follow me", "I feel unsafe when alone with them"]
    high_risk_context = any(flag in contextual_flags for flag in critical_flags)

    non_abusive_score = scores[LABELS.index('non_abusive')]
    if non_abusive_score > adjusted_thresholds['non_abusive']:
        return "This message is classified as non-abusive."

    abuse_level = calculate_abuse_level(scores, adjusted_thresholds)
    abuse_description = interpret_abuse_level(abuse_level)

    if danger_flag_count >= 2:
        resources = "Immediate assistance recommended. Please seek professional help or contact emergency services."
    else:
        resources = "For more information on abuse patterns, consider reaching out to support groups or professional counselors."

    # Rank the triggered patterns and danger labels for the report.
    scored_patterns = [(label, scores[LABELS.index(label)]) for label in PATTERN_LABELS if label != "non_abusive"]
    scored_dangers = [(label, scores[LABELS.index(label)]) for label in DANGER_LABELS]

    top_patterns = sorted(
        [(label, score) for label, score in scored_patterns if score > adjusted_thresholds[label]],
        key=lambda x: x[1], reverse=True
    )[:2]

    top_dangers = sorted(
        [(label, score) for label, score in scored_dangers if score > adjusted_thresholds[label]],
        key=lambda x: x[1], reverse=True
    )

    highlighted_danger = top_dangers[0] if top_dangers else None

    top_pattern_explanations = "\n".join([
        f"\u2022 {label.replace('_', ' ').title()}: {EXPLANATIONS.get(label, 'No explanation available.')}"
        for label, _ in top_patterns
    ])

    if highlighted_danger and highlighted_danger[0] not in [label for label, _ in top_patterns]:
        danger_label = highlighted_danger[0]
        top_pattern_explanations += f"\n\u2022 {danger_label.replace('_', ' ').title()}: {EXPLANATIONS.get(danger_label, 'No explanation available.')} (Danger Pattern)"

    result = (
        f"Abuse Risk Score: {abuse_level}% - {abuse_description}\n\n"
        f"Most Likely Patterns:\n{top_pattern_explanations}\n\n"
        f"⚠️ Critical Danger Flags Detected: {danger_flag_count} of 3\n"
        "Resources: " + resources + "\n\n"
        f"Sentiment: {sentiment_label.title()} (Confidence: {sentiment_score*100:.2f}%)"
    )

    if contextual_flags:
        result += "\n\n⚠️ You indicated the following:\n" + "\n".join([f"• {flag}" for flag in contextual_flags])
    if high_risk_context:
        result += "\n\n🚨 These responses suggest a high-risk situation. Consider seeking immediate help or safety planning resources."

    return result
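
analyze_messages can also be exercised outside the Gradio UI; it still runs both models, so the weights must be available locally. The message and the single risk flag below are hypothetical, and flags have to match the CheckboxGroup strings exactly.

# Hypothetical direct call, bypassing the Gradio interface.
report = analyze_messages(
    "You're imagining things again. No one else thinks that.",
    ["They monitor/follow me"],
)
print(report)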

iface = gr.Interface(
    fn=analyze_messages,
    inputs=[
        gr.Textbox(lines=10, placeholder="Enter message here…"),
        gr.CheckboxGroup(label="Do any of these apply to your situation?", choices=[
            "They've threatened harm", "They isolate me", "I've changed my behavior out of fear",
            "They monitor/follow me", "I feel unsafe when alone with them"
        ])
    ],
    outputs=[gr.Textbox(label="Analysis Result")],
    title="Abuse Pattern Detector"
)

if __name__ == "__main__":
    iface.launch()
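
On a Space the app is started automatically; running python app.py locally hits the same entry point. As an assumption (not part of this commit), iface.launch(share=True) would additionally create a temporary public Gradio link for testing.

# Local-testing variant (assumption, not in the committed file):
# iface.launch(share=True)  # also prints a temporary public URL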