Spaces:
Running
on
Zero
Running
on
Zero
Update app.py
Browse files
app.py
CHANGED
@@ -21,7 +21,7 @@ LABELS = [
|
|
21 |
]
|
22 |
|
23 |
THRESHOLDS = {
|
24 |
-
"blame shifting": 0.3, "contradictory statements": 0.32, "control": 0.
|
25 |
"gaslighting": 0.30, "guilt tripping": 0.20, "insults": 0.34, "obscure language": 0.25,
|
26 |
"projection": 0.35, "recovery phase": 0.25, "threat": 0.25
|
27 |
}
|
@@ -118,7 +118,6 @@ def generate_risk_snippet(abuse_score, top_label):
|
|
118 |
def analyze_single_message(text, thresholds, motif_flags):
|
119 |
motif_hits, matched_phrases = detect_motifs(text)
|
120 |
|
121 |
-
# SST Sentiment
|
122 |
result = sst_pipeline(text)[0]
|
123 |
sentiment = "supportive" if result['label'] == "POSITIVE" else "undermining"
|
124 |
sentiment_score = result['score'] if sentiment == "undermining" else 0.0
|
@@ -136,10 +135,14 @@ def analyze_single_message(text, thresholds, motif_flags):
|
|
136 |
outputs = model(**inputs)
|
137 |
scores = torch.sigmoid(outputs.logits.squeeze(0)).numpy()
|
138 |
|
139 |
-
threshold_labels = [
|
140 |
-
|
141 |
-
if
|
142 |
-
|
|
|
|
|
|
|
|
|
143 |
top_patterns = sorted(
|
144 |
[(label, score) for label, score in zip(LABELS, scores)],
|
145 |
key=lambda x: x[1],
|
@@ -148,17 +151,18 @@ def analyze_single_message(text, thresholds, motif_flags):
|
|
148 |
|
149 |
pattern_labels = threshold_labels + [label for label, _ in matched_phrases]
|
150 |
darvo_score = calculate_darvo_score(pattern_labels, 0.0, sentiment_score, motifs, contradiction_flag)
|
151 |
-
|
152 |
print("\n--- Debug Info ---")
|
153 |
print(f"Text: {text}")
|
154 |
print(f"Sentiment: {sentiment} (raw: {result['label']}, score: {result['score']:.3f})")
|
155 |
print("Abuse Pattern Scores:")
|
156 |
for label, score in zip(LABELS, scores):
|
157 |
-
passed = "✅" if
|
158 |
print(f" {label:25} → {score:.3f} {passed}")
|
159 |
print(f"Motifs: {motifs}")
|
160 |
print(f"Contradiction: {contradiction_flag}")
|
161 |
print("------------------\n")
|
|
|
162 |
return (
|
163 |
np.mean([score for _, score in top_patterns]) * 100,
|
164 |
threshold_labels,
|
@@ -166,7 +170,6 @@ def analyze_single_message(text, thresholds, motif_flags):
|
|
166 |
darvo_score,
|
167 |
{"label": sentiment, "raw_label": result['label'], "score": result['score']}
|
168 |
)
|
169 |
-
|
170 |
def analyze_composite(msg1, msg2, msg3, *answers_and_none):
|
171 |
responses = answers_and_none[:len(ESCALATION_QUESTIONS)]
|
172 |
none_selected = answers_and_none[-1]
|
@@ -191,7 +194,7 @@ def analyze_composite(msg1, msg2, msg3, *answers_and_none):
|
|
191 |
|
192 |
out = f"Abuse Intensity: {composite_abuse}%\n"
|
193 |
out += f"Escalation Potential: {escalation_level} ({escalation_score}/{sum(w for _, w in ESCALATION_QUESTIONS)})"
|
194 |
-
out += generate_risk_snippet(composite_abuse, top_label)
|
195 |
if avg_darvo > 0.25:
|
196 |
level = "moderate" if avg_darvo < 0.65 else "high"
|
197 |
out += f"\n\nDARVO Score: {avg_darvo} → This indicates a **{level} likelihood** of narrative reversal (DARVO), where the speaker may be denying, attacking, or reversing blame."
|
|
|
21 |
]
|
22 |
|
23 |
THRESHOLDS = {
|
24 |
+
"blame shifting": 0.3, "contradictory statements": 0.32, "control": 0.48, "dismissiveness": 0.45,
|
25 |
"gaslighting": 0.30, "guilt tripping": 0.20, "insults": 0.34, "obscure language": 0.25,
|
26 |
"projection": 0.35, "recovery phase": 0.25, "threat": 0.25
|
27 |
}
|
|
|
118 |
def analyze_single_message(text, thresholds, motif_flags):
|
119 |
motif_hits, matched_phrases = detect_motifs(text)
|
120 |
|
|
|
121 |
result = sst_pipeline(text)[0]
|
122 |
sentiment = "supportive" if result['label'] == "POSITIVE" else "undermining"
|
123 |
sentiment_score = result['score'] if sentiment == "undermining" else 0.0
|
|
|
135 |
outputs = model(**inputs)
|
136 |
scores = torch.sigmoid(outputs.logits.squeeze(0)).numpy()
|
137 |
|
138 |
+
threshold_labels = []
|
139 |
+
for label, score in zip(LABELS, scores):
|
140 |
+
if label in {"control", "dismissiveness", "blame shifting"}:
|
141 |
+
if sentiment == "undermining" and result["score"] > 0.85 and score > adjusted_thresholds[label]:
|
142 |
+
threshold_labels.append(label)
|
143 |
+
elif score > adjusted_thresholds[label]:
|
144 |
+
threshold_labels.append(label)
|
145 |
+
|
146 |
top_patterns = sorted(
|
147 |
[(label, score) for label, score in zip(LABELS, scores)],
|
148 |
key=lambda x: x[1],
|
|
|
151 |
|
152 |
pattern_labels = threshold_labels + [label for label, _ in matched_phrases]
|
153 |
darvo_score = calculate_darvo_score(pattern_labels, 0.0, sentiment_score, motifs, contradiction_flag)
|
154 |
+
|
155 |
print("\n--- Debug Info ---")
|
156 |
print(f"Text: {text}")
|
157 |
print(f"Sentiment: {sentiment} (raw: {result['label']}, score: {result['score']:.3f})")
|
158 |
print("Abuse Pattern Scores:")
|
159 |
for label, score in zip(LABELS, scores):
|
160 |
+
passed = "✅" if label in threshold_labels else "❌"
|
161 |
print(f" {label:25} → {score:.3f} {passed}")
|
162 |
print(f"Motifs: {motifs}")
|
163 |
print(f"Contradiction: {contradiction_flag}")
|
164 |
print("------------------\n")
|
165 |
+
|
166 |
return (
|
167 |
np.mean([score for _, score in top_patterns]) * 100,
|
168 |
threshold_labels,
|
|
|
170 |
darvo_score,
|
171 |
{"label": sentiment, "raw_label": result['label'], "score": result['score']}
|
172 |
)
|
|
|
173 |
def analyze_composite(msg1, msg2, msg3, *answers_and_none):
|
174 |
responses = answers_and_none[:len(ESCALATION_QUESTIONS)]
|
175 |
none_selected = answers_and_none[-1]
|
|
|
194 |
|
195 |
out = f"Abuse Intensity: {composite_abuse}%\n"
|
196 |
out += f"Escalation Potential: {escalation_level} ({escalation_score}/{sum(w for _, w in ESCALATION_QUESTIONS)})"
|
197 |
+
out += generate_risk_snippet(composite_abuse, top_label, escalation_score)
|
198 |
if avg_darvo > 0.25:
|
199 |
level = "moderate" if avg_darvo < 0.65 else "high"
|
200 |
out += f"\n\nDARVO Score: {avg_darvo} → This indicates a **{level} likelihood** of narrative reversal (DARVO), where the speaker may be denying, attacking, or reversing blame."
|