Spaces:
Running
on
Zero
Running
on
Zero
Update app.py
Browse files
app.py
CHANGED
@@ -348,20 +348,62 @@ def get_risk_stage(patterns, sentiment):
|
|
348 |
return 1
|
349 |
|
350 |
def generate_risk_snippet(abuse_score, top_label, escalation_score, stage):
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
351 |
if abuse_score >= 85 or escalation_score >= 16:
|
352 |
risk_level = "high"
|
353 |
-
elif abuse_score >= 60 or escalation_score >= 8:
|
354 |
risk_level = "moderate"
|
355 |
elif stage == 2 and abuse_score >= 40:
|
356 |
-
risk_level = "moderate"
|
357 |
else:
|
358 |
risk_level = "low"
|
|
|
359 |
if isinstance(top_label, str) and " – " in top_label:
|
360 |
pattern_label, pattern_score = top_label.split(" – ")
|
361 |
else:
|
362 |
pattern_label = str(top_label) if top_label is not None else "Unknown"
|
363 |
pattern_score = ""
|
364 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
365 |
WHY_FLAGGED = {
|
366 |
"control": "This message may reflect efforts to restrict someone’s autonomy, even if it's framed as concern or care.",
|
367 |
"gaslighting": "This message could be manipulating someone into questioning their perception or feelings.",
|
|
|
348 |
return 1
|
349 |
|
350 |
def generate_risk_snippet(abuse_score, top_label, escalation_score, stage):
    """Build a human-readable risk summary for an analysed message.

    Args:
        abuse_score: Numeric abuse score; compared against the thresholds
            85 (high), 60 (moderate) and, at stage 2, 40 (moderate).
        top_label: Label of the strongest detected pattern. When it is a
            string of the form ``"<pattern> – <score>"`` (en dash with a
            space on each side) it is split into pattern name and score
            text. Any other value is shown as-is via ``str()``; ``None``
            is shown as ``"Unknown"``.
        escalation_score: Numeric escalation score; compared against the
            thresholds 16 (high) and 8 (moderate).
        stage: Risk stage; only the value ``2`` affects the result here.

    Returns:
        A multi-line, Markdown-flavoured string containing the risk level,
        a short interpretation, the reason the pattern is flagged and the
        detected pattern with its score text.
    """
    # An "aggression" top label can raise the risk level on its own, so its
    # percentage is extracted as a 0-1 fraction. Anything unparsable counts
    # as 0 rather than failing the whole report.
    aggression_score = 0
    if isinstance(top_label, str) and "aggression" in top_label.lower():
        try:
            percent_text = top_label.split("–")[1].replace("%", "").strip()
            aggression_score = int(percent_text) / 100
        except (IndexError, ValueError):
            # Missing separator or non-integer score text.
            aggression_score = 0

    if abuse_score >= 85 or escalation_score >= 16:
        risk_level = "high"
    elif abuse_score >= 60 or escalation_score >= 8 or aggression_score >= 0.75:
        risk_level = "moderate"
    elif stage == 2 and abuse_score >= 40:
        risk_level = "moderate"
    else:
        risk_level = "low"

    if isinstance(top_label, str) and " – " in top_label:
        # Split on the first separator only: a label containing more than
        # one " – " must not raise ValueError during unpacking.
        pattern_label, pattern_score = top_label.split(" – ", 1)
    else:
        pattern_label = str(top_label) if top_label is not None else "Unknown"
        pattern_score = ""

    WHY_FLAGGED = {
        "control": "This message may reflect efforts to restrict someone’s autonomy, even if it's framed as concern or care.",
        "gaslighting": "This message could be manipulating someone into questioning their perception or feelings.",
        "dismissiveness": "This message may include belittling, invalidating, or ignoring the other person’s experience.",
        "insults": "Direct insults often appear in escalating abusive dynamics and can erode emotional safety.",
        "threat": "This message includes threatening language, which is a strong predictor of harm.",
        "blame shifting": "This message may redirect responsibility to avoid accountability, especially during conflict.",
        "guilt tripping": "This message may induce guilt in order to control or manipulate behavior.",
        "recovery phase": "This message may be part of a tension-reset cycle, appearing kind but avoiding change.",
        "projection": "This message may involve attributing the abuser’s own behaviors to the victim.",
        "contradictory statements": "This message may contain internal contradictions used to confuse, destabilize, or deflect responsibility.",
        "obscure language": "This message may use overly formal, vague, or complex language to obscure meaning or avoid accountability.",
        "default": "This message contains language patterns that may affect safety, clarity, or emotional autonomy."
    }

    # Unknown pattern names fall back to the generic explanation.
    explanation = WHY_FLAGGED.get(pattern_label.lower(), WHY_FLAGGED["default"])

    base = f"\n\n🛑 Risk Level: {risk_level.capitalize()}\n"
    base += f"This message shows strong indicators of **{pattern_label}**. "

    if risk_level == "high":
        base += "The language may reflect patterns of emotional control, even when expressed in soft or caring terms.\n"
    elif risk_level == "moderate":
        base += "There are signs of emotional pressure or verbal aggression that may escalate if repeated.\n"
    else:
        base += "The message does not strongly indicate abuse, but it's important to monitor for patterns.\n"

    base += f"\n💡 *Why this might be flagged:*\n{explanation}\n"
    base += f"\nDetected Pattern: **{pattern_label} ({pattern_score})**\n"
    base += "🧠 You can review the pattern in context. This tool highlights possible dynamics—not judgments."
    return base
|
406 |
+
|
407 |
WHY_FLAGGED = {
|
408 |
"control": "This message may reflect efforts to restrict someone’s autonomy, even if it's framed as concern or care.",
|
409 |
"gaslighting": "This message could be manipulating someone into questioning their perception or feelings.",
|