Update app.py

app.py CHANGED
@@ -413,10 +413,12 @@ def compute_abuse_score(matched_scores, sentiment):
     return min(adjusted_score, 100)
 
 
+
 def analyze_single_message(text, thresholds):
+    print("⚡ ENTERED analyze_single_message")
     stage = 1
     motif_hits, matched_phrases = detect_motifs(text)
-
+
     # Get emotion profile
     emotion_profile = get_emotion_profile(text)
     sentiment_score = emotion_profile.get("anger", 0) + emotion_profile.get("disgust", 0)
@@ -427,7 +429,7 @@ def analyze_single_message(text, thresholds):
     outputs = model(**inputs)
     scores = torch.sigmoid(outputs.logits.squeeze(0)).numpy()
 
-    # Sentiment override
+    # Sentiment override
     if emotion_profile.get("neutral", 0) > 0.85 and any(
         scores[LABELS.index(l)] > thresholds[l]
         for l in ["control", "blame shifting"]
@@ -442,85 +444,79 @@ def analyze_single_message(text, thresholds):
         k: v + 0.05 if sentiment == "supportive" else v
         for k, v in thresholds.items()
     }
+
     darvo_score = predict_darvo_score(text)
 
     threshold_labels = [
         label for label, score in zip(LABELS, scores)
         if score > adjusted_thresholds[label]
     ]
-
+
+    # Early exit if nothing passed
     if not threshold_labels:
-
-        darvo_score = 0.0
-        top_patterns = []
-        tone_tag = "supportive"
-        return abuse_score, threshold_labels, top_patterns, {"label": sentiment}, stage, darvo_score, tone_tag
+        return 0.0, [], [], {"label": sentiment}, 1, 0.0, "supportive"
 
     top_patterns = sorted(
         [(label, score) for label, score in zip(LABELS, scores)],
         key=lambda x: x[1],
         reverse=True
     )[:2]
-    # Post-threshold validation: strip recovery if it occurs with undermining sentiment
-    if "recovery" in threshold_labels and tone_tag == "forced accountability flip":
-        threshold_labels.remove("recovery")
-        top_patterns = [p for p in top_patterns if p[0] != "recovery"]
-        print("⚠️ Removing 'recovery' due to undermining sentiment (not genuine repair)")
 
     matched_scores = [
         (label, score, PATTERN_WEIGHTS.get(label, 1.0))
         for label, score in zip(LABELS, scores)
         if score > adjusted_thresholds[label]
     ]
-
+
+    # Determine insult subtype
     insults_score = next((score for label, score, _ in matched_scores if label == "insults"), 0)
     insult_label_display = None
     if insults_score > 0.9 and (emotion_profile.get("anger", 0) > 0.1 or emotion_profile.get("disgust", 0) > 0.1):
         insult_label_display = "Direct Insult"
     elif 0.5 < insults_score <= 0.9 and emotion_profile.get("neutral", 0) > 0.85:
         insult_label_display = "Subtle Undermining"
+
+    # Abuse score
     abuse_score_raw = compute_abuse_score(matched_scores, sentiment)
-    abuse_score = abuse_score_raw
 
-    #
-    stage = get_risk_stage(threshold_labels, sentiment) if threshold_labels else 1
-    if weapon_flag and stage < 2:
-        stage = 2
+    # Weapon adjustment
     if weapon_flag:
         abuse_score_raw = min(abuse_score_raw + 25, 100)
-
-
+        if stage < 2:
+            stage = 2
 
     abuse_score = min(abuse_score_raw, 100 if "control" in threshold_labels else 95)
 
-    #
+    # Tone tag
     tone_tag = get_emotional_tone_tag(emotion_profile, sentiment, threshold_labels, abuse_score)
-
-
-
-
-
-
-
-    #
+
+    # Remove recovery tag if tone is fake
+    if "recovery" in threshold_labels and tone_tag == "forced accountability flip":
+        threshold_labels.remove("recovery")
+        top_patterns = [p for p in top_patterns if p[0] != "recovery"]
+        print("⚠️ Removing 'recovery' due to undermining sentiment (not genuine repair)")
+
+    # Override profanity/anger for short texts
     profane_words = {"fuck", "fucking", "bitch", "shit", "cunt", "ho", "asshole", "dick", "whore", "slut"}
     tokens = set(text.lower().split())
     has_profane = any(word in tokens for word in profane_words)
-
-    anger_score = emotion_profile.get("Anger", 0)
     short_text = len(tokens) <= 10
-
-
+    anger_score = emotion_profile.get("anger", 0)
     if has_profane and anger_score > 0.75 and short_text:
         print("⚠️ Profanity + Anger Override Triggered")
-
-        if
-        top_patterns
+        insult_score = next((s for l, s in top_patterns if l == "insults"), 0)
+        if ("insults", insult_score) not in top_patterns:
+            top_patterns = [("insults", insult_score)] + top_patterns
         if "insults" not in threshold_labels:
             threshold_labels.append("insults")
-
-    #
-
+
+    # Replace 'insults' with descriptive label in output
+    if insult_label_display and "insults" in threshold_labels:
+        threshold_labels = [
+            insult_label_display if label == "insults" else label
+            for label in threshold_labels
+        ]
+
     # Debug
     print(f"Emotional Tone Tag: {tone_tag}")
     print("Emotion Profile:")
@@ -538,6 +534,7 @@ def analyze_single_message(text, thresholds):
     print("------------------\n")
 
     return abuse_score, threshold_labels, top_patterns, {"label": sentiment}, stage, darvo_score, tone_tag
+
 import spaces
 
 @spaces.GPU
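For orientation, here is a minimal usage sketch of the updated function, not part of the commit: it assumes the new early-exit signature and 7-tuple return shown in the diff, plus a caller-supplied threshold dict covering every label in LABELS. The EXAMPLE_THRESHOLDS values and the analyze wrapper name are illustrative assumptions, not code from app.py.

import spaces

# Hypothetical thresholds for illustration only; the real dict must contain an
# entry for every label in LABELS and is defined elsewhere in app.py.
EXAMPLE_THRESHOLDS = {
    "control": 0.45,
    "blame shifting": 0.45,
    "insults": 0.40,
    "recovery": 0.50,
}

@spaces.GPU  # ZeroGPU Spaces attach a GPU only while this call runs
def analyze(text):
    # The updated analyze_single_message returns a 7-tuple
    # (abuse_score, labels, top_patterns, sentiment dict, stage, darvo_score, tone_tag).
    abuse_score, labels, top_patterns, sentiment, stage, darvo_score, tone_tag = analyze_single_message(
        text, EXAMPLE_THRESHOLDS
    )
    return {
        "abuse_score": abuse_score,
        "labels": labels,
        "top_patterns": top_patterns,
        "sentiment": sentiment["label"],
        "risk_stage": stage,
        "darvo_score": darvo_score,
        "tone": tone_tag,
    }

With the early exit in place, a message that trips no threshold now returns 0.0 scores, empty pattern lists, stage 1, and a "supportive" tone tag instead of referencing variables that were never set, which is what the removed branch in the diff previously risked.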