SamanthaStorm committed on
Commit a10566d · verified · 1 parent: bcfcd91

Update app.py

Files changed (1):
  1. app.py +37 -40
app.py CHANGED
@@ -413,10 +413,12 @@ def compute_abuse_score(matched_scores, sentiment):
     return min(adjusted_score, 100)


+
 def analyze_single_message(text, thresholds):
+    print("⚡ ENTERED analyze_single_message")
     stage = 1
     motif_hits, matched_phrases = detect_motifs(text)
-
+
     # Get emotion profile
     emotion_profile = get_emotion_profile(text)
     sentiment_score = emotion_profile.get("anger", 0) + emotion_profile.get("disgust", 0)
@@ -427,7 +429,7 @@ def analyze_single_message(text, thresholds):
     outputs = model(**inputs)
     scores = torch.sigmoid(outputs.logits.squeeze(0)).numpy()

-    # Sentiment override if neutral is high while critical thresholds are passed
+    # Sentiment override
     if emotion_profile.get("neutral", 0) > 0.85 and any(
         scores[LABELS.index(l)] > thresholds[l]
         for l in ["control", "blame shifting"]
@@ -442,85 +444,79 @@ def analyze_single_message(text, thresholds):
         k: v + 0.05 if sentiment == "supportive" else v
         for k, v in thresholds.items()
     }
+
     darvo_score = predict_darvo_score(text)

     threshold_labels = [
         label for label, score in zip(LABELS, scores)
         if score > adjusted_thresholds[label]
     ]
-    # Suppress all outputs if no abuse labels pass threshold
+
+    # Early exit if nothing passed
     if not threshold_labels:
-        abuse_score = 0.0
-        darvo_score = 0.0
-        top_patterns = []
-        tone_tag = "supportive"
-        return abuse_score, threshold_labels, top_patterns, {"label": sentiment}, stage, darvo_score, tone_tag
+        return 0.0, [], [], {"label": sentiment}, 1, 0.0, "supportive"

     top_patterns = sorted(
         [(label, score) for label, score in zip(LABELS, scores)],
         key=lambda x: x[1],
         reverse=True
     )[:2]
-    # Post-threshold validation: strip recovery if it occurs with undermining sentiment
-    if "recovery" in threshold_labels and tone_tag == "forced accountability flip":
-        threshold_labels.remove("recovery")
-        top_patterns = [p for p in top_patterns if p[0] != "recovery"]
-        print("⚠️ Removing 'recovery' due to undermining sentiment (not genuine repair)")

     matched_scores = [
         (label, score, PATTERN_WEIGHTS.get(label, 1.0))
         for label, score in zip(LABELS, scores)
         if score > adjusted_thresholds[label]
     ]
-    # Classify insult pattern more precisely for UI
+
+    # Determine insult subtype
     insults_score = next((score for label, score, _ in matched_scores if label == "insults"), 0)
     insult_label_display = None
     if insults_score > 0.9 and (emotion_profile.get("anger", 0) > 0.1 or emotion_profile.get("disgust", 0) > 0.1):
         insult_label_display = "Direct Insult"
     elif 0.5 < insults_score <= 0.9 and emotion_profile.get("neutral", 0) > 0.85:
         insult_label_display = "Subtle Undermining"
+
+    # Abuse score
     abuse_score_raw = compute_abuse_score(matched_scores, sentiment)
-    abuse_score = abuse_score_raw

-    # Risk stage logic
-    stage = get_risk_stage(threshold_labels, sentiment) if threshold_labels else 1
-    if weapon_flag and stage < 2:
-        stage = 2
+    # Weapon adjustment
     if weapon_flag:
         abuse_score_raw = min(abuse_score_raw + 25, 100)
-
-
+        if stage < 2:
+            stage = 2

     abuse_score = min(abuse_score_raw, 100 if "control" in threshold_labels else 95)

-    # Tag must happen after abuse score is finalized
+    # Tone tag
     tone_tag = get_emotional_tone_tag(emotion_profile, sentiment, threshold_labels, abuse_score)
-    # Final display tweak: swap 'insults' with its refined label
-    if insult_label_display and "insults" in threshold_labels:
-        threshold_labels = [
-            insult_label_display if label == "insults" else label
-            for label in threshold_labels
-        ]
-    return abuse_score, threshold_labels, top_patterns, {"label": sentiment}, stage, darvo_score, tone_tag
-    # ---- Profanity + Anger Override Logic ----
+
+    # Remove recovery tag if tone is fake
+    if "recovery" in threshold_labels and tone_tag == "forced accountability flip":
+        threshold_labels.remove("recovery")
+        top_patterns = [p for p in top_patterns if p[0] != "recovery"]
+        print("⚠️ Removing 'recovery' due to undermining sentiment (not genuine repair)")
+
+    # Override profanity/anger for short texts
     profane_words = {"fuck", "fucking", "bitch", "shit", "cunt", "ho", "asshole", "dick", "whore", "slut"}
     tokens = set(text.lower().split())
     has_profane = any(word in tokens for word in profane_words)
-
-    anger_score = emotion_profile.get("Anger", 0)
     short_text = len(tokens) <= 10
-    insult_score = next((s for l, s in top_patterns if l == "insults"), 0)
-
+    anger_score = emotion_profile.get("anger", 0)
     if has_profane and anger_score > 0.75 and short_text:
         print("⚠️ Profanity + Anger Override Triggered")
-        top_patterns = sorted(top_patterns, key=lambda x: x[1], reverse=True)
-        if top_patterns[0][0] != "insults":
-            top_patterns.insert(0, ("insults", insult_score))
+        insult_score = next((s for l, s in top_patterns if l == "insults"), 0)
+        if ("insults", insult_score) not in top_patterns:
+            top_patterns = [("insults", insult_score)] + top_patterns
         if "insults" not in threshold_labels:
             threshold_labels.append("insults")
-        top_patterns = [("insults", insult_score)] + [p for p in top_patterns if p[0] != "insults"]
-    # Debug
-    print(f"Emotional Tone Tag: {tone_tag}")
+
+    # Replace 'insults' with descriptive label in output
+    if insult_label_display and "insults" in threshold_labels:
+        threshold_labels = [
+            insult_label_display if label == "insults" else label
+            for label in threshold_labels
+        ]
+
     # Debug
     print(f"Emotional Tone Tag: {tone_tag}")
     print("Emotion Profile:")
@@ -538,6 +534,7 @@ def analyze_single_message(text, thresholds):
     print("------------------\n")

     return abuse_score, threshold_labels, top_patterns, {"label": sentiment}, stage, darvo_score, tone_tag
+
 import spaces

 @spaces.GPU
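
For reference, both before and after this commit `analyze_single_message` returns a 7-tuple: `(abuse_score, threshold_labels, top_patterns, sentiment_dict, stage, darvo_score, tone_tag)`. Below is a minimal, hypothetical caller sketch (not part of the commit) showing how that tuple might be unpacked; the example text and the uniform 0.5 thresholds are assumptions, and it presumes app.py's globals (model, tokenizer, LABELS, weapon_flag, etc.) are already loaded.

# Hypothetical usage sketch (assumption: run inside app.py's module context)
if __name__ == "__main__":
    demo_thresholds = {label: 0.5 for label in LABELS}  # assumed uniform thresholds for illustration
    result = analyze_single_message("You never listen. This is all your fault.", demo_thresholds)
    abuse_score, labels, top_patterns, sentiment, stage, darvo_score, tone_tag = result
    print(f"abuse={abuse_score:.1f}, stage={stage}, darvo={darvo_score:.2f}, tone={tone_tag}")
    for label, score in top_patterns:  # top two (label, score) pairs by raw model score
        print(f"  {label}: {score:.2f}")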