SamanthaStorm committed
Commit c238741 · verified · Parent: 6ede042

Update app.py

Files changed (1):
  app.py  +61 -24
app.py CHANGED
@@ -382,35 +382,72 @@ THREAT_MOTIFS = [
 ]
 
 
+@spaces.GPU
 def compute_abuse_score(matched_scores, sentiment):
+    """
+    Compute abuse score from matched patterns and sentiment, with adjustments for overinflation.
+    """
     if not matched_scores:
-        return 0
-
-    # Weighted average of passed patterns
-    weighted_total = sum(score * weight for _, score, weight in matched_scores)
-    weight_sum = sum(weight for _, _, weight in matched_scores)
-    base_score = (weighted_total / weight_sum) * 100
-
-    # Boost for pattern count
-    pattern_count = len(matched_scores)
-    scale = 1.0 + 0.25 * max(0, pattern_count - 1)  # 1.25x for 2, 1.5x for 3+
-    scaled_score = base_score * scale
-
-    # Pattern floors
-    FLOORS = {
-        "control": 40,
-        "gaslighting": 30,
-        "insults": 25,
-        "aggression": 40
-    }
-    floor = max(FLOORS.get(label, 0) for label, _, _ in matched_scores)
-    adjusted_score = max(scaled_score, floor)
-
-    # Sentiment tweak
-    if sentiment == "undermining" and adjusted_score < 50:
-        adjusted_score += 10
-
-    return min(adjusted_score, 100)
+        return 0.0
+
+    # Sort matched scores by score in descending order
+    sorted_scores = sorted(matched_scores, key=lambda x: x[1], reverse=True)
+
+    # Use the highest score as the base, but scale it down if multiple patterns are present
+    highest_score = sorted_scores[0][1]
+    num_patterns = len(matched_scores)
+    if num_patterns > 1:
+        highest_score *= (1 - (num_patterns - 1) * 0.1)  # Reduce by 10% for each additional pattern after the first
+
+    base_score = highest_score * 100
+
+    # Critical patterns with adjusted weights
+    critical_patterns = {
+        'gaslighting': 1.5,
+        'guilt tripping': 1.4,
+        'blame shifting': 1.3,
+        'control': 1.4,
+        'insults': 1.2,
+        'manipulation': 1.3,  # If you have a 'manipulation' label
+        'love bombing': 1.3,
+        'emotional blackmail': 1.5,
+        'dismissiveness': 1.2,
+        'contradictory statements': 1.2
+    }
+
+    for label, score, _ in matched_scores:
+        if label in critical_patterns and score > 0.5:
+            base_score *= critical_patterns[label]
+
+    # Combination multipliers (reduced)
+    if len(matched_scores) >= 2:
+        base_score *= 1.15  # Reduced from 1.3
+    if len(matched_scores) >= 3:
+        base_score *= 1.1  # Reduced from 1.2
+
+    # High confidence boost (reduced)
+    if any(score > 0.8 for _, score, _ in matched_scores):
+        base_score *= 1.1  # Reduced from 1.2
+
+    # Sentiment modifier
+    if sentiment == "supportive":
+        manipulative_patterns = {'guilt tripping', 'gaslighting', 'blame shifting', 'love bombing'}
+        if any(label in manipulative_patterns for label, score, _ in matched_scores if score > 0.5):
+            base_score *= 0.95  # Slightly less reduction for manipulative "support"
+        else:
+            base_score *= 0.85  # Larger reduction for genuine support
+    elif sentiment == "undermining":
+        base_score *= 1.15
+
+    # Minimum score for high-confidence patterns (adjusted)
+    if any(score > 0.8 for _, score, _ in matched_scores):
+        base_score = max(base_score, 70.0)  # Reduced from 80
+    elif any(score > 0.6 for _, score, _ in matched_scores):
+        base_score = max(base_score, 55.0)  # Reduced from 65
+
+    return min(round(base_score, 1), 100.0)
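For reference, a minimal usage sketch of the revised function (not part of the commit). The (label, score, weight) tuple shape is inferred from the unpacking inside the function (the new code ignores weight), the labels and sentiment strings here are hypothetical inputs, and outside a ZeroGPU Space the @spaces.GPU decorator should act as a pass-through, so the function can be called directly:

from app import compute_abuse_score  # assumes app.py is importable as a module

# Hypothetical inputs; the (label, score, weight) shape is assumed from the
# `for label, score, _ in matched_scores` unpacking in compute_abuse_score.
single_hit = [("insults", 0.62, 1.0)]
supportive_hit = [("dismissiveness", 0.55, 1.0)]

# 0.62 -> base 62.0, then *1.2 critical-pattern weight for "insults" -> 74.4
print(compute_abuse_score(single_hit, "neutral"))         # 74.4

# 0.55 -> base 55.0, *1.2 for "dismissiveness", *0.85 "genuine support" cut -> 56.1
print(compute_abuse_score(supportive_hit, "supportive"))  # 56.1

With two or more strong patterns, the critical-pattern weights, combination multipliers, and sentiment modifier stack multiplicatively, so scores hit the min(..., 100.0) cap quickly; the "Reduced from ..." comments in the diff are tuning exactly that inflation.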