SamanthaStorm committed on
Commit
a9b6112
·
verified ·
1 Parent(s): c238741

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +33 -35
app.py CHANGED
@@ -382,75 +382,73 @@ THREAT_MOTIFS = [
382
  ]
383
 
384
 
 
385
  @spaces.GPU
386
  def compute_abuse_score(matched_scores, sentiment):
387
  """
388
- Compute abuse score from matched patterns and sentiment, with adjustments for overinflation.
389
  """
390
  if not matched_scores:
391
  return 0.0
392
-
393
- # Sort matched scores by score in descending order
394
- sorted_scores = sorted(matched_scores, key=lambda x: x[1], reverse=True)
395
 
396
- # Use the highest score as the base, but scale it down if multiple patterns are present
397
  highest_score = sorted_scores[0][1]
398
  num_patterns = len(matched_scores)
 
 
399
  if num_patterns > 1:
400
- highest_score *= (1 - (num_patterns - 1) * 0.1) # Reduce by 10% for each additional pattern after the first
401
 
402
  base_score = highest_score * 100
403
 
404
- # Critical patterns with adjusted weights
405
  critical_patterns = {
406
- 'gaslighting': 1.5,
407
- 'guilt tripping': 1.4,
408
- 'blame shifting': 1.3,
409
- 'control': 1.4,
410
- 'insults': 1.2,
411
- 'manipulation': 1.3, # If you have a 'manipulation' label
412
- 'love bombing': 1.3,
413
- 'emotional blackmail': 1.5,
414
- 'dismissiveness': 1.2,
415
- 'contradictory statements': 1.2
416
  }
417
 
418
  for label, score, _ in matched_scores:
419
  if label in critical_patterns and score > 0.5:
420
  base_score *= critical_patterns[label]
421
 
422
- # Combination multipliers (reduced)
423
  if len(matched_scores) >= 2:
424
- base_score *= 1.15 # Reduced from 1.3
425
  if len(matched_scores) >= 3:
426
- base_score *= 1.1 # Reduced from 1.2
427
 
428
- # High confidence boost (reduced)
429
  if any(score > 0.8 for _, score, _ in matched_scores):
430
- base_score *= 1.1 # Reduced from 1.2
431
 
432
- # Sentiment modifier
433
  if sentiment == "supportive":
434
  manipulative_patterns = {'guilt tripping', 'gaslighting', 'blame shifting', 'love bombing'}
435
- if any(label in manipulative_patterns for label, score, _ in matched_scores if score > 0.5):
436
- base_score *= 0.95 # Slightly less reduction for manipulative "support"
 
 
437
  else:
438
- base_score *= 0.85 # Larger reduction for genuine support
 
439
  elif sentiment == "undermining":
440
  base_score *= 1.15
441
 
442
- # Minimum score for high-confidence patterns (adjusted)
443
- if any(score > 0.8 for _, score, _ in matched_scores):
444
- base_score = max(base_score, 70.0) # Reduced from 80
445
- elif any(score > 0.6 for _, score, _ in matched_scores):
446
- base_score = max(base_score, 55.0) # Reduced from 65
447
 
448
  return min(round(base_score, 1), 100.0)
449
 
450
-
451
-
452
-
453
-
454
  def analyze_single_message(text, thresholds):
455
  print("⚡ ENTERED analyze_single_message")
456
  stage = 1
 
382
  ]
383
 
384
 
385
+ @spaces.GPU
386
  @spaces.GPU
387
  def compute_abuse_score(matched_scores, sentiment):
388
  """
389
+ Compute abuse score with more conservative adjustments.
390
  """
391
  if not matched_scores:
392
  return 0.0
 
 
 
393
 
394
+ sorted_scores = sorted(matched_scores, key=lambda x: x[1], reverse=True)
395
  highest_score = sorted_scores[0][1]
396
  num_patterns = len(matched_scores)
397
+
398
+ # Scale down base score more aggressively if multiple patterns are present
399
  if num_patterns > 1:
400
+ highest_score *= (1 - (num_patterns - 1) * 0.2) # Reduce by 20% for each additional pattern
401
 
402
  base_score = highest_score * 100
403
 
 
404
  critical_patterns = {
405
+ 'gaslighting': 1.4, # Reduced
406
+ 'guilt tripping': 1.3, # Reduced
407
+ 'blame shifting': 1.2, # Reduced
408
+ 'control': 1.3, # Reduced
409
+ 'insults': 1.1, # Reduced
410
+ 'manipulation': 1.2,
411
+ 'love bombing': 1.2,
412
+ 'emotional blackmail': 1.4,
413
+ 'dismissiveness': 1.1,
414
+ 'contradictory statements': 1.1
415
  }
416
 
417
  for label, score, _ in matched_scores:
418
  if label in critical_patterns and score > 0.5:
419
  base_score *= critical_patterns[label]
420
 
421
+ # Further reduce combination multipliers
422
  if len(matched_scores) >= 2:
423
+ base_score *= 1.1 # Reduced
424
  if len(matched_scores) >= 3:
425
+ base_score *= 1.05 # Reduced
426
 
427
+ # Reduce high confidence boost
428
  if any(score > 0.8 for _, score, _ in matched_scores):
429
+ base_score *= 1.05 # Reduced
430
 
431
+ # Sentiment modifier (more nuanced)
432
  if sentiment == "supportive":
433
  manipulative_patterns = {'guilt tripping', 'gaslighting', 'blame shifting', 'love bombing'}
434
+ if any(label in manipulative_patterns for label, score, _ in matched_scores if score > 0.6): # Higher threshold
435
+ base_score *= 0.95 # Smaller reduction for strongly manipulative "support"
436
+ elif any(label in manipulative_patterns for label, score, _ in matched_scores if score > 0.4): # Moderate threshold
437
+ base_score *= 0.9 # Moderate reduction for manipulative "support"
438
  else:
439
+ base_score *= 0.8 # Larger reduction for genuine support
440
+
441
  elif sentiment == "undermining":
442
  base_score *= 1.15
443
 
444
+ # Reduce minimum score and threshold for activation
445
+ if any(score > 0.9 for _, score, _ in matched_scores): # Higher threshold
446
+ base_score = max(base_score, 75.0) # Reduced
447
+ elif any(score > 0.7 for _, score, _ in matched_scores): # Moderate threshold
448
+ base_score = max(base_score, 60.0) # Reduced
449
 
450
  return min(round(base_score, 1), 100.0)
451
 
 
 
 
 
452
  def analyze_single_message(text, thresholds):
453
  print("⚡ ENTERED analyze_single_message")
454
  stage = 1