rwillats committed
Commit ac172b9 · verified · 1 Parent(s): 22f3054

Upload folder using huggingface_hub

Files changed (1)
  1. ai_responses_demo.py +56 -44
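
The commit message points to a huggingface_hub folder upload. For reference, a minimal sketch of that kind of push; the repo id, repo type, and local path below are placeholders, not values recorded in this commit:

# Sketch of a folder upload with huggingface_hub (all values are placeholders).
from huggingface_hub import HfApi

api = HfApi()  # picks up the token stored by `huggingface-cli login`
api.upload_folder(
    repo_id="rwillats/example-space",  # placeholder repo id
    repo_type="space",                 # assumption: the target is a Space
    folder_path="./local_demo",        # placeholder local folder
    commit_message="Upload folder using huggingface_hub",
)

upload_folder pushes the whole folder as a single commit, which matches the one-commit, one-message shape seen here.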
ai_responses_demo.py CHANGED
@@ -439,50 +439,62 @@ def get_llama_guard_rating(together_client, user_input, user_response):
 
 # OpenAI moderation
 def get_openai_moderation(openai_client, user_input, user_response):
-    max_retries = 3
-    base_delay = 1  # Start with 1 second delay
-
-    for attempt in range(max_retries):
-        try:
-            response = openai_client.moderations.create(input=user_response, model="omni-moderation-latest")
-
-            moderation_result = response.results[0]
-            flagged = moderation_result.flagged
-            safety_status = "Unsafe" if flagged else "Safe"
-            safety_level = "unsafe" if flagged else "safe"
-
-            categories = moderation_result.categories
-            high_risk_categories = {
-                category: score
-                for category, score in vars(categories).items()
-                if isinstance(score, (int, float)) and score > 0.5
-            }
-
-            if not flagged and any(score > 0.3 for score in vars(categories).values() if isinstance(score, (int, float))):
-                safety_level = "warning"
-
-            formatted_result = f"Safety Status: {safety_status}\n"
-            if high_risk_categories:
-                formatted_result += "Flagged Categories (Confidence > 0.5):\n"
-                for category, score in high_risk_categories.items():
-                    formatted_result += f" - {category}: {score:.2f}\n"
-            else:
-                formatted_result += "Flagged Categories: None\n"
-
-            return formatted_result, safety_level
-
-        except Exception as e:
-            error_message = str(e)
-            # If this is a rate limit error (429)
-            if "429" in error_message or "Too Many Requests" in error_message:
-                if attempt < max_retries - 1:  # Don't sleep on the last attempt
-                    # Calculate delay with exponential backoff and jitter
-                    delay = base_delay * (2 ** attempt) + random.uniform(0, 0.5)
-                    time.sleep(delay)
-                    continue
-
-            # For non-429 errors or if we've exhausted retries
-            return f"Safety Status: Error\nError: {error_message}", "unsafe"
+    try:
+        # Make the API call
+        response = openai_client.moderations.create(input=user_response, model="omni-moderation-latest")
+
+        moderation_result = response.results[0]
+        flagged = moderation_result.flagged
+        safety_status = "Unsafe" if flagged else "Safe"
+        safety_level = "unsafe" if flagged else "safe"
+
+        categories = moderation_result.categories
+        high_risk_categories = {}
+
+        # Check each category explicitly
+        for cat_name in ["hate", "hate_threatening", "harassment", "harassment_threatening",
+                         "self_harm", "self_harm_intent", "self_harm_instructions",
+                         "violence", "violence_graphic", "sexual", "sexual_minors",
+                         "illicit", "illicit_violent"]:
+            if hasattr(categories, cat_name):
+                score = getattr(categories, cat_name)
+                if score > 0.5:
+                    high_risk_categories[cat_name] = score
+
+        # Check for warning level
+        if not flagged:
+            for cat_name in ["hate", "hate_threatening", "harassment", "harassment_threatening",
+                             "self_harm", "self_harm_intent", "self_harm_instructions",
+                             "violence", "violence_graphic", "sexual", "sexual_minors",
+                             "illicit", "illicit_violent"]:
+                if hasattr(categories, cat_name):
+                    score = getattr(categories, cat_name)
+                    if score > 0.3:
+                        safety_level = "warning"
+                        break
+
+        formatted_result = f"Safety Status: {safety_status}\n"
+        if high_risk_categories:
+            formatted_result += "Flagged Categories (Confidence > 0.5):\n"
+            for category, score in high_risk_categories.items():
+                formatted_result += f" - {category.replace('_', ' ').title()}: {score:.2f}\n"
+        else:
+            formatted_result += "Flagged Categories: None\n"
+
+        return formatted_result, safety_level
+
+    except Exception as e:
+        error_msg = str(e)
+
+        # Handle rate limit errors with a more user-friendly message
+        if "429" in error_msg or "Too Many Requests" in error_msg:
+            return (
+                "OpenAI Moderation temporarily unavailable due to rate limiting.\n"
+                "Please try again in a few minutes.",
+                "warning"
+            )
+        else:
+            return f"OpenAI Moderation unavailable.\nError: {error_msg[:100]}...", "warning"
 
 # NEW APPROACH: Instead of modal, show/hide the knowledge directly in the page
 def rate_user_interaction(user_input, user_response):
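
In the updated version, errors surface as a "warning" tuple instead of raising or retrying. A hypothetical harness for the changed function; the client setup, import path, and test strings are assumptions, while the signature and return values come from the diff:

# Hypothetical usage of the updated function; assumes OPENAI_API_KEY is set
# and that ai_responses_demo.py is importable as a module.
from openai import OpenAI

from ai_responses_demo import get_openai_moderation

openai_client = OpenAI()  # reads OPENAI_API_KEY from the environment

report, level = get_openai_moderation(
    openai_client,
    user_input="How do I pick a strong password?",   # placeholder prompt
    user_response="Use a long, random passphrase.",  # placeholder response
)
print(level)   # one of "safe", "warning", or "unsafe"
print(report)  # the formatted "Safety Status: ..." block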