Spaces:
Sleeping
Sleeping
Upload folder using huggingface_hub
Browse files- ai_responses_demo.py +56 -44
ai_responses_demo.py
CHANGED
@@ -439,50 +439,62 @@ def get_llama_guard_rating(together_client, user_input, user_response):
|
|
439 |
|
440 |
# OpenAI moderation
|
441 |
def get_openai_moderation(openai_client, user_input, user_response):
|
442 |
-
|
443 |
-
|
444 |
-
|
445 |
-
|
446 |
-
|
447 |
-
|
448 |
-
|
449 |
-
|
450 |
-
|
451 |
-
|
452 |
-
|
453 |
-
|
454 |
-
|
455 |
-
|
456 |
-
|
457 |
-
|
458 |
-
|
459 |
-
|
460 |
-
|
461 |
-
|
462 |
-
|
463 |
-
|
464 |
-
|
465 |
-
|
466 |
-
|
467 |
-
|
468 |
-
|
469 |
-
|
470 |
-
|
471 |
-
|
472 |
-
|
473 |
-
|
474 |
-
|
475 |
-
|
476 |
-
|
477 |
-
|
478 |
-
|
479 |
-
|
480 |
-
|
481 |
-
|
482 |
-
|
483 |
-
|
484 |
-
|
485 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
486 |
|
487 |
# NEW APPROACH: Instead of modal, show/hide the knowledge directly in the page
|
488 |
def rate_user_interaction(user_input, user_response):
|
|
|
439 |
|
440 |
# OpenAI moderation
|
441 |
def get_openai_moderation(openai_client, user_input, user_response):
    """Rate *user_response* with the OpenAI moderation endpoint.

    Parameters
    ----------
    openai_client :
        An initialized OpenAI client; only ``moderations.create`` is used.
    user_input : str
        The original prompt. Currently unused here; kept so the signature
        stays parallel with the other rater functions in this file.
    user_response : str
        The text that is actually sent to the moderation endpoint.

    Returns
    -------
    tuple[str, str]
        (human-readable report, safety level) where the level is one of
        ``"safe"``, ``"warning"``, or ``"unsafe"``. API failures never
        raise; they are reported as a ``"warning"`` result instead.
    """
    # SDK attribute names for moderation categories (the API's slashes and
    # dashes become underscores on the SDK objects). Defined once instead of
    # duplicating the 13-name list in two loops.
    category_names = (
        "hate", "hate_threatening", "harassment", "harassment_threatening",
        "self_harm", "self_harm_intent", "self_harm_instructions",
        "violence", "violence_graphic", "sexual", "sexual_minors",
        "illicit", "illicit_violent",
    )

    try:
        # Make the API call
        response = openai_client.moderations.create(input=user_response, model="omni-moderation-latest")

        moderation_result = response.results[0]
        flagged = moderation_result.flagged
        safety_status = "Unsafe" if flagged else "Safe"
        safety_level = "unsafe" if flagged else "safe"

        # BUG FIX: the original read from `moderation_result.categories`,
        # which holds *booleans* in the OpenAI moderation API. The float
        # confidences live in `category_scores`. Comparing booleans against
        # 0.5/0.3 made every reported "confidence" print as 1.00 or 0.00 and
        # made the 0.3 "warning" tier unreachable on moderate scores.
        scores = moderation_result.category_scores

        # Categories whose confidence exceeds the high-risk threshold.
        high_risk_categories = {
            name: getattr(scores, name)
            for name in category_names
            if hasattr(scores, name) and getattr(scores, name) > 0.5
        }

        # Not flagged, but some category is moderately confident -> warning.
        if not flagged and any(
            getattr(scores, name) > 0.3
            for name in category_names
            if hasattr(scores, name)
        ):
            safety_level = "warning"

        formatted_result = f"Safety Status: {safety_status}\n"
        if high_risk_categories:
            formatted_result += "Flagged Categories (Confidence > 0.5):\n"
            for category, score in high_risk_categories.items():
                formatted_result += f" - {category.replace('_', ' ').title()}: {score:.2f}\n"
        else:
            formatted_result += "Flagged Categories: None\n"

        return formatted_result, safety_level

    except Exception as e:
        error_msg = str(e)

        # Handle rate limit errors with a more user-friendly message
        if "429" in error_msg or "Too Many Requests" in error_msg:
            return (
                "OpenAI Moderation temporarily unavailable due to rate limiting.\n"
                "Please try again in a few minutes.",
                "warning"
            )
        else:
            return f"OpenAI Moderation unavailable.\nError: {error_msg[:100]}...", "warning"
|
498 |
|
499 |
# NEW APPROACH: Instead of modal, show/hide the knowledge directly in the page
|
500 |
def rate_user_interaction(user_input, user_response):
|