import gradio as gr
import openai
import os
from anthropic import Anthropic
from azure.ai.contentsafety import ContentSafetyClient
from azure.ai.contentsafety.models import AnalyzeTextOptions, TextCategory
from azure.core.credentials import AzureKeyCredential
from azure.core.exceptions import HttpResponseError
import matplotlib.pyplot as plt
from transformers import pipeline
# Load OpenAI and Anthropic API keys from environment variables
openai.api_key = os.getenv("openaiapikey")
anthropic_api_key = os.getenv("anthropickey")

# Initialize Anthropic client
client = Anthropic(api_key=anthropic_api_key)
MODEL_NAME = "claude-3-haiku-20240307"

# Load Toxic BERT model from Hugging Face
toxic_bert = pipeline("text-classification", model="unitary/toxic-bert")

# Function for Azure Content Safety analysis
def analyze_text_azure(user_text):
    # Retrieve Azure key and endpoint from Hugging Face secrets (environment variables)
    key = os.getenv("azurekey")
    endpoint = os.getenv("azureendpoint")

    # Create Content Safety client
    client = ContentSafetyClient(endpoint, AzureKeyCredential(key))

    # Construct request
    request = AnalyzeTextOptions(text=user_text)

    # Analyze text
    try:
        response = client.analyze_text(request)
    except HttpResponseError as e:
return f"Error occurred with Azure Content Safety: {e}" | |

    # Extract moderation results
    results = {}
    for category in [TextCategory.HATE, TextCategory.SELF_HARM, TextCategory.SEXUAL, TextCategory.VIOLENCE]:
        result = next((item for item in response.categories_analysis if item.category == category), None)
        if result:
            results[category] = result.severity
    return results if results else {"No flagged content detected": 0}
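
# Run the input text through all four moderation backends (OpenAI, Anthropic,
# Azure Content Safety, Toxic BERT) and return their individual results, a
# comparison bar chart, and rephrasing suggestions for any flagged content.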
def moderate_text(user_text):
    # OpenAI Moderation
    openai_result = {"Classification": "Error in OpenAI Moderation", "Severity": 0}
    try:
        response = openai.Moderation.create(input=user_text)
        moderation_categories = response["results"][0]["categories"]
        moderation_flagged = response["results"][0]["flagged"]

        if moderation_flagged:
            openai_result["Classification"] = "Blocked"
openai_result["Severity"] = max([category for category, flagged in moderation_categories.items() if flagged], key=moderation_categories.get) | |
        else:
            openai_result["Classification"] = "Allowed"
    except Exception as e:
        openai_result = {"Classification": f"Error occurred with OpenAI: {e}", "Severity": 0}

    # Anthropic Moderation
    anthropic_result = {"Classification": "Error in Anthropic Moderation", "Severity": 0}
    try:
        prompt_template = """
        You are a content moderation expert tasked with categorizing user-generated text.
        Here is the user-generated text to categorize:
        <user_text>{user_text}</user_text>
        Based on the content, classify this text as either ALLOW or BLOCK. Return nothing else.
        """

        # Format the prompt with the user text
        prompt = prompt_template.format(user_text=user_text)

        # Send the prompt to Claude and get the response
        response = client.messages.create(
            model=MODEL_NAME,
            max_tokens=10,
            messages=[{"role": "user", "content": prompt}]
        ).content[0].text
if "blocked" in response.lower(): | |
anthropic_result["Classification"] = "Blocked" | |
anthropic_result["Severity"] = 1 # Assigning severity for blocked content | |
else: | |
anthropic_result["Classification"] = "Allowed" | |
anthropic_result["Severity"] = 0 | |
except Exception as e: | |
anthropic_result = {"Classification": f"Error occurred with Anthropic: {e}", "Severity": 0} | |
# Azure Content Safety Moderation | |
azure_result = analyze_text_azure(user_text) | |
# Toxic BERT Moderation (Hugging Face Model) | |
toxic_result = toxic_bert(user_text) | |
    # unitary/toxic-bert labels are all toxicity categories (e.g. "toxic", "insult"),
    # so treat a high top score as toxic content
    toxic_classification = "Blocked" if toxic_result[0]['score'] > 0.5 else "Allowed"
    toxic_severity = toxic_result[0]['score']

    # Combine results and generate bar chart
    categories = ["OpenAI", "Anthropic", "Microsoft Azure", "Toxic BERT"]
    # Azure returns severity levels up to 7, so scale the average to 0-1 for the chart
    azure_severity = (sum(azure_result.values()) / len(azure_result)) / 7 if azure_result else 0
    classifications = [openai_result["Severity"], anthropic_result["Severity"], azure_severity, toxic_severity]
    bar_chart = create_comparison_chart(categories, classifications)

    # Safe text suggestions for blocked content
    suggestions = ""
    if openai_result["Classification"] == "Blocked":
        suggestions += "OpenAI flagged the text for harmful content. Suggested Rephrase: 'Please use more respectful language.'\n"
    if anthropic_result["Classification"] == "Blocked":
        suggestions += "Anthropic flagged the text. Suggested Rephrase: 'Avoid harmful or offensive language.'\n"
    if any(value > 0.5 for value in azure_result.values()):
        suggestions += "Azure flagged some content. Suggested Rephrase: 'Try to avoid sensitive topics and ensure respectful language.'\n"
    if toxic_classification == "Blocked":
        suggestions += "Toxic BERT flagged the text. Suggested Rephrase: 'Please ensure your language is respectful and non-toxic.'"

    return openai_result, anthropic_result, azure_result, toxic_result, bar_chart, suggestions
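
# Build a matplotlib bar chart comparing the severity scores reported by each
# moderation service (values are expected to fall between 0 and 1).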
def create_comparison_chart(categories, values):
    fig, ax = plt.subplots()
    ax.bar(categories, values, color=['red', 'orange', 'green', 'blue'])
    ax.set_title("Content Moderation Comparison")
    ax.set_ylabel("Severity Score")
    ax.set_ylim(0, 1)
    ax.set_xlabel("Moderation Tool")
    return fig

# Create the Gradio interface
iface = gr.Interface(
    fn=moderate_text,
    inputs=gr.Textbox(lines=2, placeholder="Please write your text here..."),
    outputs=[
        gr.Textbox(label="OpenAI"),
        gr.Textbox(label="Anthropic"),
        gr.Textbox(label="Microsoft Azure"),
        gr.Textbox(label="Toxic BERT"),
        gr.Plot(label="Comparison Bar Chart"),
        gr.Textbox(label="Safe Text Suggestions")
    ],
    title="Content Moderation Tool",
    description="Enter some text and get moderation results from OpenAI, Anthropic, Azure Content Safety, and Toxic BERT, along with suggestions for safe rephrasing."
)
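
# launch() starts a local Gradio server; on Hugging Face Spaces this file is
# executed as __main__, so the app starts automatically.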
if __name__ == "__main__":
    iface.launch()
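
# Example of calling the moderation function directly (hypothetical input and
# output), assuming the required API keys are set as environment variables:
#     results = moderate_text("You are awesome!")
#     print(results[0])  # e.g. {"Classification": "Allowed", "Severity": ...}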