import gradio as gr
import openai
import os
from anthropic import Anthropic
from azure.ai.contentsafety import ContentSafetyClient
from azure.ai.contentsafety.models import AnalyzeTextOptions, TextCategory
from azure.core.credentials import AzureKeyCredential
from azure.core.exceptions import HttpResponseError
import matplotlib.pyplot as plt
from transformers import pipeline
# Load the OpenAI and Anthropic API keys from environment variables
openai.api_key = os.getenv("openaiapikey")
anthropic_api_key = os.getenv("anthropickey")
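# On a Hugging Face Space these keys come from repository secrets; when running
# locally they can be exported before launch, e.g. (shell sketch, endpoint format
# is an assumption about your Azure resource):
#   export openaiapikey="sk-..." anthropickey="sk-ant-..."
#   export azurekey="..." azureendpoint="https://<resource>.cognitiveservices.azure.com/"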
# Initialize Anthropic client
client = Anthropic(api_key=anthropic_api_key)
MODEL_NAME = "claude-3-haiku-20240307"
# Load Toxic BERT model from Hugging Face
toxic_bert = pipeline("text-classification", model="unitary/toxic-bert")
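# Illustrative output shape of the pipeline (scores are made up):
#   toxic_bert("you are awful")  ->  [{'label': 'toxic', 'score': 0.97}]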
# Function for Azure Content Safety analysis
def analyze_text_azure(user_text):
    # Retrieve Azure keys from Hugging Face secrets (exposed as environment variables)
    key = os.getenv("azurekey")
    endpoint = os.getenv("azureendpoint")

    # Create the Content Safety client
    client = ContentSafetyClient(endpoint, AzureKeyCredential(key))

    # Construct and send the analysis request
    request = AnalyzeTextOptions(text=user_text)
    try:
        response = client.analyze_text(request)
    except HttpResponseError as e:
        # Return a dict so callers can still iterate over .values()
        return {f"Error occurred with Azure Content Safety: {e}": 0}

    # Extract the severity for each moderation category
    results = {}
    for category in [TextCategory.HATE, TextCategory.SELF_HARM, TextCategory.SEXUAL, TextCategory.VIOLENCE]:
        result = next((item for item in response.categories_analysis if item.category == category), None)
        if result:
            results[category] = result.severity
    return results if results else {"No flagged content detected": 0}
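# Illustrative return value (Azure severities follow a 0-7 scale; values made up):
#   analyze_text_azure("some text")  ->  {TextCategory.HATE: 0, TextCategory.VIOLENCE: 2, ...}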
def moderate_text(user_text):
    # OpenAI Moderation (uses the pre-1.0 openai SDK interface)
    openai_result = {"Classification": "Error in OpenAI Moderation", "Severity": 0}
    try:
        response = openai.Moderation.create(input=user_text)
        moderation_categories = response["results"][0]["categories"]
        moderation_scores = response["results"][0]["category_scores"]
        moderation_flagged = response["results"][0]["flagged"]
        if moderation_flagged:
            openai_result["Classification"] = "Blocked"
            # Use the highest score among the flagged categories as the severity
            openai_result["Severity"] = max(
                score for category, score in moderation_scores.items()
                if moderation_categories.get(category)
            )
        else:
            openai_result["Classification"] = "Allowed"
    except Exception as e:
        openai_result = {"Classification": f"Error occurred with OpenAI: {e}", "Severity": 0}

    # Anthropic Moderation: prompt Claude to act as a binary ALLOW/BLOCK classifier
    anthropic_result = {"Classification": "Error in Anthropic Moderation", "Severity": 0}
    try:
        prompt_template = """
        You are a content moderation expert tasked with categorizing user-generated text.
        Here is the user-generated text to categorize:
        <user_text>{user_text}</user_text>
        Based on the content, classify this text as either ALLOW or BLOCK. Return nothing else.
        """
        # Format the prompt with the user text
        prompt = prompt_template.format(user_text=user_text)

        # Send the prompt to Claude and get the response
        response = client.messages.create(
            model=MODEL_NAME,
            max_tokens=10,
            messages=[{"role": "user", "content": prompt}]
        ).content[0].text

        # The prompt asks for ALLOW/BLOCK, so match on "block" (not "blocked")
        if "block" in response.lower():
            anthropic_result["Classification"] = "Blocked"
            anthropic_result["Severity"] = 1  # Fixed severity for blocked content
        else:
            anthropic_result["Classification"] = "Allowed"
            anthropic_result["Severity"] = 0
    except Exception as e:
        anthropic_result = {"Classification": f"Error occurred with Anthropic: {e}", "Severity": 0}

    # Azure Content Safety Moderation
    azure_result = analyze_text_azure(user_text)

    # Toxic BERT Moderation (Hugging Face model). unitary/toxic-bert emits named
    # labels such as "toxic", so treat a confident top prediction as blocked.
    toxic_result = toxic_bert(user_text)
    toxic_severity = toxic_result[0]["score"]
    toxic_classification = "Blocked" if toxic_severity > 0.5 else "Allowed"

    # Combine results and generate the bar chart. Azure severities run 0-7,
    # so normalize the average into the chart's 0-1 range.
    azure_severity = sum(azure_result.values()) / len(azure_result) / 7
    categories = ["OpenAI", "Anthropic", "Microsoft Azure", "Toxic BERT"]
    classifications = [openai_result["Severity"], anthropic_result["Severity"], azure_severity, toxic_severity]
    bar_chart = create_comparison_chart(categories, classifications)

    # Safe-text suggestions for blocked content
    suggestions = ""
    if openai_result["Classification"] == "Blocked":
        suggestions += "OpenAI flagged the text for harmful content. Suggested Rephrase: 'Please use more respectful language.'\n"
    if anthropic_result["Classification"] == "Blocked":
        suggestions += "Anthropic flagged the text. Suggested Rephrase: 'Avoid harmful or offensive language.'\n"
    if any(value > 0.5 for value in azure_result.values()):
        suggestions += "Azure flagged some content. Suggested Rephrase: 'Try to avoid sensitive topics and ensure respectful language.'\n"
    if toxic_classification == "Blocked":
        suggestions += "Toxic BERT flagged the text. Suggested Rephrase: 'Please ensure your language is respectful and non-toxic.'"

    return openai_result, anthropic_result, azure_result, toxic_result, bar_chart, suggestions
def create_comparison_chart(categories, values):
    fig, ax = plt.subplots()
    ax.bar(categories, values, color=['red', 'orange', 'green', 'blue'])
    ax.set_title("Content Moderation Comparison")
    ax.set_xlabel("Moderation Tool")
    ax.set_ylabel("Severity Score")
    ax.set_ylim(0, 1)
    return fig
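# Example (hypothetical values): create_comparison_chart(["OpenAI", "Anthropic"], [0.2, 1.0])
# returns a matplotlib Figure, which the gr.Plot component renders directly.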
# Create the Gradio interface with updated input and output labels
iface = gr.Interface(
    fn=moderate_text,
    inputs=gr.Textbox(lines=2, placeholder="Please write your text here..."),
    outputs=[
        gr.Textbox(label="OpenAI"),
        gr.Textbox(label="Anthropic"),
        gr.Textbox(label="Microsoft Azure"),
        gr.Textbox(label="Toxic BERT"),
        gr.Plot(label="Comparison Bar Chart"),
        gr.Textbox(label="Safe Text Suggestions")
    ],
    title="Content Moderation Tool",
    description="Enter some text to get moderation results from OpenAI, Anthropic, Azure Content Safety, and Toxic BERT, along with suggestions for safe rephrasing."
)
if __name__ == "__main__":
    iface.launch()
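# When running locally, a temporary public URL can be exposed with:
#   iface.launch(share=True)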