import gradio as gr
import openai
import os
from anthropic import Anthropic
from azure.ai.contentsafety import ContentSafetyClient
from azure.ai.contentsafety.models import AnalyzeTextOptions, TextCategory
from azure.core.credentials import AzureKeyCredential
from azure.core.exceptions import HttpResponseError
import matplotlib.pyplot as plt
from transformers import pipeline
# Load OpenAI, Anthropic API Keys from environment variables
openai.api_key = os.getenv("openaiapikey")
anthropic_api_key = os.getenv("anthropickey")
# Initialize Anthropic client
client = Anthropic(api_key=anthropic_api_key)
MODEL_NAME = "claude-3-haiku-20240307"
# Load Toxic BERT model from Hugging Face
toxic_bert = pipeline("text-classification", model="unitary/toxic-bert")
# Function for Azure Content Safety analysis
def analyze_text_azure(user_text):
    # Retrieve Azure keys from Hugging Face secrets (set as environment variables)
    key = os.getenv("azurekey")
    endpoint = os.getenv("azureendpoint")

    # Create Content Safety client
    client = ContentSafetyClient(endpoint, AzureKeyCredential(key))

    # Construct the request and analyze the text
    request = AnalyzeTextOptions(text=user_text)
    try:
        response = client.analyze_text(request)
    except HttpResponseError as e:
        # Return a dict so callers can still iterate over .values()
        return {f"Error occurred with Azure Content Safety: {e}": 0}

    # Extract moderation results (severity per analyzed category)
    results = {}
    for category in [TextCategory.HATE, TextCategory.SELF_HARM, TextCategory.SEXUAL, TextCategory.VIOLENCE]:
        result = next((item for item in response.categories_analysis if item.category == category), None)
        if result:
            results[category] = result.severity

    return results if results else {"No flagged content detected": 0}
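
# Illustrative only (exact categories and severities depend on the Content Safety response):
# for flagged input, analyze_text_azure() might return something like
#   {TextCategory.HATE: 2, TextCategory.VIOLENCE: 0}
# where each value is Azure's severity level (0-7, higher means more severe).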
def moderate_text(user_text):
    # OpenAI Moderation
    openai_result = {"Classification": "Error in OpenAI Moderation", "Severity": 0}
    try:
        response = openai.Moderation.create(input=user_text)
        moderation_flagged = response["results"][0]["flagged"]
        category_scores = response["results"][0]["category_scores"]
        if moderation_flagged:
            openai_result["Classification"] = "Blocked"
            # Use the highest category score as a numeric severity (0-1) for the chart
            openai_result["Severity"] = max(category_scores.values())
        else:
            openai_result["Classification"] = "Allowed"
    except Exception as e:
        openai_result = {"Classification": f"Error occurred with OpenAI: {e}", "Severity": 0}
    # Anthropic Moderation
    anthropic_result = {"Classification": "Error in Anthropic Moderation", "Severity": 0}
    try:
        prompt_template = """
        You are a content moderation expert tasked with categorizing user-generated text.
        Here is the user-generated text to categorize:
        <user_text>{user_text}</user_text>
        Based on the content, classify this text as either ALLOW or BLOCK. Return nothing else.
        """
        # Format the prompt with the user text
        prompt = prompt_template.format(user_text=user_text)

        # Send the prompt to Claude and get the response
        response = client.messages.create(
            model=MODEL_NAME,
            max_tokens=10,
            messages=[{"role": "user", "content": prompt}]
        ).content[0].text

        # The prompt asks for ALLOW or BLOCK, so match "block" rather than "blocked"
        if "block" in response.lower():
            anthropic_result["Classification"] = "Blocked"
            anthropic_result["Severity"] = 1  # Assign maximum severity for blocked content
        else:
            anthropic_result["Classification"] = "Allowed"
            anthropic_result["Severity"] = 0
    except Exception as e:
        anthropic_result = {"Classification": f"Error occurred with Anthropic: {e}", "Severity": 0}
    # Azure Content Safety Moderation
    azure_result = analyze_text_azure(user_text)

    # Toxic BERT Moderation (Hugging Face model)
    toxic_result = toxic_bert(user_text)
    toxic_severity = toxic_result[0]['score']
    # unitary/toxic-bert predicts toxicity categories (e.g. "toxic") rather than LABEL_1,
    # so treat a high-confidence toxicity prediction as blocked
    toxic_classification = "Blocked" if toxic_severity > 0.5 else "Allowed"

    # Combine results and generate the comparison bar chart
    # (Azure severities range from 0 to 7, so scale their average to the chart's 0-1 axis)
    azure_numeric = [v for v in azure_result.values() if isinstance(v, (int, float))]
    azure_severity = (sum(azure_numeric) / len(azure_numeric)) / 7 if azure_numeric else 0
    categories = ["OpenAI", "Anthropic", "Microsoft Azure", "Toxic BERT"]
    classifications = [openai_result["Severity"], anthropic_result["Severity"], azure_severity, toxic_severity]
    bar_chart = create_comparison_chart(categories, classifications)

    # Safe text suggestions for blocked content
    suggestions = ""
    if openai_result["Classification"] == "Blocked":
        suggestions += "OpenAI flagged the text for harmful content. Suggested Rephrase: 'Please use more respectful language.'\n"
    if anthropic_result["Classification"] == "Blocked":
        suggestions += "Anthropic flagged the text. Suggested Rephrase: 'Avoid harmful or offensive language.'\n"
    if any(value > 0.5 for value in azure_numeric):
        suggestions += "Azure flagged some content. Suggested Rephrase: 'Try to avoid sensitive topics and ensure respectful language.'\n"
    if toxic_classification == "Blocked":
        suggestions += "Toxic BERT flagged the text. Suggested Rephrase: 'Please ensure your language is respectful and non-toxic.'"

    return openai_result, anthropic_result, azure_result, toxic_result, bar_chart, suggestions
def create_comparison_chart(categories, values):
    fig, ax = plt.subplots()
    ax.bar(categories, values, color=['red', 'orange', 'green', 'blue'])
    ax.set_title("Content Moderation Comparison")
    ax.set_xlabel("Moderation Tool")
    ax.set_ylabel("Severity Score")
    ax.set_ylim(0, 1)
    return fig
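
# Note: the chart assumes all four values are already on a 0-1 scale; OpenAI and Toxic BERT
# report probabilities, Anthropic is mapped to 0 or 1, and Azure's 0-7 severities are
# averaged and scaled in moderate_text before being passed in.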
# Create the Gradio interface
iface = gr.Interface(
    fn=moderate_text,
    inputs=gr.Textbox(lines=2, placeholder="Please write your text here..."),
    outputs=[
        gr.Textbox(label="OpenAI"),
        gr.Textbox(label="Anthropic"),
        gr.Textbox(label="Microsoft Azure"),
        gr.Textbox(label="Toxic BERT"),
        gr.Plot(label="Comparison Bar Chart"),
        gr.Textbox(label="Safe Text Suggestions")
    ],
    title="Content Moderation Tool",
    description="Enter some text to get moderation results from OpenAI, Anthropic, Azure Content Safety, and Toxic BERT, plus suggestions for safe rephrasing."
)
if __name__ == "__main__":
    iface.launch()
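
# To run locally (illustrative; exact values depend on your own accounts), set the same
# secrets the Space expects as environment variables before launching, e.g.:
#   export openaiapikey=...
#   export anthropickey=...
#   export azurekey=... azureendpoint=https://<your-resource>.cognitiveservices.azure.com/
#   python app.py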