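"""Gradio demo comparing content moderation verdicts from four providers:
OpenAI Moderation, Anthropic Claude, Azure AI Content Safety, and Toxic BERT."""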
import gradio as gr
import os
from openai import OpenAI
from anthropic import Anthropic
from azure.ai.contentsafety import ContentSafetyClient
from azure.ai.contentsafety.models import TextCategory
from azure.core.credentials import AzureKeyCredential
from azure.core.exceptions import HttpResponseError
from azure.ai.contentsafety.models import AnalyzeTextOptions
from transformers import pipeline  # Importing Hugging Face pipeline for Toxic BERT

# Try to get the API key from either environment variable
api_key = os.getenv("OPENAI_API_KEY") or os.getenv("openaiapikey")
if not api_key:
    print("WARNING: No OpenAI API key found in environment variables!")
    api_key = "placeholder_key_for_initialization"  # This will cause a controlled error

# Initialize OpenAI client
openai_client = OpenAI(api_key=api_key)
anthropic_api_key = os.getenv("anthropickey")

# Initialize Anthropic client
anthropic_client = Anthropic(api_key=anthropic_api_key)

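# Claude model used for the ALLOW/BLOCK moderation prompt below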
MODEL_NAME = "claude-3-haiku-20240307"

# Load Toxic BERT model from Hugging Face
toxic_bert = pipeline("text-classification", model="unitary/toxic-bert")
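# Note: unitary/toxic-bert is a multi-label toxicity model; this default
# pipeline call returns only the top-scoring label and its score.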

# Function for Azure Content Safety analysis
def analyze_text_azure(user_text):
    # Retrieve Azure keys from Hugging Face secrets (exposed as environment variables)
    key = os.getenv("azurekey")
    endpoint = os.getenv("azureendpoint")
    if not key or not endpoint:
        return "Azure Content Safety is not configured (missing azurekey/azureendpoint secrets)."

    # Create Content Safety client
    client = ContentSafetyClient(endpoint, AzureKeyCredential(key))

    # Construct request
    request = AnalyzeTextOptions(text=user_text)

    # Analyze text
    try:
        response = client.analyze_text(request)
    except HttpResponseError as e:
        return f"Error occurred with Azure Content Safety: {e}"

    # Extract the severity for each moderation category present in the response
    results = []
    for category, label in [
        (TextCategory.HATE, "Hate"),
        (TextCategory.SELF_HARM, "SelfHarm"),
        (TextCategory.SEXUAL, "Sexual"),
        (TextCategory.VIOLENCE, "Violence"),
    ]:
        result = next((item for item in response.categories_analysis if item.category == category), None)
        if result:
            results.append(f"{label} severity: {result.severity}")

    return "\n".join(results) if results else "No flagged content detected in Azure Content Safety."

def moderate_text(user_text):
    # OpenAI Moderation - WITH GRACEFUL ERROR HANDLING
    try:
        # Attempt to use the real API
        response = openai_client.moderations.create(input=user_text)
        
        moderation_categories = response.results[0].categories
        moderation_flagged = response.results[0].flagged

        if moderation_flagged:
            categories_dict = {k: v for k, v in vars(moderation_categories).items() if not k.startswith('_')}
            flagged_categories = [category for category, flagged in categories_dict.items() if flagged]
            openai_moderation_result = f"OpenAI flags the text for the following categories: {', '.join(flagged_categories)}"
        else:
            openai_moderation_result = "The text is not flagged for any moderation issues by OpenAI."
    except Exception as e:
        # Create a professional-looking fallback response that shows API connectivity issues
        openai_moderation_result = """
⚠️ OpenAI API Connection Error ⚠️

The OpenAI Moderation API is currently unavailable. This may be due to:
- Network connectivity issues in the hosting environment
- API rate limits
- API key configuration

Example output (if connected):
- For harmful content: "OpenAI flags the text for the following categories: violence, hate, self-harm"
- For safe content: "The text is not flagged for any moderation issues by OpenAI."

[This is a demonstration of a multi-model content moderation system that compares results across different providers]
"""
        print(f"Debug - OpenAI Error: {str(e)}")

    # Anthropic Moderation
    anthropic_moderation_result = "Error in Anthropic Moderation"
    try:
        prompt_template = """
        You are a content moderation expert tasked with categorizing user-generated text.
        Here is the user-generated text to categorize:
        <user_text>{user_text}</user_text>
        Based on the content, classify this text as either ALLOW or BLOCK. Return nothing else.
        """

        # Format the prompt with the user text
        prompt = prompt_template.format(user_text=user_text)

        # Send the prompt to Claude and get the response
        response = anthropic_client.messages.create(
            model=MODEL_NAME,
            max_tokens=10,
            messages=[{"role": "user", "content": prompt}]
        ).content[0].text

        anthropic_moderation_result = f"Anthropic's moderation result: {response}"

    except Exception as e:
        anthropic_moderation_result = f"Error occurred with Anthropic: {e}"

    # Azure Content Safety Moderation
    azure_moderation_result = analyze_text_azure(user_text)

    # Toxic BERT Moderation (Hugging Face model)
    # unitary/toxic-bert returns category labels such as "toxic" rather than
    # "LABEL_1", and every label denotes a toxicity category, so classify on
    # the top label's score; adjust the 0.85 threshold as needed.
    toxic_result = toxic_bert(user_text)
    toxic_severity = toxic_result[0]['score']
    toxic_classification = "Blocked" if toxic_severity > 0.85 else "Allowed"

    toxic_explanation = f"Toxic BERT classification: {toxic_classification}, Confidence: {toxic_severity:.2f}"

    return openai_moderation_result, anthropic_moderation_result, azure_moderation_result, toxic_explanation


# Create the Gradio interface with updated input and output labels
iface = gr.Interface(
    fn=moderate_text,
    inputs=gr.Textbox(lines=2, label="Text to moderate", placeholder="Please write your text here..."),
    outputs=[
        gr.Textbox(label="OpenAI"),
        gr.Textbox(label="Anthropic"),
        gr.Textbox(label="Microsoft Azure"),
        gr.Textbox(label="Toxic BERT")
    ],
    title="Content Moderation Model Comparison Tool",
    description="Enter some text and get the moderation results from OpenAI, Anthropic/Claude, Microsoft Azure Content Safety, and Toxic BERT. Note: The OpenAI API connection may be unavailable in this demo."
)

if __name__ == "__main__":
    iface.launch()