File size: 3,379 Bytes
37a02e7
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
import gradio as gr
from transformers import pipeline

# Load the toxicity classification pipeline once at import time so the Gradio
# handler can reuse it across requests.  NOTE: this triggers a model download
# on first run and blocks until the weights are loaded.
print("Loading toxicity classifier pipeline...")
toxicity_pipeline = pipeline(
    "text-classification",
    model="s-nlp/roberta_toxicity_classifier",
    tokenizer="s-nlp/roberta_toxicity_classifier",
    # return_all_scores=True  # Intentionally disabled: only the top label/score
    #                         # pair is consumed downstream.
)
print("Pipeline loaded successfully!")

def toxicity_classification(text: str) -> dict:
    """
    Classify the toxicity of the given text.

    Args:
        text (str): The text to analyze.

    Returns:
        dict: On success, ``{"classification": "toxic" | "non-toxic",
        "confidence": <float rounded to 4 places>}``.  On empty input or a
        pipeline failure, ``{"error": <message>}``.
    """
    if not text.strip():
        # Empty / whitespace-only input: short-circuit without invoking the model.
        return {
            "error": "Please enter some text to analyze"
        }

    try:
        # The pipeline returns the top prediction as [{'label': ..., 'score': ...}].
        result = toxicity_pipeline(text)[0]

        label = result['label']
        confidence = result['score']

        # BUG FIX: s-nlp/roberta_toxicity_classifier emits human-readable labels
        # ("neutral" / "toxic" per its id2label config), NOT "LABEL_0"/"LABEL_1".
        # The original `label == "LABEL_0"` comparison therefore never matched,
        # and every input was reported as "toxic".  Accept both label
        # conventions, case-insensitively, to be robust either way.
        non_toxic_labels = {"label_0", "neutral", "non-toxic"}
        classification = "non-toxic" if label.lower() in non_toxic_labels else "toxic"

        return {
            "classification": classification,
            "confidence": round(confidence, 4)
        }

    except Exception as e:
        # Boundary handler: surface model/runtime failures to the UI as JSON
        # instead of crashing the Gradio worker.
        return {
            "error": f"Error processing text: {str(e)}"
        }

# Create the Gradio interface
# Create the Gradio interface: a single free-text input mapped through
# toxicity_classification, with the result dict rendered as JSON.
demo = gr.Interface(
    fn=toxicity_classification,
    inputs=gr.Textbox(
        placeholder="Enter text to analyze for toxicity...",
        lines=3,
        label="Input Text"
    ),
    outputs=gr.JSON(label="Toxicity Analysis Results"),
    title="Text Toxicity Classification",
    description="Analyze text toxicity using RoBERTa transformer model (s-nlp/roberta_toxicity_classifier)",
    # Clickable example inputs shown beneath the textbox.
    examples=[
        ["You are amazing!"],
        ["This is a wonderful day."],
        ["I disagree with your opinion."],
        ["The weather is nice today."]
    ]
)

# Launch the interface and MCP server
# Launch the interface and MCP server
if __name__ == "__main__":
    # Local-only import: sys is needed just for the CLI-flag check below.
    import sys

    # Debug mode: run `python app.py debug` to exercise the classifier on a
    # fixed set of inputs and print the results instead of launching the UI.
    if len(sys.argv) > 1 and sys.argv[1] == "debug":
        print("=" * 50)
        print("DEBUG MODE - Testing toxicity classification")
        print("=" * 50)

        # Test cases covering friendly, hostile, neutral, and empty inputs.
        test_cases = [
            "You are amazing!",
            "This is a wonderful day.",
            "I hate you so much!",
            "The weather is nice today.",
            "You're such an idiot!",
            "I disagree with your opinion.",
            ""  # Empty string test -> should produce the error dict
        ]

        # BUG FIX: removed the leftover `import pdb; pdb.set_trace()` breakpoint
        # that halted every loop iteration, hanging any non-interactive debug
        # run.  Also number cases from 1 for human-readable output.
        for i, test_text in enumerate(test_cases, 1):
            print(f"\n--- Test Case {i} ---")
            print(f"Input: '{test_text}'")

            result = toxicity_classification(test_text)

            print(f"Output: {result}")
            print("-" * 30)

        print("\nDebug testing completed!")

    else:
        # Normal mode: serve the Gradio UI and expose it as an MCP server.
        demo.launch(mcp_server=True)