iimran committed on
Commit
0f5ffe4
·
verified ·
1 Parent(s): 92ff5c5

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +111 -0
app.py ADDED
@@ -0,0 +1,111 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import json
3
+ import numpy as np
4
+ from tokenizers import Tokenizer
5
+ import onnxruntime as ort
6
+ from huggingface_hub import hf_hub_download
7
+ import gradio as gr
8
+
9
class ONNXInferencePipeline:
    """Classify text with an ONNX model downloaded from the Hugging Face Hub.

    The pipeline downloads three files from ``repo_id`` (the ONNX model, a
    tokenizer definition and a hyperparameter file), tokenizes input text to a
    fixed length, runs the ONNX session, and turns the first output into
    class probabilities.
    """

    # Position i is the label reported when the highest score is at index i.
    CLASS_LABELS = ('Inappropriate Content', 'Not Inappropriate')

    def __init__(self, repo_id):
        """Download the model artifacts and prepare tokenizer and session.

        Args:
            repo_id: Hugging Face Hub repository that hosts ``RudeRater.onnx``,
                ``train_bpe_tokenizer.json`` and ``hyperparameters.json``.

        Raises:
            ValueError: If the ``HF_TOKEN`` environment variable is not set.
        """
        # Retrieve the Hugging Face token from the environment variable
        hf_token = os.getenv("HF_TOKEN")
        if hf_token is None:
            raise ValueError("HF_TOKEN environment variable is not set.")

        # Download files from Hugging Face Hub using the token.
        # `token` replaces the deprecated `use_auth_token` argument.
        self.onnx_path = hf_hub_download(repo_id=repo_id, filename="RudeRater.onnx", token=hf_token)
        self.tokenizer_path = hf_hub_download(repo_id=repo_id, filename="train_bpe_tokenizer.json", token=hf_token)
        self.config_path = hf_hub_download(repo_id=repo_id, filename="hyperparameters.json", token=hf_token)

        # Load configuration (JSON is UTF-8; do not depend on the locale default)
        with open(self.config_path, encoding="utf-8") as f:
            self.config = json.load(f)

        # Initialize tokenizer
        self.tokenizer = Tokenizer.from_file(self.tokenizer_path)
        self.max_len = self.config["tokenizer"]["max_len"]

        # Choose execution providers up front: prefer CUDA when this
        # onnxruntime build offers it, and always keep CPU as the fallback.
        self.providers = ['CPUExecutionProvider']
        if 'CUDAExecutionProvider' in ort.get_available_providers():
            self.providers = ['CUDAExecutionProvider', 'CPUExecutionProvider']

        # Initialize ONNX runtime session with the providers given explicitly,
        # rather than creating it first and switching providers afterwards.
        self.session = ort.InferenceSession(self.onnx_path, providers=self.providers)

    def preprocess(self, text):
        """Tokenize ``text`` into a fixed-length batch of one.

        Token ids are truncated to ``self.max_len`` and right-padded with 0.

        Returns:
            An ``int64`` NumPy array of shape ``(1, self.max_len)``.
        """
        encoding = self.tokenizer.encode(text)
        ids = encoding.ids[:self.max_len]
        padding = [0] * (self.max_len - len(ids))
        return np.array(ids + padding, dtype=np.int64).reshape(1, -1)

    def predict(self, text):
        """Classify ``text`` and report the winning label with probabilities.

        Returns:
            A dict with ``'label'`` (entry of ``CLASS_LABELS``),
            ``'confidence'`` (probability of that label, as ``float``) and
            ``'probabilities'`` (list of per-class probabilities).
        """
        # Preprocess
        input_array = self.preprocess(text)

        # Run inference; the model is fed through an input named 'input'
        results = self.session.run(
            None,
            {'input': input_array}
        )

        # Post-process: softmax over the class axis. Subtracting the row
        # maximum first keeps np.exp from overflowing on large logits
        # (which would otherwise yield inf / inf = nan).
        logits = results[0]
        shifted = logits - np.max(logits, axis=1, keepdims=True)
        exp_scores = np.exp(shifted)
        probabilities = exp_scores / np.sum(exp_scores, axis=1, keepdims=True)

        # Only the first (and only) row of the batch is reported.
        row = probabilities[0]
        predicted_class = int(np.argmax(row))

        # Map to labels
        return {
            'label': self.CLASS_LABELS[predicted_class],
            'confidence': float(row[predicted_class]),
            'probabilities': row.tolist()
        }
64
+
65
# Script entry point: smoke-test the classifier on a few samples, then serve
# it through a Gradio web interface.
if __name__ == "__main__":
    # Build the pipeline from the Hugging Face repository that hosts the model
    pipeline = ONNXInferencePipeline(repo_id="iimran/RudeRater")

    def _summarize(outcome):
        """Render a prediction dict as the two-line textual summary."""
        scores = outcome['probabilities']
        headline = f"Prediction: {outcome['label']} ({outcome['confidence']:.2%})"
        breakdown = f"Probabilities: Inappropriate={scores[0]:.2%}, Not Inappropriate={scores[1]:.2%}"
        return headline + "\n" + breakdown

    # Print a prediction for each sample sentence before starting the UI
    for sample in (
        "This content contains explicit language and violent threats",
        "The weather today is pleasant and suitable for all ages",
        "You're a worthless piece of garbage who should die",
        "Please remember to submit your reports by Friday",
    ):
        print(f"Input: {sample}")
        print(_summarize(pipeline.predict(sample)))
        print("-" * 80)

    # Callback handed to Gradio: classify the submitted text and return the
    # same summary that the console smoke test prints.
    def gradio_predict(text):
        return _summarize(pipeline.predict(text))

    # Sentences offered as clickable examples in the web UI
    ui_examples = [
        "This is completely unacceptable behavior and I'll make sure you regret it",
        "The community guidelines clearly prohibit any form of discrimination",
        "Your mother should have done better raising such a useless idiot",
        "We appreciate your feedback and will improve our services",
        "I'm going to find you and make you pay for what you've done",
        "The park maintenance schedule has been updated for summer",
    ]

    # Assemble the Gradio interface: one multi-line text box in, plain text out
    text_input = gr.Textbox(lines=7, placeholder="Enter text here...")
    iface = gr.Interface(
        fn=gradio_predict,
        inputs=text_input,
        outputs="text",
        title="RudeRater - Content Appropriateness Classifier",
        description="RudeRater is designed to identify inappropriate content in text. It analyzes input for offensive language, explicit content, or harmful material. Enter text to check its appropriateness.",
        examples=ui_examples,
    )

    # Start serving the Gradio app
    iface.launch()