# Abuse-Detection / app.py
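"""Gradio app for RudeRater, an offensive-language detector.

Downloads the ONNX model, BPE tokenizer, and hyperparameter config from the
iimran/RudeRater repository on the Hugging Face Hub, classifies input text as
'Inappropriate Content' or 'Not Inappropriate', and serves the classifier
through a Gradio text interface.
"""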
import os
import json
import numpy as np
from tokenizers import Tokenizer
import onnxruntime as ort
from huggingface_hub import hf_hub_download
import gradio as gr


class ONNXInferencePipeline:
    def __init__(self, repo_id):
        # Retrieve the Hugging Face token from the environment variable
        hf_token = os.getenv("HF_TOKEN")
        if hf_token is None:
            raise ValueError("HF_TOKEN environment variable is not set.")

        # Download files from the Hugging Face Hub using the token
        self.onnx_path = hf_hub_download(repo_id=repo_id, filename="RudeRater.onnx", use_auth_token=hf_token)
        self.tokenizer_path = hf_hub_download(repo_id=repo_id, filename="train_bpe_tokenizer.json", use_auth_token=hf_token)
        self.config_path = hf_hub_download(repo_id=repo_id, filename="hyperparameters.json", use_auth_token=hf_token)

        # Load configuration
        with open(self.config_path) as f:
            self.config = json.load(f)

        # Initialize tokenizer
        self.tokenizer = Tokenizer.from_file(self.tokenizer_path)
        self.max_len = self.config["tokenizer"]["max_len"]

        # Initialize the ONNX Runtime session, preferring CUDA when it is available
        providers = ['CPUExecutionProvider']
        if 'CUDAExecutionProvider' in ort.get_available_providers():
            providers = ['CUDAExecutionProvider']
        self.providers = providers
        self.session = ort.InferenceSession(self.onnx_path, providers=providers)

    def preprocess(self, text):
        # Tokenize, truncate to max_len, and right-pad with id 0 up to max_len
        encoding = self.tokenizer.encode(text)
        ids = encoding.ids[:self.max_len]
        padding = [0] * (self.max_len - len(ids))
        return np.array(ids + padding, dtype=np.int64).reshape(1, -1)

    def predict(self, text):
        # Preprocess
        input_array = self.preprocess(text)

        # Run inference; the graph is expected to expose a single input named 'input'
        results = self.session.run(
            None,
            {'input': input_array}
        )

        # Post-process: softmax over the logits, then take the highest-probability class
        logits = results[0]
        probabilities = np.exp(logits) / np.sum(np.exp(logits), axis=1, keepdims=True)
        predicted_class = int(np.argmax(probabilities))

        # Map to labels (index order must match the model's output classes)
        class_labels = ['Inappropriate Content', 'Not Inappropriate']
        return {
            'label': class_labels[predicted_class],
            'confidence': float(probabilities[0][predicted_class]),
            'probabilities': probabilities[0].tolist()
        }
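

# Note: predict() assumes the exported ONNX graph has exactly one input tensor
# named 'input'. If that assumption does not hold for a given export, the actual
# input/output names can be inspected through the onnxruntime session, e.g.:
#
#     session = ort.InferenceSession("RudeRater.onnx")
#     print([node.name for node in session.get_inputs()])   # expected: ['input']
#     print([node.name for node in session.get_outputs()])
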
# Example usage
if __name__ == "__main__":
    # Initialize the pipeline with the Hugging Face repository ID
    pipeline = ONNXInferencePipeline(repo_id="iimran/RudeRater")

    # Example texts for testing
    example_texts = [
        "You're a worthless piece of garbage who should die"
    ]
    for text in example_texts:
        result = pipeline.predict(text)
        print(f"Input: {text}")
        print(f"Prediction: {result['label']}")
        # print(f"Probabilities: Inappropriate={result['probabilities'][0]:.2%}, Not Inappropriate={result['probabilities'][1]:.2%}")
        print("-" * 80)

    # Define a function for Gradio to use
    def gradio_predict(text):
        result = pipeline.predict(text)
        return (
            f"Prediction: {result['label']}\n"
            # f"Probabilities: Inappropriate={result['probabilities'][0]:.2%}, Not Inappropriate={result['probabilities'][1]:.2%}"
        )

    # Create a Gradio interface
    iface = gr.Interface(
        fn=gradio_predict,
        inputs=gr.Textbox(lines=7, placeholder="Enter text here..."),
        outputs="text",
        title="RudeRater - Offensive Language Detector",
        description=(
            "RudeRater is designed to identify inappropriate content in text. "
            "It analyzes input for offensive language and explicit content. "
            "While it's trained on a compact dataset and may not catch highly nuanced or sophisticated language, "
            "it effectively detects day-to-day offensive language commonly used in conversations."
        ),
        examples=[
            # Explicitly offensive examples
            "Congrats, you fuckbrain arsehole, you’ve outdone yourself in stupidity. A real cock-up of a human—should we clap for your bollocks-faced greatness or just pity you?",
            "You’re a mad bastard, but I’d still grab a beer with you! Fuck around all you like, you cockheaded legend—your arsehole antics are bloody brilliant.",
            "Your mother should have done better raising such a useless idiot.",
            # Subtle but inappropriate examples
            "I can't believe how utterly incompetent you are. Do you even have a brain, or is it just empty space up there?",
            "This is the worst service I've ever experienced. You people are a joke and should be ashamed of yourselves.",
            # Professional but inappropriate examples
            "Hello HR, I hope this letter finds you well, you cockfaced arse-licker. I’m writing to express my sincere gratitude for the opportunity to interview for the [Job Title] position at your esteemed company last week. It was a pleasure to meet you, you fuckable prick, and learn more about the role and your team of bastard twats.",
            "Thanks again for your time and consideration, you bloody fuckass. Please don’t hesitate to reach out if you need more info—I’d be thrilled to chat further, you cockslapping arsehead.",
            # Neutral or appropriate examples
            "Hello HR, I hope this message finds you well. I’m writing to express my gratitude for the opportunity to interview for the [Job Title] position last week. It was a pleasure to meet you and learn more about the role and your team.",
            "Thank you for your time and consideration. Please don’t hesitate to reach out if you need additional information—I’d be happy to discuss further. Looking forward to hearing from you soon!",
            "The weather today is lovely, and I’m looking forward to a productive day at work.",
            # Mixed examples (some offensive, some neutral)
            "I appreciate your help, but honestly, you’re such a clueless idiot sometimes. Still, thanks for trying.",
            "Your presentation was great, but your attitude is absolutely disgusting. Do better next time."
        ]
    )

    # Launch the Gradio app
    iface.launch()
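
# Running this app requires a valid Hugging Face token in the HF_TOKEN
# environment variable (ONNXInferencePipeline raises a ValueError otherwise).
# On Hugging Face Spaces this is typically supplied as a repository secret;
# locally, something like `export HF_TOKEN=<your token>` before launching
# should work.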