import gradio as gr
import joblib
import numpy as np
import torch
from transformers import AutoModelForSequenceClassification, AutoTokenizer

import nltk
nltk.download('wordnet')  # presumably needed by the lemmatizer inside clean.py

# Local helper modules shipped alongside this app
import clean
import language_detection

print("all imports worked")
# Load the pre-trained English classifier and its TF-IDF vectorizer
model = joblib.load('model_joblib.pkl')
print("model loaded")
tf = joblib.load('tf_joblib.pkl')
print("tfidf loaded")

# Load the Hindi abuse detection model from the Hugging Face Hub
hindi_tokenizer = AutoTokenizer.from_pretrained("Hate-speech-CNERG/hindi-abusive-MuRIL")
hindi_model = AutoModelForSequenceClassification.from_pretrained("Hate-speech-CNERG/hindi-abusive-MuRIL")
print("Hindi model loaded")
def predict_hindi_text(text):
    """Return softmax class probabilities from the Hindi MuRIL model."""
    inputs = hindi_tokenizer(text, return_tensors="pt", padding=True, truncation=True)
    with torch.no_grad():  # inference only, no gradients needed
        outputs = hindi_model(**inputs)
    predictions = torch.nn.functional.softmax(outputs.logits, dim=-1)
    scores = predictions[0].detach().numpy()
    return scores
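# Hedged usage sketch: for this binary abuse head, scores is a length-2
# probability array, and the comparison in predict_abusive_lang below assumes
# index 1 is the abusive class:
#
#     scores = predict_hindi_text(some_hindi_sentence)
#     is_abusive = scores[1] > scores[0]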
# Predict whether a sentence is abusive, routing by detected language
def predict_abusive_lang(text):
    print("original text ", text)
    lang = language_detection.en_hi_detection(text)
    print("language detected ", lang)
    if lang == 'eng':
        # English path: clean, vectorize with TF-IDF, then classify
        cleaned_text = clean.text_cleaning(text)
        print("cleaned text ", cleaned_text)
        vectorized = tf.transform([cleaned_text])
        print("tfidf transformation ", vectorized)
        prediction = model.predict(vectorized)
        print("prediction ", prediction)
        if len(prediction) != 0 and prediction[0] == 0:
            return ["Not Abusive", cleaned_text]
        elif len(prediction) != 0 and prediction[0] == 1:
            return ["Abusive", cleaned_text]
        else:
            return ["Please write something in the comment box...", "No cleaned text"]
    elif lang == 'hi':
        # Hindi path: use the MuRIL transformer model
        print("using transformers for Hindi text")
        scores = predict_hindi_text(text)
        if scores[1] > scores[0]:  # score for the abusive class is higher
            return ["Abusive", text]
        else:
            return ["Not Abusive", text]
    else:
        return ["Unknown Language", "No cleaned text"]
# Define the Gradio output components
output_interfaces = [
    gr.Textbox(label="Result"),
    gr.Textbox(label="Cleaned text")
]

app = gr.Interface(
    predict_abusive_lang,
    inputs='text',
    outputs=output_interfaces,
    title="Abuse Classifier",
    description="Enter a sentence and the model will predict whether it is abusive or not.",
)

# Start the Gradio app
app.launch()
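# Optional: app.launch(share=True) also exposes a temporary public URL
# (a built-in Gradio parameter), useful when running outside a hosted Space.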