import gradio as gr
import joblib
import numpy as np
import torch
from transformers import AutoModelForSequenceClassification, AutoTokenizer

import nltk
nltk.download('wordnet')  # presumably needed by the lemmatizer inside clean.py

# Local helper modules shipped alongside this app
import clean
import language_detection

print("all imports worked")
# Load the pre-trained English classifier and its TF-IDF vectorizer
model = joblib.load('model_joblib.pkl')
print("model loaded")
tf = joblib.load('tf_joblib.pkl')
print("tfidf loaded")

# Load the Hindi abuse detection model from the Hugging Face Hub
hindi_tokenizer = AutoTokenizer.from_pretrained("Hate-speech-CNERG/hindi-abusive-MuRIL")
hindi_model = AutoModelForSequenceClassification.from_pretrained("Hate-speech-CNERG/hindi-abusive-MuRIL")
print("Hindi model loaded")
def predict_hindi_text(text):
    """Return softmax class probabilities from the Hindi MuRIL model."""
    inputs = hindi_tokenizer(text, return_tensors="pt", padding=True, truncation=True)
    with torch.no_grad():  # inference only, no gradients needed
        outputs = hindi_model(**inputs)
    predictions = torch.nn.functional.softmax(outputs.logits, dim=-1)
    scores = predictions[0].detach().numpy()
    return scores
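# Hedged usage sketch: for this binary abuse head, scores is a length-2
# probability array, and the comparison in predict_abusive_lang below assumes
# index 1 is the abusive class:
#
#     scores = predict_hindi_text(some_hindi_sentence)
#     is_abusive = scores[1] > scores[0]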
# Predict whether a sentence is abusive, routing by detected language
def predict_abusive_lang(text):
    print("original text ", text)
    lang = language_detection.en_hi_detection(text)
    print("language detected ", lang)
    if lang == 'eng':
        # English path: clean, vectorize with TF-IDF, then classify
        cleaned_text = clean.text_cleaning(text)
        print("cleaned text ", cleaned_text)
        vectorized = tf.transform([cleaned_text])
        print("tfidf transformation ", vectorized)
        prediction = model.predict(vectorized)
        print("prediction ", prediction)
        if len(prediction) != 0 and prediction[0] == 0:
            return ["Not Abusive", cleaned_text]
        elif len(prediction) != 0 and prediction[0] == 1:
            return ["Abusive", cleaned_text]
        else:
            return ["Please write something in the comment box...", "No cleaned text"]
    elif lang == 'hi':
        # Hindi path: use the MuRIL transformer model
        print("using transformers for Hindi text")
        scores = predict_hindi_text(text)
        if scores[1] > scores[0]:  # score for the abusive class is higher
            return ["Abusive", text]
        else:
            return ["Not Abusive", text]
    else:
        return ["Unknown Language", "No cleaned text"]
# Define the Gradio output components
output_interfaces = [
    gr.Textbox(label="Result"),
    gr.Textbox(label="Cleaned text")
]

app = gr.Interface(
    predict_abusive_lang,
    inputs='text',
    outputs=output_interfaces,
    title="Abuse Classifier",
    description="Enter a sentence and the model will predict whether it is abusive or not.",
)

# Start the Gradio app
app.launch()
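# Optional: app.launch(share=True) also exposes a temporary public URL
# (a built-in Gradio parameter), useful when running outside a hosted Space.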