Spaces:

EuroPython2022
/

ToxicCommentClassification

Runtime error

App Files Files Community

ToxicCommentClassification / app.py

ShAnSantosh

app file created

1f8de22 about 3 years ago

raw

history blame

3.45 kB

	import torch
	import torch.nn as nn
	import gradio as gr
	import numpy as np
	import os
	import random
	from transformers import AutoConfig, AutoModel


	device = torch.device('cpu')


	labels = {
	0: 'toxic',
	1: 'severe_toxic',
	2: 'obscene',
	3: 'threat',
	4: 'insult',
	5: 'identity_hate',
	}

	MODEL_NAME='roberta-base'
	NUM_CLASSES=6

	MAX_LEN = 128
	tokenizer = RobertaTokenizer.from_pretrained(MODEL_NAME, truncation=True, do_lower_case=True)

	class ToxicModel(torch.nn.Module):
	def __init__(self):
	super(ToxicModel, self).__init__()
	hidden_dropout_prob: float = 0.1
	layer_norm_eps: float = 1e-7

	config = AutoConfig.from_pretrained(MODEL_NAME)

	config.update(
	{
	"output_hidden_states": True,
	"hidden_dropout_prob": hidden_dropout_prob,
	"layer_norm_eps": layer_norm_eps,
	"add_pooling_layer": False,
	"num_labels": NUM_CLASSES,
	}
	)
	self.transformer = AutoModel.from_pretrained(MODEL_NAME, config=config)
	self.dropout = nn.Dropout(config.hidden_dropout_prob)
	self.dropout1 = nn.Dropout(0.1)
	self.dropout2 = nn.Dropout(0.2)
	self.dropout3 = nn.Dropout(0.3)
	self.dropout4 = nn.Dropout(0.4)
	self.dropout5 = nn.Dropout(0.5)
	self.output = nn.Linear(config.hidden_size, NUM_CLASSES)


	def forward(self, input_ids, attention_mask, token_type_ids):
	transformer_out = self.transformer(input_ids=input_ids, attention_mask=attention_mask, token_type_ids=token_type_ids)
	sequence_output = transformer_out[0]
	sequence_output = self.dropout(torch.mean(sequence_output, 1))
	logits1 = self.output(self.dropout1(sequence_output))
	logits2 = self.output(self.dropout2(sequence_output))
	logits3 = self.output(self.dropout3(sequence_output))
	logits4 = self.output(self.dropout4(sequence_output))
	logits5 = self.output(self.dropout5(sequence_output))
	logits = (logits1 + logits2 + logits3 + logits4 + logits5) / 5
	return logits

	def inference_fn(model, input_ids=None, attention_mask=None, token_type_ids=None):
	model.eval()
	input_ids = input_ids.to(device)
	attention_mask = attention_mask.to(device)
	token_type_ids = token_type_ids.to(device)

	with torch.no_grad():
	output = model(input_ids.unsqueeze(0), attention_mask.unsqueeze(0), token_type_ids.unsqueeze(0))
	out = output.sigmoid().detach().cpu().numpy().flatten()

	return out


	def predict(comment=None) -> dict:
	text = str(comment)
	text = " ".join(text.split())

	inputs = tokenizer.encode_plus(
	text,
	None,
	add_special_tokens=True,
	max_length=MAX_LEN,
	pad_to_max_length=True,
	return_token_type_ids=True
	)
	ids = inputs['input_ids']
	mask = inputs['attention_mask']
	token_type_ids = inputs["token_type_ids"]

	model = ToxicModel()

	model.load_state_dict(torch.load("toxic_model_0.pth", map_location=torch.device(device)))
	model.to(device)

	predicted = inference_fn(model, ids, mask, token_type_ids)

	return {labels[i]: float(predicted[i]) for i in range(NUM_CLASSES)}


	gr.Interface(fn=predict,
	inputs='text',
	outputs=gr.outputs.Label(num_top_classes=NUM_CLASSES)).launch()