Spaces:

EuroPython2022
/

ToxicCommentClassification

Runtime error

App Files Files Community

ShAnSantosh commited on Jul 16, 2022

Commit

1f8de22

1 Parent(s): def3c19

app file created

Browse files

Files changed (1) hide show

app.py +108 -0

app.py ADDED Viewed

	@@ -0,0 +1,108 @@

+import torch
+import torch.nn as nn
+import gradio as gr
+import numpy as np
+import os
+import random
+from transformers import AutoConfig, AutoModel
+device = torch.device('cpu')
+labels = {
+ 0: 'toxic',
+ 1: 'severe_toxic',
+ 2: 'obscene',
+ 3: 'threat',
+ 4: 'insult',
+ 5: 'identity_hate',
+ }
+MODEL_NAME='roberta-base'
+NUM_CLASSES=6
+MAX_LEN = 128
+tokenizer = RobertaTokenizer.from_pretrained(MODEL_NAME, truncation=True, do_lower_case=True)
+class ToxicModel(torch.nn.Module):
+    def __init__(self):
+        super(ToxicModel, self).__init__()
+        hidden_dropout_prob: float = 0.1
+        layer_norm_eps: float = 1e-7
+        config = AutoConfig.from_pretrained(MODEL_NAME)
+        config.update(
+            {
+                "output_hidden_states": True,
+                "hidden_dropout_prob": hidden_dropout_prob,
+                "layer_norm_eps": layer_norm_eps,
+                "add_pooling_layer": False,
+                "num_labels": NUM_CLASSES,
+            }
+        )
+        self.transformer = AutoModel.from_pretrained(MODEL_NAME, config=config)
+        self.dropout = nn.Dropout(config.hidden_dropout_prob)
+        self.dropout1 = nn.Dropout(0.1)
+        self.dropout2 = nn.Dropout(0.2)
+        self.dropout3 = nn.Dropout(0.3)
+        self.dropout4 = nn.Dropout(0.4)
+        self.dropout5 = nn.Dropout(0.5)
+        self.output = nn.Linear(config.hidden_size, NUM_CLASSES)
+    def forward(self, input_ids, attention_mask, token_type_ids):
+        transformer_out = self.transformer(input_ids=input_ids, attention_mask=attention_mask, token_type_ids=token_type_ids)
+        sequence_output = transformer_out[0]
+        sequence_output = self.dropout(torch.mean(sequence_output, 1))
+        logits1 = self.output(self.dropout1(sequence_output))
+        logits2 = self.output(self.dropout2(sequence_output))
+        logits3 = self.output(self.dropout3(sequence_output))
+        logits4 = self.output(self.dropout4(sequence_output))
+        logits5 = self.output(self.dropout5(sequence_output))
+        logits = (logits1 + logits2 + logits3 + logits4 + logits5) / 5
+        return logits
+ def inference_fn(model, input_ids=None, attention_mask=None, token_type_ids=None):
+    model.eval()
+    input_ids = input_ids.to(device)
+    attention_mask = attention_mask.to(device)
+    token_type_ids = token_type_ids.to(device)
+    with torch.no_grad():
+        output = model(input_ids.unsqueeze(0), attention_mask.unsqueeze(0), token_type_ids.unsqueeze(0))
+    out = output.sigmoid().detach().cpu().numpy().flatten()
+    return out
+def predict(comment=None) -> dict:
+    text = str(comment)
+    text = " ".join(text.split())
+    inputs = tokenizer.encode_plus(
+            text,
+            None,
+            add_special_tokens=True,
+            max_length=MAX_LEN,
+            pad_to_max_length=True,
+            return_token_type_ids=True
+    )
+    ids = inputs['input_ids']
+    mask = inputs['attention_mask']
+    token_type_ids = inputs["token_type_ids"]
+    model = ToxicModel()
+    model.load_state_dict(torch.load("toxic_model_0.pth", map_location=torch.device(device)))
+    model.to(device)
+    predicted = inference_fn(model, ids, mask, token_type_ids)
+    return {labels[i]: float(predicted[i]) for i in range(NUM_CLASSES)}
+gr.Interface(fn=predict,
+             inputs='text',
+             outputs=gr.outputs.Label(num_top_classes=NUM_CLASSES)).launch()