ShAnSantosh committed
Commit 1f8de22 · 1 Parent(s): def3c19

app file created

Files changed (1)
  1. app.py +108 -0
app.py ADDED
@@ -0,0 +1,108 @@
+ import torch
+ import torch.nn as nn
+ import gradio as gr
+ import numpy as np
+ import os
+ import random
+ from transformers import AutoConfig, AutoModel, RobertaTokenizer
+
+ device = torch.device('cpu')
+
+ # Output label index -> toxicity category predicted by the classification head.
+ labels = {
+     0: 'toxic',
+     1: 'severe_toxic',
+     2: 'obscene',
+     3: 'threat',
+     4: 'insult',
+     5: 'identity_hate',
+ }
+
+ MODEL_NAME = 'roberta-base'
+ NUM_CLASSES = 6
+ MAX_LEN = 128
+
+ tokenizer = RobertaTokenizer.from_pretrained(MODEL_NAME, do_lower_case=True)
+
+
+ class ToxicModel(torch.nn.Module):
+     """roberta-base encoder with mean pooling, multi-sample dropout, and a 6-label output head."""
+
+     def __init__(self):
+         super(ToxicModel, self).__init__()
+         hidden_dropout_prob: float = 0.1
+         layer_norm_eps: float = 1e-7
+
+         config = AutoConfig.from_pretrained(MODEL_NAME)
+         config.update(
+             {
+                 "output_hidden_states": True,
+                 "hidden_dropout_prob": hidden_dropout_prob,
+                 "layer_norm_eps": layer_norm_eps,
+                 "add_pooling_layer": False,
+                 "num_labels": NUM_CLASSES,
+             }
+         )
+         self.transformer = AutoModel.from_pretrained(MODEL_NAME, config=config)
+         self.dropout = nn.Dropout(config.hidden_dropout_prob)
+         # Multi-sample dropout: five rates whose logits are averaged in forward().
+         self.dropout1 = nn.Dropout(0.1)
+         self.dropout2 = nn.Dropout(0.2)
+         self.dropout3 = nn.Dropout(0.3)
+         self.dropout4 = nn.Dropout(0.4)
+         self.dropout5 = nn.Dropout(0.5)
+         self.output = nn.Linear(config.hidden_size, NUM_CLASSES)
+
+     def forward(self, input_ids, attention_mask, token_type_ids):
+         transformer_out = self.transformer(input_ids=input_ids,
+                                            attention_mask=attention_mask,
+                                            token_type_ids=token_type_ids)
+         # Mean-pool the last hidden state over the sequence dimension.
+         sequence_output = transformer_out[0]
+         sequence_output = self.dropout(torch.mean(sequence_output, 1))
+         logits1 = self.output(self.dropout1(sequence_output))
+         logits2 = self.output(self.dropout2(sequence_output))
+         logits3 = self.output(self.dropout3(sequence_output))
+         logits4 = self.output(self.dropout4(sequence_output))
+         logits5 = self.output(self.dropout5(sequence_output))
+         logits = (logits1 + logits2 + logits3 + logits4 + logits5) / 5
+         return logits
+
+
+ def inference_fn(model, input_ids=None, attention_mask=None, token_type_ids=None):
+     model.eval()
+     input_ids = input_ids.to(device)
+     attention_mask = attention_mask.to(device)
+     token_type_ids = token_type_ids.to(device)
+
+     with torch.no_grad():
+         # Add a batch dimension, then squash logits to per-label probabilities.
+         output = model(input_ids.unsqueeze(0), attention_mask.unsqueeze(0), token_type_ids.unsqueeze(0))
+     out = output.sigmoid().detach().cpu().numpy().flatten()
+
+     return out
+
+
+ def predict(comment=None) -> dict:
+     text = str(comment)
+     text = " ".join(text.split())
+
+     inputs = tokenizer.encode_plus(
+         text,
+         None,
+         add_special_tokens=True,
+         max_length=MAX_LEN,
+         padding='max_length',
+         truncation=True,
+         return_token_type_ids=True
+     )
+     # encode_plus returns plain Python lists; convert them to tensors for the model.
+     ids = torch.tensor(inputs['input_ids'], dtype=torch.long)
+     mask = torch.tensor(inputs['attention_mask'], dtype=torch.long)
+     token_type_ids = torch.tensor(inputs["token_type_ids"], dtype=torch.long)
+
+     # Load the fine-tuned checkpoint (reloaded on every call).
+     model = ToxicModel()
+     model.load_state_dict(torch.load("toxic_model_0.pth", map_location=device))
+     model.to(device)
+
+     predicted = inference_fn(model, ids, mask, token_type_ids)
+
+     return {labels[i]: float(predicted[i]) for i in range(NUM_CLASSES)}
+
+
+ gr.Interface(fn=predict,
+              inputs='text',
+              outputs=gr.outputs.Label(num_top_classes=NUM_CLASSES)).launch()
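
For a quick local sanity check of the scoring path without the web UI, one could call predict directly. This is a minimal sketch, not part of the commit: it assumes the fine-tuned checkpoint toxic_model_0.pth is in the working directory, and since importing app as written also starts the Gradio server, the launch() call would need to be guarded or temporarily disabled first.

    # Minimal smoke test for the scoring path (assumes toxic_model_0.pth is available
    # and that app.py's launch() call has been guarded or commented out).
    from app import predict

    scores = predict("you are a wonderful person")
    for label, prob in sorted(scores.items(), key=lambda kv: kv[1], reverse=True):
        print(f"{label:>13s}  {prob:.3f}")

The dict returned by predict maps each of the six label names to a sigmoid probability, which is the same structure the Gradio Label output consumes.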