import streamlit as st
from transformers import pipeline
from transformers import AutoTokenizer, AutoModelForSequenceClassification
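# Streamlit demo: classify one hard-coded sentence as offensive using a choice
# of three pre-trained models (zero-shot, fine-tuned RoBERTa, custom model).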

st.title("Milestone #2 offensive statement prediction with pre-trained models")
st.write("in this basic demo you can select a model to judge whether or not the text below is offensive")
text = "The mail man looks so dumb"
st.write(text)

options = ["zero-shot-classification", "cardiffnlp/twitter-roberta-base-offensive", "Greys/milestonemodel"]
model = st.selectbox("Select a pre-trained model", options)

con = st.button("Submit")
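# Run the selected model only after the user clicks Submit.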
if con:
  if model == "zero-shot-classification":
    classifier = pipeline(model)
    res = classifier(text, candidate_labels=["offensive"])
    label = res['labels'][0]
    score = res['scores'][0]
    st.write(f"Prediction: {label}, Score: {score*100}% chance")
  
  if model == "cardiffnlp/twitter-roberta-base-offensive":
    classifier = pipeline('text-classification', model='cardiffnlp/twitter-roberta-base-offensive', tokenizer='cardiffnlp/twitter-roberta-base-offensive')
    result = classifier(text)
    label = result[0]['label']
    score = result[0]['score']
    st.write(f"Prediction: {label}, Score: {score*100}% chance")
    
  if model == "Greys/milestonemodel":
  
    tokenizer = AutoTokenizer.from_pretrained("distilbert-base-uncased")
    model = AutoModelForSequenceClassification.from_pretrained("Greys/milestonemodel")
    my_list = ['toxic', 'severe_toxic', 'obscene', 'threat', 'insult', 'identity_hate']
    def classify_sentence(text):
      inputs = tokenizer(text, return_tensors="pt")
      outputs = model(**inputs)
      probs = outputs.logits.softmax(dim=1)
      return probs.detach().numpy()[0]
    probs = classify_sentence(text)
 def find_largest_number(numbers):
  if len(numbers) == 0:
    print("List is empty.")
    return None, None

  max_num = numbers[0]
  max_index = 0
  for i in range(1, len(numbers)):
    if numbers[i] > max_num:
      max_num = numbers[i]
      max_index = i

  return max_index


    print(probs)
    
    index = find_largest_number(probs)
    st.write(my_list[index])
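    # Note: numpy's probs.argmax() would return the same index as the
    # explicit find_largest_number loop above.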
# Label order matches the training data columns: id,toxic,severe_toxic,obscene,threat,insult,identity_hate