Jayesh13 committed
Commit c9917e4 · 1 Parent(s): 99b7e58

Update app.py

Files changed (1)
  1. app.py +84 -1
app.py CHANGED
@@ -1,9 +1,92 @@
import streamlit as st
import pickle
from tensorflow.keras.models import load_model
+ import re
+ import string
+
+ import nltk
+ from nltk.stem.porter import PorterStemmer
+ from nltk.stem import WordNetLemmatizer
+ from nltk.tokenize import word_tokenize
+
+ from nltk.corpus import stopwords
+ nltk.download('stopwords')
+ nltk.download('punkt')  # required by word_tokenize
+ nltk.download('wordnet')  # required by WordNetLemmatizer
+ stop_words = stopwords.words('english')
+ import html
+ import unicodedata
+
+ from tensorflow.keras.preprocessing.text import text_to_word_sequence
+ from tensorflow.keras.preprocessing.text import Tokenizer
+ from tensorflow.keras.preprocessing.sequence import pad_sequences
+ from tensorflow.keras import models
+ from tensorflow.keras import layers
+ from tensorflow.keras import losses
+ from tensorflow.keras import metrics
+ from tensorflow.keras import optimizers
+ from tensorflow.keras.utils import plot_model
+
+ def remove_special_chars(text):
+     re1 = re.compile(r' +')
+     x1 = text.lower().replace('#39;', "'").replace('amp;', '&').replace('#146;', "'").replace(
+         'nbsp;', ' ').replace('#36;', '$').replace('\\n', "\n").replace('quot;', "'").replace(
+         '<br />', "\n").replace('\\"', '"').replace('<unk>', 'u_n').replace(' @.@ ', '.').replace(
+         ' @-@ ', '-').replace('\\', ' \\ ')
+     return re1.sub(' ', html.unescape(x1))
+
+ def to_lowercase(text):
+     return text.lower()
+
+ def remove_punctuation(text):
+     """Remove punctuation from the text"""
+     translator = str.maketrans('', '', string.punctuation)
+     return text.translate(translator)
+
+ def replace_numbers(text):
+     """Remove all integer occurrences from the text"""
+     return re.sub(r'\d+', '', text)
+
+ def remove_whitespaces(text):
+     return text.strip()
+
+ def remove_stopwords(words, stop_words):
+     return [word for word in words if word not in stop_words]
+
+ def stem_words(words):
+     """Stem words in text"""
+     stemmer = PorterStemmer()
+     return [stemmer.stem(word) for word in words]
+
+ def lemmatize_words(words):
+     """Lemmatize words in text"""
+     lemmatizer = WordNetLemmatizer()
+     return [lemmatizer.lemmatize(word) for word in words]
+
+ def lemmatize_verbs(words):
+     """Lemmatize verbs in text"""
+     lemmatizer = WordNetLemmatizer()
+     return [lemmatizer.lemmatize(word, pos='v') for word in words]
+
+ def text2words(text):
+     return word_tokenize(text)
+
+ def clean_text(text):
+     text = remove_special_chars(text)
+     text = remove_punctuation(text)
+     text = to_lowercase(text)
+     text = replace_numbers(text)
+     words = text2words(text)
+     words = remove_stopwords(words, stop_words)
+     # words = stem_words(words)  # either stem or lemmatize
+     words = lemmatize_words(words)
+     words = lemmatize_verbs(words)
+
+     return ' '.join(words)
+

model = load_model('tox_model.h5')
- text = st.text_area('Enter some text', 'Type here')
+ text = st.text_area('Enter some text')

comment = []
comment_input = comment.append(text)
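
Note that the file still ends with comment_input = comment.append(text), which assigns None, since list.append mutates the list in place and returns nothing, so the cleaned text is never actually scored. Below is a minimal sketch of the missing inference step, continuing from the imports and clean_text defined in the diff above. It assumes the Tokenizer fitted at training time was pickled; the filename 'tokenizer.pickle' and maxlen=200 are assumptions, not taken from the commit.

# Sketch only: 'tokenizer.pickle' and maxlen=200 are assumed, not part of the commit.
with open('tokenizer.pickle', 'rb') as f:          # hypothetical filename
    tokenizer = pickle.load(f)                     # Tokenizer fitted during training

if text:
    cleaned = clean_text(text)                     # preprocessing defined above
    seq = tokenizer.texts_to_sequences([cleaned])  # one comment -> token id sequence
    padded = pad_sequences(seq, maxlen=200)        # must match the training maxlen
    st.write(model.predict(padded))                # toxicity probabilities

Building the list directly as comment = [clean_text(text)] would also avoid the None assignment if a list of comments is what the rest of the app expects.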