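# app.py -- Streamlit demo: runs a pre-trained Keras toxicity model
# ('tox_model.h5') over user-entered text and displays the raw predictions.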
import re
import string
import html
import pickle

import streamlit as st
import nltk
from nltk.stem.porter import PorterStemmer
from nltk.stem import WordNetLemmatizer
from nltk.tokenize import word_tokenize
from nltk.corpus import stopwords
from tensorflow.keras.models import load_model
from tensorflow.keras.preprocessing.sequence import pad_sequences

# NLTK resources used below: stopword removal ('stopwords'),
# word tokenization ('punkt'), and WordNet lemmatization ('wordnet').
nltk.download('stopwords')
nltk.download('punkt')
nltk.download('wordnet')
stop_words = stopwords.words('english')
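
# ---- Text preprocessing helpers ----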
def remove_special_chars(text):
    """Unescape HTML entities and strip leftover markup/escape artefacts."""
    re1 = re.compile(r' +')
    x1 = text.lower().replace('#39;', "'").replace('amp;', '&').replace('#146;', "'").replace(
        'nbsp;', ' ').replace('#36;', '$').replace('\\n', "\n").replace('quot;', "'").replace(
        '<br />', "\n").replace('\\"', '"').replace('<unk>', 'u_n').replace(' @.@ ', '.').replace(
        ' @-@ ', '-').replace('\\', ' \\ ')
    return re1.sub(' ', html.unescape(x1))
def to_lowercase(text):
return text.lower()
def remove_punctuation(text):
"""Remove punctuation from list of tokenized words"""
translator = str.maketrans('', '', string.punctuation)
return text.translate(translator)
def replace_numbers(text):
"""Replace all interger occurrences in list of tokenized words with textual representation"""
return re.sub(r'\d+', '', text)
def remove_whitespaces(text):
return text.strip()
def remove_stopwords(words, stop_words):
return [word for word in words if word not in stop_words]
def stem_words(words):
"""Stem words in text"""
stemmer = PorterStemmer()
return [stemmer.stem(word) for word in words]
def lemmatize_words(words):
"""Lemmatize words in text"""
lemmatizer = WordNetLemmatizer()
return [lemmatizer.lemmatize(word) for word in words]
def lemmatize_verbs(words):
"""Lemmatize verbs in text"""
lemmatizer = WordNetLemmatizer()
return ' '.join([lemmatizer.lemmatize(word, pos='v') for word in words])
def text2words(text):
return word_tokenize(text)
def clean_text(text):
    """Run the full preprocessing pipeline on a raw comment string."""
    text = remove_special_chars(text)
    text = remove_punctuation(text)
    text = to_lowercase(text)
    text = replace_numbers(text)
    words = text2words(text)
    words = remove_stopwords(words, stop_words)
    # words = stem_words(words)  # either stem or lemmatize, not both
    words = lemmatize_words(words)
    return lemmatize_verbs(words)  # already joined back into a single string
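
# Illustrative behaviour of the pipeline (assuming NLTK's default English
# stopword list): clean_text("This <br /> is GREAT!!! 123") -> "great"

# ---- Streamlit app ----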
model = load_model('tox_model.h5')

# The tokenizer fitted at training time is assumed to be pickled alongside the
# model; the exact filename is not given in the original source.
with open('tokenizer.pkl', 'rb') as f:
    tok = pickle.load(f)

text = st.text_area('Enter some text')

if text:
    comment_input = [clean_text(text)]
    comment_input = tok.texts_to_sequences(comment_input)
    test = pad_sequences(comment_input,
                         maxlen=50,
                         truncating='post',
                         padding='post')
    out = model.predict(test)
    st.json(out.tolist())
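
# Run locally with: streamlit run app.py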