Spaces:
Runtime error
Runtime error
# import for loading python objects (scikit-learn models)
import pickle
import string

import nltk
import sklearn  # the PyPI distribution is "scikit-learn"; the importable module is "sklearn"
import streamlit as st
from sklearn.feature_extraction.text import TfidfVectorizer
from TurkishStemmer import TurkishStemmer
def custom_tokenizer_with_Turkish_stemmer(text):
    """Tokenize ``text`` and return the lower-cased Turkish stem of each token.

    The characters listed in the module-level ``trans_table`` are removed
    first, the remaining text is split with ``nltk.word_tokenize``, and every
    token is lower-cased and stemmed with the module-level ``stemmerTR``.
    """
    # ``trans_table`` maps code points to ``None``, so ``str.translate``
    # deletes those characters instead of substituting them.
    cleaned = text.translate(trans_table)
    return [stemmerTR.stem(token.lower()) for token in nltk.word_tokenize(cleaned)]
def predictSMSdata(test_text):
    """Classify one SMS text and return the predicted category name.

    Loads the pickled classifier (``LinearSVC_SMS_spam_TR.pickle``) and the
    pickled TF-IDF vectorizer (``tfidf_vectorizer_TR.pickle``) from the
    current working directory, vectorizes ``test_text`` and looks the
    classifier output up in the sorted category list.

    Args:
        test_text: The message to classify.

    Returns:
        ``"legitimate"`` or ``"spam"``. The label is also printed, as before.

    Raises:
        OSError: If either pickle file cannot be opened.
    """
    categories = sorted(["legitimate", "spam"])

    # SECURITY: ``pickle.load`` can execute arbitrary code embedded in the
    # file. Only load model files that ship with this application.
    # ``with`` guarantees the handles are closed even if unpickling fails.
    with open("LinearSVC_SMS_spam_TR.pickle", "rb") as model_file:
        classifier = pickle.load(model_file)

    # The vectorizer is needed to transform the raw text into features.
    with open("tfidf_vectorizer_TR.pickle", "rb") as vectorizer_file:
        tfidf_vectorizer = pickle.load(vectorizer_file)

    # ``transform`` is given a list holding the single document.
    features = tfidf_vectorizer.transform([test_text])
    predicted = classifier.predict(features)

    # NOTE(review): assumes the classifier emits integer class indices that
    # line up with the sorted ``categories`` list -- confirm against training.
    label = categories[predicted[0]]
    print(label)
    # Return the label so callers can use it; previously the function only
    # printed and implicitly returned None.
    return label
# Deletion table for ``str.translate``: every ASCII punctuation character and
# digit maps to ``None`` and is therefore removed from the text.
trans_table = {ord(c): None for c in string.punctuation + string.digits}
# Shared stemmer instance used by the custom tokenizer.
stemmerTR = TurkishStemmer()

text = st.text_area("enter some text!")
if text:
    out = predictSMSdata(text)
    # ``st.json`` treats a ``str`` argument as already-serialized JSON, so a
    # bare label would not be a valid document; wrap the result in a mapping.
    st.json({"prediction": out})