Update app.py
app.py CHANGED
@@ -3,9 +3,9 @@ import torch
 import re
 from transformers import BertTokenizer, BertForSequenceClassification
 # import nltk
-from nltk.tokenize import word_tokenize
-from nltk.corpus import stopwords
-from nltk.stem import WordNetLemmatizer
+# from nltk.tokenize import word_tokenize
+# from nltk.corpus import stopwords
+# from nltk.stem import WordNetLemmatizer
 
 # Download required NLTK data
 # nltk.download("stopwords")
@@ -37,9 +37,9 @@ def preprocess_text(text):
     text = text.lower()
     text = re.sub(r"http\S+|www.\S+", "", text)
     text = re.sub(r"[^a-zA-Z\s]", "", text)
-    tokens = word_tokenize(text)
-    tokens = [lemmatizer.lemmatize(word) for word in tokens]
-    return " ".join(tokens)
+    # tokens = word_tokenize(text)
+    # tokens = [lemmatizer.lemmatize(word) for word in tokens]
+    # return " ".join(tokens)
 
 # Inference function
 def predict_mbti(text):
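
For context, a minimal sketch of what preprocess_text looks like after this commit. The hunk does not show what the function now returns, so the final return text below is an assumption, not something visible in the diff:

import re

def preprocess_text(text):
    # Lowercase, strip URLs, and drop non-letter characters.
    text = text.lower()
    text = re.sub(r"http\S+|www.\S+", "", text)
    text = re.sub(r"[^a-zA-Z\s]", "", text)
    # NLTK tokenization/lemmatization is commented out by this commit:
    # tokens = word_tokenize(text)
    # tokens = [lemmatizer.lemmatize(word) for word in tokens]
    # return " ".join(tokens)
    return text  # assumption: any return outside the hunk is not shown

Dropping the NLTK steps is consistent with the BertTokenizer imported above: BERT applies its own WordPiece subword tokenization, so pre-tokenizing and lemmatizing the input adds little and can diverge from the text the model saw during pretraining.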