Update app.py
app.py (CHANGED)
@@ -2,15 +2,15 @@ import gradio as gr
 import torch
 import re
 from transformers import BertTokenizer, BertForSequenceClassification
-import nltk
+# import nltk
 from nltk.tokenize import word_tokenize
 from nltk.corpus import stopwords
 from nltk.stem import WordNetLemmatizer

 # Download required NLTK data
-nltk.download("stopwords")
-nltk.download("punkt")
-nltk.download("wordnet")
+# nltk.download("stopwords")
+# nltk.download("punkt")
+# nltk.download("wordnet")

 # Load model and tokenizer
 model_name = "./model"
@@ -21,7 +21,7 @@ model.to(device)
 model.eval()

 # NLP tools
-stop_words = set(stopwords.words("english"))
+# stop_words = set(stopwords.words("english"))
 lemmatizer = WordNetLemmatizer()

 # MBTI Labels
@@ -38,7 +38,7 @@ def preprocess_text(text):
     text = re.sub(r"http\S+|www.\S+", "", text)
     text = re.sub(r"[^a-zA-Z\s]", "", text)
     tokens = word_tokenize(text)
-    tokens = [lemmatizer.lemmatize(word) for word in tokens if word not in stop_words]
+    tokens = [lemmatizer.lemmatize(word) for word in tokens]
     return " ".join(tokens)

 # Inference function
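After this commit, preprocessing lemmatizes every token but no longer filters stop words. Below is a minimal sketch of the resulting function in isolation, not the full app: it assumes the NLTK tokenizer ("punkt") and WordNet data are already present in the runtime (for example, pre-baked into the Space image), since the nltk.download(...) calls are now commented out. The example call and expected output are illustrative additions, not part of the original file.

import re

from nltk.stem import WordNetLemmatizer
from nltk.tokenize import word_tokenize

lemmatizer = WordNetLemmatizer()

def preprocess_text(text):
    # Strip URLs, then anything that is not a letter or whitespace.
    text = re.sub(r"http\S+|www.\S+", "", text)
    text = re.sub(r"[^a-zA-Z\s]", "", text)
    # word_tokenize still needs NLTK tokenizer data at runtime, and
    # WordNetLemmatizer needs the WordNet corpus; with the downloads
    # commented out, both are assumed to be pre-installed.
    tokens = word_tokenize(text)
    # Stop-word filtering was dropped in this commit; only lemmatize.
    tokens = [lemmatizer.lemmatize(word) for word in tokens]
    return " ".join(tokens)

# Expected: "Visit and check out the cat"
print(preprocess_text("Visit https://example.com and check out the cats!"))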