hylee
committed on
Commit
·
574e721
1
Parent(s):
d66a59f
add nltk diff version
Browse files
- handler.py +5 -4
- requirements.txt +1 -1
handler.py
CHANGED
|
@@ -2,9 +2,9 @@ from typing import Dict, List, Any
|
|
| 2 |
from scipy.special import softmax
|
| 3 |
import numpy as np
|
| 4 |
import weakref
|
| 5 |
-
|
| 6 |
-
|
| 7 |
-
|
| 8 |
|
| 9 |
from utils import clean_str, clean_str_nopunct
|
| 10 |
import torch
|
|
@@ -138,12 +138,13 @@ class Transcript:
|
|
| 138 |
teacher_dict = {}
|
| 139 |
student_dict = {}
|
| 140 |
uptake_teacher_dict = {}
|
|
|
|
| 141 |
# stopwords = nltk.corpus.stopwords.word('english')
|
| 142 |
# print("stopwords: ", stopwords)
|
| 143 |
for utt in self.utterances:
|
| 144 |
words = (utt.get_clean_text(remove_punct=True)).split(' ')
|
| 145 |
for word in words:
|
| 146 |
-
|
| 147 |
if utt.role == 'teacher':
|
| 148 |
if word not in teacher_dict:
|
| 149 |
teacher_dict[word] = 0
|
|
|
|
| 2 |
from scipy.special import softmax
|
| 3 |
import numpy as np
|
| 4 |
import weakref
|
| 5 |
+
import nltk
|
| 6 |
+
from nltk.corpus import stopwords
|
| 7 |
+
nltk.download('stopwords')
|
| 8 |
|
| 9 |
from utils import clean_str, clean_str_nopunct
|
| 10 |
import torch
|
|
|
|
| 138 |
teacher_dict = {}
|
| 139 |
student_dict = {}
|
| 140 |
uptake_teacher_dict = {}
|
| 141 |
+
stopwords = stopwords.words('english')
|
| 142 |
# stopwords = nltk.corpus.stopwords.word('english')
|
| 143 |
# print("stopwords: ", stopwords)
|
| 144 |
for utt in self.utterances:
|
| 145 |
words = (utt.get_clean_text(remove_punct=True)).split(' ')
|
| 146 |
for word in words:
|
| 147 |
+
if word in stopwords: continue
|
| 148 |
if utt.role == 'teacher':
|
| 149 |
if word not in teacher_dict:
|
| 150 |
teacher_dict[word] = 0
|
requirements.txt
CHANGED
|
@@ -4,4 +4,4 @@ numpy==1.22.4
|
|
| 4 |
scipy==1.7.3
|
| 5 |
torch==1.10.2
|
| 6 |
transformers==4.29.1
|
| 7 |
-
|
|
|
|
| 4 |
scipy==1.7.3
|
| 5 |
torch==1.10.2
|
| 6 |
transformers==4.29.1
|
| 7 |
+
nltk==3.8.0
|