Shuwei Hou
committed on
Commit
·
a7b67d5
1
Parent(s):
febafde
nltk_download
Browse files
segmentation/segmentation_batchalign.py
CHANGED
@@ -6,6 +6,10 @@ import torch
|
|
6 |
from transformers import AutoTokenizer, AutoModelForTokenClassification
|
7 |
from nltk.tokenize import sent_tokenize
|
8 |
|
|
|
|
|
|
|
|
|
9 |
# input is the list of words, no punctuation, all lower case,
|
10 |
# output is the list of labels: 0 means the corresponding word is not the last word of a c-unit,
|
11 |
# 1 means the corresponding word is the last word of a c-unit
|
|
|
6 |
from transformers import AutoTokenizer, AutoModelForTokenClassification
|
7 |
from nltk.tokenize import sent_tokenize
|
8 |
|
9 |
+
import nltk
|
10 |
+
nltk.download('punkt_tab')
|
11 |
+
nltk.download('punkt')
|
12 |
+
|
13 |
# input is the list of words, no punctuation, all lower case,
|
14 |
# output is the list of labels: 0 means the corresponding word is not the last word of a c-unit,
|
15 |
# 1 means the corresponding word is the last word of a c-unit
|