Shuwei Hou committed on
Commit
a7b67d5
·
1 Parent(s): febafde

nltk_download

Browse files
segmentation/segmentation_batchalign.py CHANGED
@@ -6,6 +6,10 @@ import torch
6
  from transformers import AutoTokenizer, AutoModelForTokenClassification
7
  from nltk.tokenize import sent_tokenize
8
 
 
 
 
 
9
  # input is the list of words, no punctuation, all lower case,
10
  # output is the list of labels: 0 means the corresponding word is not the last word of a c-unit,
11
  # 1 means the corresponding word is the last word of a c-unit
 
6
  from transformers import AutoTokenizer, AutoModelForTokenClassification
7
  from nltk.tokenize import sent_tokenize
8
 
9
+ import nltk
10
+ nltk.download('punkt_tab')
11
+ nltk.download('punkt')
12
+
13
  # input is the list of words, no punctuation, all lower case,
14
  # output is the list of labels: 0 means the corresponding word is not the last word of a c-unit,
15
  # 1 means the corresponding word is the last word of a c-unit