Update index.html
Browse files- index.html +348 -18
index.html
CHANGED
@@ -1,19 +1,349 @@
|
|
1 |
-
<!
|
2 |
-
<html>
|
3 |
-
|
4 |
-
|
5 |
-
|
6 |
-
|
7 |
-
|
8 |
-
|
9 |
-
|
10 |
-
|
11 |
-
|
12 |
-
|
13 |
-
|
14 |
-
|
15 |
-
|
16 |
-
|
17 |
-
|
18 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
19 |
</html>
|
|
|
1 |
+
<!DOCTYPE html>
|
2 |
+
<html lang="en">
|
3 |
+
<head>
|
4 |
+
<meta charset="UTF-8">
|
5 |
+
<title>Vietnamese NLP Tasks – Benchmark Overview</title>
|
6 |
+
<meta name="viewport" content="width=device-width,initial-scale=1">
|
7 |
+
<style>
|
8 |
+
body { font-family: 'Segoe UI', Arial, sans-serif; background: #f7faff; color: #263347; margin: 0; }
|
9 |
+
.container { max-width: 1000px; margin: 36px auto; background: #fff; padding: 28px 20px 38px 20px; border-radius: 14px; box-shadow: 0 2px 14px #0002;}
|
10 |
+
h1, h2, h3, h4 { margin-top: 1.7em; margin-bottom: 0.5em; }
|
11 |
+
h1 { color: #12469b; font-size: 2.2em; margin-top: 0; }
|
12 |
+
h2 { color: #188754; border-left: 6px solid #a3d7ff; padding-left: 12px;}
|
13 |
+
h3, h4 { color: #1a324b;}
|
14 |
+
table { width: 100%; border-collapse: collapse; margin: 1.1em 0 1.8em 0;}
|
15 |
+
th, td { padding: 8px 10px; border-bottom: 1px solid #eee;}
|
16 |
+
th { background: #eaf3ff; }
|
17 |
+
tr:hover { background: #f6fbff;}
|
18 |
+
a { color: #2369cb; text-decoration: none;}
|
19 |
+
a:hover { text-decoration: underline;}
|
20 |
+
ul, ol { margin-top: 0.5em; margin-bottom: 1.1em; }
|
21 |
+
.dataset { background: #f1f8fc; border-left: 5px solid #97d3f6; padding: 8px 18px; margin: 12px 0 18px 0;}
|
22 |
+
.note { color: #555; background: #f6fcf7; border-left: 5px solid #7de59d; padding: 6px 16px; margin: 14px 0 20px 0;}
|
23 |
+
.icon { font-size: 1.1em; margin-right: 6px;}
|
24 |
+
.footer { text-align: center; font-size: 0.96em; color: #999; margin-top: 36px; }
|
25 |
+
@media (max-width: 700px) {
|
26 |
+
.container { padding: 6px; }
|
27 |
+
table, th, td { font-size: 14px;}
|
28 |
+
}
|
29 |
+
</style>
|
30 |
+
</head>
|
31 |
+
<body>
|
32 |
+
<div class="container">
|
33 |
+
|
34 |
+
<h1>🇻🇳 Vietnamese NLP Tasks <span style="font-size:0.8em; color:#555;">— Benchmark &amp; SOTA Overview</span></h1>
|
35 |
+
<div style="margin-bottom:1.2em; color:#537fc2;">
|
36 |
+
<span class="icon">📈</span>
|
37 |
+
<b>This page tracks major Vietnamese NLP datasets and models for <u>Dependency Parsing</u>, <u>Intent Detection</u>, <u>Machine Translation</u>, <u>NER</u>, <u>POS Tagging</u>, <u>Semantic Parsing</u>, and <u>Word Segmentation</u>.</b>
|
38 |
+
</div>
|
39 |
+
|
40 |
+
<!-- DEPENDENCY PARSING -->
|
41 |
+
<h2>Dependency Parsing</h2>
|
42 |
+
<div class="dataset">
|
43 |
+
<span class="icon">🗂️</span>
|
44 |
+
<b>VnDT v1.1/v1.0</b>: Benchmark treebank &gt;10K sentences. <br>
|
45 |
+
<b>Test:</b> 1,020 (v1.1), Dev: 200, Rest: Train.
|
46 |
+
</div>
|
47 |
+
|
48 |
+
<h3>VnDT v1.1</h3>
|
49 |
+
<table>
|
50 |
+
<tr>
|
51 |
+
<th>Model</th>
|
52 |
+
<th>LAS</th>
|
53 |
+
<th>UAS</th>
|
54 |
+
<th>Paper</th>
|
55 |
+
<th>Code</th>
|
56 |
+
</tr>
|
57 |
+
<tr>
|
58 |
+
<td>PhoNLP (2021)</td><td>79.11</td><td>85.47</td>
|
59 |
+
<td><a href="https://aclanthology.org/2021.naacl-demos.1.pdf">PhoNLP</a></td>
|
60 |
+
<td><a href="https://github.com/VinAIResearch/PhoNLP">Official</a></td>
|
61 |
+
</tr>
|
62 |
+
<tr>
|
63 |
+
<td>PhoBERT-base (2020)</td><td>78.77</td><td>85.22</td>
|
64 |
+
<td><a href="https://arxiv.org/abs/2003.00744">PhoBERT</a></td>
|
65 |
+
<td><a href="https://github.com/VinAIResearch/PhoBERT">Official</a></td>
|
66 |
+
</tr>
|
67 |
+
<tr>
|
68 |
+
<td>Biaffine (2017)</td><td>74.99</td><td>81.19</td>
|
69 |
+
<td><a href="https://arxiv.org/abs/1611.01734">Biaffine Parsing</a></td>
|
70 |
+
<td></td>
|
71 |
+
</tr>
|
72 |
+
<tr>
|
73 |
+
<td>VnCoreNLP (2018)</td><td>71.38</td><td>77.35</td>
|
74 |
+
<td><a href="http://aclweb.org/anthology/N18-5012">VnCoreNLP</a></td>
|
75 |
+
<td><a href="https://github.com/vncorenlp/VnCoreNLP">Official</a></td>
|
76 |
+
</tr>
|
77 |
+
</table>
|
78 |
+
|
79 |
+
<h3>VnDT v1.0 (Gold POS)</h3>
|
80 |
+
<table>
|
81 |
+
<tr>
|
82 |
+
<th>Model</th>
|
83 |
+
<th>LAS</th>
|
84 |
+
<th>UAS</th>
|
85 |
+
<th>Paper</th>
|
86 |
+
<th>Code</th>
|
87 |
+
</tr>
|
88 |
+
<tr>
|
89 |
+
<td>VnCoreNLP (2018)</td><td>73.39</td><td>79.02</td>
|
90 |
+
<td><a href="http://aclweb.org/anthology/N18-5012">VnCoreNLP</a></td>
|
91 |
+
<td><a href="https://github.com/vncorenlp/VnCoreNLP">Official</a></td>
|
92 |
+
</tr>
|
93 |
+
<tr>
|
94 |
+
<td>BIST BiLSTM graph (2016)</td><td>73.17</td><td>79.39</td>
|
95 |
+
<td><a href="https://aclweb.org/anthology/Q16-1023">BIST Parser</a></td>
|
96 |
+
<td><a href="https://github.com/elikip/bist-parser/tree/master/bmstparser/src">Official</a></td>
|
97 |
+
</tr>
|
98 |
+
<tr>
|
99 |
+
<td>MSTparser (2006)</td><td>70.29</td><td>76.47</td>
|
100 |
+
<td><a href="http://www.aclweb.org/anthology/P05-1012">MSTparser</a></td>
|
101 |
+
<td></td>
|
102 |
+
</tr>
|
103 |
+
</table>
|
104 |
+
|
105 |
+
<!-- INTENT DETECTION -->
|
106 |
+
<h2>Intent Detection &amp; Slot Filling</h2>
|
107 |
+
<div class="dataset">
|
108 |
+
<span class="icon">🛫</span>
|
109 |
+
<b>PhoATIS Dataset</b> (flight booking domain): Train: 4,478, Dev: 500, Test: 893
|
110 |
+
</div>
|
111 |
+
<table>
|
112 |
+
<tr>
|
113 |
+
<th>Model</th><th>Intent Acc.</th><th>Slot F1</th><th>Sent. Acc.</th><th>Paper</th><th>Code</th>
|
114 |
+
</tr>
|
115 |
+
<tr>
|
116 |
+
<td>JointIDSF (2021)</td><td>97.62</td><td>94.98</td><td>86.25</td>
|
117 |
+
<td><a href="https://arxiv.org/abs/2104.02021">JointIDSF</a></td>
|
118 |
+
<td><a href="https://github.com/VinAIResearch/JointIDSF">Official</a></td>
|
119 |
+
</tr>
|
120 |
+
<tr>
|
121 |
+
<td>JointBERT+PhoBERT</td><td>97.40</td><td>94.75</td><td>85.55</td>
|
122 |
+
<td><a href="https://arxiv.org/abs/2104.02021">JointIDSF</a></td>
|
123 |
+
<td><a href="https://github.com/VinAIResearch/JointIDSF">Official</a></td>
|
124 |
+
</tr>
|
125 |
+
</table>
|
126 |
+
|
127 |
+
<!-- MACHINE TRANSLATION -->
|
128 |
+
<h2>Machine Translation</h2>
|
129 |
+
<div class="dataset">
|
130 |
+
<span class="icon">🌐</span>
|
131 |
+
<b>PhoMT Dataset</b>: 3.02M sentence pairs | 6 domains (TED, WikiHow, MediaWiki, OpenSubtitles, News, Blog)
|
132 |
+
</div>
|
133 |
+
<table>
|
134 |
+
<tr>
|
135 |
+
<th>Model</th><th>EN→VI (BLEU)</th><th>VI→EN (BLEU)</th><th>Paper</th><th>Code</th>
|
136 |
+
</tr>
|
137 |
+
<tr>
|
138 |
+
<td>mBART (2020)</td><td>43.46</td><td>39.78</td>
|
139 |
+
<td><a href="https://arxiv.org/abs/2001.08210">mBART</a></td>
|
140 |
+
<td><a href="https://github.com/pytorch/fairseq/tree/main/examples/mbart">Link</a></td>
|
141 |
+
</tr>
|
142 |
+
<tr>
|
143 |
+
<td>Transformer-big</td><td>42.94</td><td>37.83</td>
|
144 |
+
<td><a href="https://arxiv.org/abs/1706.03762">Transformer</a></td>
|
145 |
+
<td><a href="https://github.com/pytorch/fairseq/tree/main/examples/translation">Link</a></td>
|
146 |
+
</tr>
|
147 |
+
</table>
|
148 |
+
<div class="dataset">
|
149 |
+
<span class="icon">📋</span>
|
150 |
+
<b>IWSLT2015</b>: 150K sentence pairs (EN↔VI) | <a href="https://github.com/tensorflow/nmt">Data &amp; Scripts</a>
|
151 |
+
</div>
|
152 |
+
<table>
|
153 |
+
<tr>
|
154 |
+
<th>Model</th><th>BLEU</th><th>Paper</th><th>Code</th>
|
155 |
+
</tr>
|
156 |
+
<tr>
|
157 |
+
<td>Nguyen &amp; Salazar (2019)</td><td>32.8</td>
|
158 |
+
<td><a href="https://arxiv.org/abs/1910.05895">Transformers w/o Tears</a></td>
|
159 |
+
<td><a href="https://github.com/tnq177/transformers_without_tears">Official</a></td>
|
160 |
+
</tr>
|
161 |
+
<tr>
|
162 |
+
<td>Provilkov et al. (2019)</td><td>33.27 (uncased)</td>
|
163 |
+
<td><a href="https://arxiv.org/abs/1910.13267">BPE-Dropout</a></td>
|
164 |
+
<td></td>
|
165 |
+
</tr>
|
166 |
+
<tr>
|
167 |
+
<td>Xu et al. (2019)</td><td>31.4</td>
|
168 |
+
<td><a href="https://papers.nips.cc/paper/8689-understanding-and-improving-layer-normalization.pdf">Layer Norm</a></td>
|
169 |
+
<td><a href="https://github.com/lancopku/AdaNorm">Official</a></td>
|
170 |
+
</tr>
|
171 |
+
<tr>
|
172 |
+
<td>Transformer (2017)</td><td>28.9</td>
|
173 |
+
<td><a href="http://papers.nips.cc/paper/7181-attention-is-all-you-need">Transformer</a></td>
|
174 |
+
<td><a href="https://github.com/duyvuleo/Transformer-DyNet">Link</a></td>
|
175 |
+
</tr>
|
176 |
+
</table>
|
177 |
+
|
178 |
+
<!-- NER -->
|
179 |
+
<h2>Named Entity Recognition (NER)</h2>
|
180 |
+
<div class="dataset">
|
181 |
+
<span class="icon">🩺</span>
|
182 |
+
<b>PhoNER_COVID19</b>: 10 types, 34,984 entities, 10,027 sentences
|
183 |
+
</div>
|
184 |
+
<table>
|
185 |
+
<tr>
|
186 |
+
<th>Model</th><th>F1</th><th>Paper</th><th>Code</th>
|
187 |
+
</tr>
|
188 |
+
<tr>
|
189 |
+
<td>PhoBERT-large</td><td>94.5</td>
|
190 |
+
<td><a href="https://arxiv.org/abs/2003.00744">PhoBERT</a></td>
|
191 |
+
<td><a href="https://github.com/VinAIResearch/PhoBERT">Official</a></td>
|
192 |
+
</tr>
|
193 |
+
<tr>
|
194 |
+
<td>XLM-R-large</td><td>93.8</td>
|
195 |
+
<td><a href="https://aclanthology.org/2020.acl-main.747/">XLM-R</a></td>
|
196 |
+
<td><a href="https://github.com/facebookresearch/XLM">Official</a></td>
|
197 |
+
</tr>
|
198 |
+
<tr>
|
199 |
+
<td>BiLSTM-CRF + CNN-char</td><td>91.0</td>
|
200 |
+
<td><a href="http://www.aclweb.org/anthology/P16-1101">BiLSTM-CRF</a></td>
|
201 |
+
<td><a href="https://github.com/UKPLab/emnlp2017-bilstm-cnn-crf/">Link</a></td>
|
202 |
+
</tr>
|
203 |
+
</table>
|
204 |
+
|
205 |
+
<div class="dataset">
|
206 |
+
<span class="icon">📄</span>
|
207 |
+
<b>VLSP 2016 NER</b>: 16,861 train/dev, 2,831 test sentences.
|
208 |
+
</div>
|
209 |
+
<table>
|
210 |
+
<tr>
|
211 |
+
<th>Model</th><th>F1</th><th>Paper</th><th>Code</th>
|
212 |
+
</tr>
|
213 |
+
<tr>
|
214 |
+
<td>PhoBERT-large</td><td>94.7</td>
|
215 |
+
<td><a href="https://arxiv.org/abs/2003.00744">PhoBERT</a></td>
|
216 |
+
<td><a href="https://github.com/VinAIResearch/PhoBERT">Official</a></td>
|
217 |
+
</tr>
|
218 |
+
<tr>
|
219 |
+
<td>PhoNLP</td><td>94.41</td>
|
220 |
+
<td><a href="https://aclanthology.org/2021.naacl-demos.1.pdf">PhoNLP</a></td>
|
221 |
+
<td><a href="https://github.com/VinAIResearch/PhoNLP">Official</a></td>
|
222 |
+
</tr>
|
223 |
+
<tr>
|
224 |
+
<td>vELECTRA</td><td>94.07</td>
|
225 |
+
<td><a href="https://arxiv.org/abs/2006.15994">vELECTRA</a></td>
|
226 |
+
<td><a href="https://github.com/fpt-corp/viBERT">Official</a></td>
|
227 |
+
</tr>
|
228 |
+
<tr>
|
229 |
+
<td>VnCoreNLP</td><td>91.30</td>
|
230 |
+
<td><a href="http://aclweb.org/anthology/N18-5012">VnCoreNLP</a></td>
|
231 |
+
<td><a href="https://github.com/vncorenlp/VnCoreNLP">Official</a></td>
|
232 |
+
</tr>
|
233 |
+
</table>
|
234 |
+
|
235 |
+
<!-- PART OF SPEECH -->
|
236 |
+
<h2>Part-of-Speech Tagging</h2>
|
237 |
+
<div class="dataset">
|
238 |
+
<span class="icon">🔤</span>
|
239 |
+
<b>VLSP 2013</b>: 27,870 train/dev, 2,120 test
|
240 |
+
</div>
|
241 |
+
<table>
|
242 |
+
<tr>
|
243 |
+
<th>Model</th><th>Accuracy</th><th>Paper</th><th>Code</th>
|
244 |
+
</tr>
|
245 |
+
<tr>
|
246 |
+
<td>PhoBERT-large</td><td>96.8</td>
|
247 |
+
<td><a href="https://arxiv.org/abs/2003.00744">PhoBERT</a></td>
|
248 |
+
<td><a href="https://github.com/VinAIResearch/PhoBERT">Official</a></td>
|
249 |
+
</tr>
|
250 |
+
<tr>
|
251 |
+
<td>vELECTRA</td><td>96.77</td>
|
252 |
+
<td><a href="https://arxiv.org/abs/2006.15994">vELECTRA</a></td>
|
253 |
+
<td><a href="https://github.com/fpt-corp/viBERT">Official</a></td>
|
254 |
+
</tr>
|
255 |
+
<tr>
|
256 |
+
<td>PhoNLP</td><td>96.76</td>
|
257 |
+
<td><a href="https://aclanthology.org/2021.naacl-demos.1.pdf">PhoNLP</a></td>
|
258 |
+
<td><a href="https://github.com/VinAIResearch/PhoNLP">Official</a></td>
|
259 |
+
</tr>
|
260 |
+
<tr>
|
261 |
+
<td>PhoBERT-base</td><td>96.7</td>
|
262 |
+
<td><a href="https://arxiv.org/abs/2003.00744">PhoBERT</a></td>
|
263 |
+
<td><a href="https://github.com/VinAIResearch/PhoBERT">Official</a></td>
|
264 |
+
</tr>
|
265 |
+
<tr>
|
266 |
+
<td>VnCoreNLP-VnMarMoT</td><td>95.88</td>
|
267 |
+
<td><a href="http://aclweb.org/anthology/U17-1013">VnMarMoT</a></td>
|
268 |
+
<td><a href="https://github.com/datquocnguyen/vnmarmot">Official</a></td>
|
269 |
+
</tr>
|
270 |
+
<tr>
|
271 |
+
<td>BiLSTM-CRF + CNN-char</td><td>95.40</td>
|
272 |
+
<td><a href="http://www.aclweb.org/anthology/P16-1101">BiLSTM-CRF</a></td>
|
273 |
+
<td><a href="https://github.com/XuezheMax/LasagneNLP">Official</a></td>
|
274 |
+
</tr>
|
275 |
+
<tr>
|
276 |
+
<td>RDRPOSTagger</td><td>95.11</td>
|
277 |
+
<td><a href="http://www.aclweb.org/anthology/E14-2005">RDRPOSTagger</a></td>
|
278 |
+
<td><a href="https://github.com/datquocnguyen/rdrpostagger">Official</a></td>
|
279 |
+
</tr>
|
280 |
+
</table>
|
281 |
+
|
282 |
+
<!-- SEMANTIC PARSING -->
|
283 |
+
<h2>Semantic Parsing</h2>
|
284 |
+
<div class="dataset">
|
285 |
+
<span class="icon">🗃️</span>
|
286 |
+
<b>ViText2SQL</b>: 10K question/SQL pairs, the first public Text-to-SQL dataset for Vietnamese.
|
287 |
+
</div>
|
288 |
+
<table>
|
289 |
+
<tr>
|
290 |
+
<th>Model</th><th>Exact Match Acc.</th><th>Paper</th><th>Code</th><th>Note</th>
|
291 |
+
</tr>
|
292 |
+
<tr>
|
293 |
+
<td>IRNet (2019)</td><td>53.2</td>
|
294 |
+
<td><a href="https://aclanthology.org/2020.findings-emnlp.364/">ViText2SQL</a></td>
|
295 |
+
<td><a href="https://github.com/microsoft/IRNet">Link</a></td>
|
296 |
+
<td>Using PhoBERT encoder</td>
|
297 |
+
</tr>
|
298 |
+
<tr>
|
299 |
+
<td>EditSQL (2019)</td><td>52.6</td>
|
300 |
+
<td><a href="https://aclanthology.org/2020.findings-emnlp.364/">ViText2SQL</a></td>
|
301 |
+
<td><a href="https://github.com/ryanzhumich/editsql">Link</a></td>
|
302 |
+
<td>Using PhoBERT encoder</td>
|
303 |
+
</tr>
|
304 |
+
</table>
|
305 |
+
|
306 |
+
<!-- WORD SEGMENTATION -->
|
307 |
+
<h2>Word Segmentation</h2>
|
308 |
+
<div class="dataset">
|
309 |
+
<span class="icon">✂️</span>
|
310 |
+
<b>VLSP 2013</b>: 75k train, 2,120 test sentences (manually word-segmented)
|
311 |
+
</div>
|
312 |
+
<table>
|
313 |
+
<tr>
|
314 |
+
<th>Model</th><th>F1</th><th>Paper</th><th>Code</th>
|
315 |
+
</tr>
|
316 |
+
<tr>
|
317 |
+
<td>UITws-v1 (2019)</td><td>98.06</td>
|
318 |
+
<td><a href="https://arxiv.org/abs/2006.07804">UITws-v1</a></td>
|
319 |
+
<td><a href="https://github.com/ngannlt/UITws-v1">Official</a></td>
|
320 |
+
</tr>
|
321 |
+
<tr>
|
322 |
+
<td>VnCoreNLP-RDRsegmenter (2018)</td><td>97.90</td>
|
323 |
+
<td><a href="http://www.lrec-conf.org/proceedings/lrec2018/pdf/55.pdf">VnCoreNLP</a></td>
|
324 |
+
<td><a href="https://github.com/datquocnguyen/RDRsegmenter">Official</a></td>
|
325 |
+
</tr>
|
326 |
+
<tr>
|
327 |
+
<td>UETsegmenter (2016)</td><td>97.87</td>
|
328 |
+
<td><a href="http://doi.org/10.1109/RIVF.2016.7800279">UETsegmenter</a></td>
|
329 |
+
<td><a href="https://github.com/phongnt570/UETsegmenter">Official</a></td>
|
330 |
+
</tr>
|
331 |
+
<tr>
|
332 |
+
<td>vnTokenizer (2008)</td><td>97.33</td>
|
333 |
+
<td><a href="https://link.springer.com/chapter/10.1007/978-3-540-88282-4_23">vnTokenizer</a></td>
|
334 |
+
<td></td>
|
335 |
+
</tr>
|
336 |
+
<tr>
|
337 |
+
<td>JVnSegmenter (2006)</td><td>97.06</td>
|
338 |
+
<td><a href="http://www.aclweb.org/anthology/Y06-1028">JVnSegmenter</a></td>
|
339 |
+
<td></td>
|
340 |
+
</tr>
|
341 |
+
</table>
|
342 |
+
|
343 |
+
<div class="footer">
|
344 |
+
NLP Progress – Benchmarks collected by the open-source community.<br>
|
345 |
+
<span style="color:#ccc;">Style inspired by <a href="https://github.com/sebastianruder/NLP-progress" target="_blank" rel="noopener noreferrer">NLP-progress</a></span>
|
346 |
+
</div>
|
347 |
+
</div>
|
348 |
+
</body>
|
349 |
</html>
|