add tokenizer
Browse files
.gitignore
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
checkpoint-*/
|
runs/May01_22-45-11_7a60db93cccd/1651445184.7118316/events.out.tfevents.1651445184.7a60db93cccd.74.1
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:6c5c74c416a15542bd1b34eacf80dd7091ddbb1c5d40e34a6b9b6d6ece39b724
|
3 |
+
size 4591
|
runs/May01_22-45-11_7a60db93cccd/events.out.tfevents.1651445184.7a60db93cccd.74.0
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:a9f363f6570e5199e224d99ec606abb52d2da2d08408ba1f68e92940ca0407b5
|
3 |
+
size 7603
|
vocab.json
CHANGED
@@ -1 +1 @@
|
|
1 |
-
{"
|
|
|
1 |
+
{"<pad>": 0, "<s>": 1, "</s>": 2, "<unk>": 3, "|": 4, "ء": 5, "آ": 6, "ئ": 7, "ا": 8, "ب": 9, "ت": 10, "ث": 11, "ج": 12, "ح": 13, "خ": 14, "د": 15, "ذ": 16, "ر": 17, "ز": 18, "س": 19, "ش": 20, "ص": 21, "ض": 22, "ط": 23, "ظ": 24, "ع": 25, "غ": 26, "ف": 27, "ق": 28, "ل": 29, "م": 30, "ن": 31, "ه": 32, "و": 33, "پ": 34, "چ": 35, "ژ": 36, "ک": 37, "گ": 38, "ی": 39, "[UNK]": 40, "[PAD]": 41}
|