zoha committed on
Commit
dea0e68
·
1 Parent(s): 24bfa5f

add tokenizer

Browse files
.gitignore ADDED
@@ -0,0 +1 @@
 
 
1
+ checkpoint-*/
runs/May01_22-45-11_7a60db93cccd/1651445184.7118316/events.out.tfevents.1651445184.7a60db93cccd.74.1 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6c5c74c416a15542bd1b34eacf80dd7091ddbb1c5d40e34a6b9b6d6ece39b724
3
+ size 4591
runs/May01_22-45-11_7a60db93cccd/events.out.tfevents.1651445184.7a60db93cccd.74.0 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a9f363f6570e5199e224d99ec606abb52d2da2d08408ba1f68e92940ca0407b5
3
+ size 7603
vocab.json CHANGED
@@ -1 +1 @@
1
- {"ء": 0, "آ": 1, "ئ": 2, "ا": 3, "ب": 4, "ت": 5, "ث": 6, "ج": 7, "ح": 8, "خ": 9, "د": 10, "ذ": 11, "ر": 12, "ز": 13, "س": 14, "ش": 15, "ص": 16, "ض": 17, "ط": 18, "ظ": 19, "ع": 20, "غ": 21, "ف": 22, "ق": 23, "ل": 24, "م": 25, "ن": 26, "ه": 27, "و": 28, "پ": 29, "چ": 30, "ژ": 31, "ک": 32, "گ": 33, "ی": 34, "[UNK]": 35, "[PAD]": 36}
 
1
+ {"<pad>": 0, "<s>": 1, "</s>": 2, "<unk>": 3, "|": 4, "ء": 5, "آ": 6, "ئ": 7, "ا": 8, "ب": 9, "ت": 10, "ث": 11, "ج": 12, "ح": 13, "خ": 14, "د": 15, "ذ": 16, "ر": 17, "ز": 18, "س": 19, "ش": 20, "ص": 21, "ض": 22, "ط": 23, "ظ": 24, "ع": 25, "غ": 26, "ف": 27, "ق": 28, "ل": 29, "م": 30, "ن": 31, "ه": 32, "و": 33, "پ": 34, "چ": 35, "ژ": 36, "ک": 37, "گ": 38, "ی": 39, "[UNK]": 40, "[PAD]": 41}