Adding `safetensors` variant of this model

#2
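The PR's stated purpose is to add a `safetensors` variant of the model. A conversion of this kind is typically produced from the existing PyTorch checkpoint; the snippet below is a minimal sketch of that step, not part of the diff itself, assuming a local `pytorch_model.bin` that holds a plain `state_dict` (file names are placeholders).

```python
# Hypothetical conversion sketch (not part of this PR's diff):
# turn a pytorch_model.bin checkpoint into model.safetensors.
import torch
from safetensors.torch import save_file

# Load the existing PyTorch checkpoint as a plain state_dict on CPU.
state_dict = torch.load("pytorch_model.bin", map_location="cpu")

# safetensors rejects tensors that share storage, so make each tensor
# contiguous and give it its own memory before saving.
state_dict = {name: t.contiguous().clone() for name, t in state_dict.items()}

# Write the safetensors variant; the "format": "pt" metadata is what
# transformers expects for PyTorch weights.
save_file(state_dict, "model.safetensors", metadata={"format": "pt"})
```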
config.json CHANGED
@@ -3,18 +3,18 @@
     "CustomTransformerModel"
   ],
   "bos_token_id": 2,
-  "d_ff": 2048,
-  "d_model": 512,
-  "dropout": 0.05,
+  "d_ff": 3072,
+  "d_model": 768,
+  "dropout": 0.1,
   "eos_token_id": 3,
-  "max_position_embeddings": 300,
+  "max_position_embeddings": 512,
   "model_type": "miscovery",
-  "num_decoder_layers": 8,
-  "num_encoder_layers": 8,
-  "num_heads": 8,
+  "num_decoder_layers": 12,
+  "num_encoder_layers": 12,
+  "num_heads": 12,
   "pad_token_id": 0,
   "torch_dtype": "float32",
-  "transformers_version": "4.35.2",
+  "transformers_version": "4.51.3",
   "use_flash_attn": true,
-  "vocab_size": 50000
+  "vocab_size": 100000
 }
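Beyond the `transformers_version` bump, the new `config.json` describes a larger encoder-decoder: `d_model` 512 → 768, `d_ff` 2048 → 3072, 8 → 12 heads and layers on each side, `vocab_size` 50,000 → 100,000, and `max_position_embeddings` 300 → 512. A quick sanity check of the new values, assuming the updated file has been downloaded locally (the path is a placeholder):

```python
import json

# Inspect the updated hyperparameters; "config.json" is assumed to be the
# file from this PR, saved in the working directory.
with open("config.json") as f:
    cfg = json.load(f)

head_dim = cfg["d_model"] // cfg["num_heads"]
print(cfg["d_model"], cfg["num_heads"], head_dim)            # 768 12 64
print(cfg["num_encoder_layers"], cfg["num_decoder_layers"])  # 12 12
print(cfg["vocab_size"], cfg["max_position_embeddings"])     # 100000 512
```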
generation_config.json CHANGED
@@ -3,5 +3,5 @@
   "bos_token_id": 2,
   "eos_token_id": 3,
   "pad_token_id": 0,
-  "transformers_version": "4.35.2"
+  "transformers_version": "4.51.3"
 }
model.safetensors DELETED
@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:8c4cd709b1c3d1d5e8b2a7db275c12a312498b5872393a342e3a46ac8363ba8c
-size 610135936
pytorch_model.bin DELETED
@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:f63f166d49528902b95d1ccecc1f997466f715b043d419795f401dc211b3961f
-size 610232130
special_tokens_map.json CHANGED
@@ -2,21 +2,7 @@
   "additional_special_tokens": [
     "[LANG_EN]",
     "[LANG_AR]",
-    "[LANG_AR_EG]",
-    "[TRANSLATION]",
-    "[SUMMARIZATION]",
-    "[PARAPHRASING]",
-    "[INSTRUCTIONS]",
-    "[CALCULATE]",
-    "[REORDER]",
-    "[QUESTION]",
-    "[ANSWER]",
-    "[CHOICES]",
-    "[START_OPTION]",
-    "[END_OPTION]",
-    "[REASONING]",
-    "[START_THINKING]",
-    "[END_THINKING]"
+    "[LANG_AR_EG]"
   ],
   "cls_token": {
     "content": "[CLS]",
tokenizer.json CHANGED
The diff for this file is too large to render. See raw diff
 
tokenizer_config.json CHANGED
@@ -63,145 +63,20 @@
       "rstrip": false,
       "single_word": false,
       "special": true
-    },
-    "8": {
-      "content": "[TRANSLATION]",
-      "lstrip": false,
-      "normalized": false,
-      "rstrip": false,
-      "single_word": false,
-      "special": true
-    },
-    "9": {
-      "content": "[SUMMARIZATION]",
-      "lstrip": false,
-      "normalized": false,
-      "rstrip": false,
-      "single_word": false,
-      "special": true
-    },
-    "10": {
-      "content": "[PARAPHRASING]",
-      "lstrip": false,
-      "normalized": false,
-      "rstrip": false,
-      "single_word": false,
-      "special": true
-    },
-    "11": {
-      "content": "[INSTRUCTIONS]",
-      "lstrip": false,
-      "normalized": false,
-      "rstrip": false,
-      "single_word": false,
-      "special": true
-    },
-    "12": {
-      "content": "[CALCULATE]",
-      "lstrip": false,
-      "normalized": false,
-      "rstrip": false,
-      "single_word": false,
-      "special": true
-    },
-    "13": {
-      "content": "[REORDER]",
-      "lstrip": false,
-      "normalized": false,
-      "rstrip": false,
-      "single_word": false,
-      "special": true
-    },
-    "14": {
-      "content": "[QUESTION]",
-      "lstrip": false,
-      "normalized": false,
-      "rstrip": false,
-      "single_word": false,
-      "special": true
-    },
-    "15": {
-      "content": "[ANSWER]",
-      "lstrip": false,
-      "normalized": false,
-      "rstrip": false,
-      "single_word": false,
-      "special": true
-    },
-    "16": {
-      "content": "[CHOICES]",
-      "lstrip": false,
-      "normalized": false,
-      "rstrip": false,
-      "single_word": false,
-      "special": true
-    },
-    "17": {
-      "content": "[START_OPTION]",
-      "lstrip": false,
-      "normalized": false,
-      "rstrip": false,
-      "single_word": false,
-      "special": true
-    },
-    "18": {
-      "content": "[END_OPTION]",
-      "lstrip": false,
-      "normalized": false,
-      "rstrip": false,
-      "single_word": false,
-      "special": true
-    },
-    "19": {
-      "content": "[REASONING]",
-      "lstrip": false,
-      "normalized": false,
-      "rstrip": false,
-      "single_word": false,
-      "special": true
-    },
-    "20": {
-      "content": "[START_THINKING]",
-      "lstrip": false,
-      "normalized": false,
-      "rstrip": false,
-      "single_word": false,
-      "special": true
-    },
-    "21": {
-      "content": "[END_THINKING]",
-      "lstrip": false,
-      "normalized": false,
-      "rstrip": false,
-      "single_word": false,
-      "special": true
     }
   },
   "additional_special_tokens": [
     "[LANG_EN]",
     "[LANG_AR]",
-    "[LANG_AR_EG]",
-    "[TRANSLATION]",
-    "[SUMMARIZATION]",
-    "[PARAPHRASING]",
-    "[INSTRUCTIONS]",
-    "[CALCULATE]",
-    "[REORDER]",
-    "[QUESTION]",
-    "[ANSWER]",
-    "[CHOICES]",
-    "[START_OPTION]",
-    "[END_OPTION]",
-    "[REASONING]",
-    "[START_THINKING]",
-    "[END_THINKING]"
+    "[LANG_AR_EG]"
   ],
   "clean_up_tokenization_spaces": true,
   "cls_token": "[CLS]",
+  "extra_special_tokens": {},
   "mask_token": "[MASK]",
   "model_max_length": 1000000000000000019884624838656,
   "pad_token": "[PAD]",
   "sep_token": "[SEP]",
-  "tokenizer_class": "PreTrainedTokenizerFast",
+  "tokenizer_class": "PreTrainedTokenizer",
   "unk_token": "[UNK]"
 }
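The tokenizer files trim the declared `additional_special_tokens` to the three language tags and switch `tokenizer_class` from `PreTrainedTokenizerFast` to `PreTrainedTokenizer`. A small check of the shipped files, assuming they are present locally (paths are placeholders; whether the old task tags remain in the vocabulary depends on the updated `tokenizer.json`, whose diff is too large to render here):

```python
import json

from tokenizers import Tokenizer

# The declared special tokens after this PR: only the language tags remain.
with open("special_tokens_map.json") as f:
    special = json.load(f)
print(special["additional_special_tokens"])  # ['[LANG_EN]', '[LANG_AR]', '[LANG_AR_EG]']

# token_to_id returns None for tokens missing from the vocabulary, so this
# shows whether the former task tags still exist as plain vocabulary entries.
tok = Tokenizer.from_file("tokenizer.json")
print(tok.token_to_id("[LANG_AR_EG]"))
print(tok.token_to_id("[TRANSLATION]"))
```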