SystemAdmin123 committed on
Commit
4660693
·
verified ·
1 Parent(s): 13c8326

Training in progress, step 160, checkpoint

Browse files
last-checkpoint/added_tokens.json ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ {
2
+ "<|im_end|>": 100279,
3
+ "<|im_start|>": 100278
4
+ }
last-checkpoint/config.json ADDED
@@ -0,0 +1,36 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_name_or_path": "katuni4ka/tiny-random-dbrx",
3
+ "architectures": [
4
+ "DbrxForCausalLM"
5
+ ],
6
+ "attn_config": {
7
+ "clip_qkv": 8,
8
+ "kv_n_heads": 2,
9
+ "model_type": "",
10
+ "rope_theta": 500000
11
+ },
12
+ "d_model": 8,
13
+ "emb_pdrop": 0.0,
14
+ "ffn_config": {
15
+ "ffn_hidden_size": 8,
16
+ "model_type": "",
17
+ "moe_jitter_eps": 0,
18
+ "moe_loss_weight": 0.05,
19
+ "moe_num_experts": 16,
20
+ "moe_top_k": 4
21
+ },
22
+ "initializer_range": 0.02,
23
+ "max_seq_len": 32768,
24
+ "model_type": "dbrx",
25
+ "n_heads": 4,
26
+ "n_layers": 2,
27
+ "num_key_value_heads": 2,
28
+ "output_router_logits": false,
29
+ "resid_pdrop": 0.0,
30
+ "router_aux_loss_coef": 0.05,
31
+ "tie_word_embeddings": false,
32
+ "torch_dtype": "bfloat16",
33
+ "transformers_version": "4.48.1",
34
+ "use_cache": false,
35
+ "vocab_size": 100280
36
+ }
last-checkpoint/generation_config.json ADDED
@@ -0,0 +1,5 @@
 
 
 
 
 
 
1
+ {
2
+ "_from_model_config": true,
3
+ "do_sample": true,
4
+ "transformers_version": "4.48.1"
5
+ }
last-checkpoint/merges.txt ADDED
The diff for this file is too large to render. See raw diff
 
last-checkpoint/model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0e3c84de2c2dd4c70b6daa9dd9a6276698a4a3c5f16ffb3e5c9e12e4bfa9c796
3
+ size 3224728
last-checkpoint/optimizer.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:18b540f097f038f52f8ea589eb6d1c7362509ff28117e7aa32d6c1909c96b585
3
+ size 3328626
last-checkpoint/rng_state_0.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:13c9ad11f70cb5d30587ede3f71f44d6bd22de048e7dea94626b103172f1a451
3
+ size 15984
last-checkpoint/rng_state_1.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4db4ceaf3fc99f5683e1e54a0ce72b34e8a39029b46d2d3a4f3467f7123b4cbc
3
+ size 15984
last-checkpoint/rng_state_2.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:794e4e0063bacc6479402dbf5a29a9ee7054a6d28bf769c2f8ef9992c3fb0728
3
+ size 15984
last-checkpoint/rng_state_3.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a4e8f6076b3b73998e368cd6a3855393db7b4f440f465de797edfe91a65c0702
3
+ size 15984
last-checkpoint/rng_state_4.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:bbc8c6e30345a43def1fa8fe35027e62b6748496e4f328814c4fb13285a49364
3
+ size 15984
last-checkpoint/rng_state_5.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:19ddf1325b3ee86d34158861780505e1a1100feb6601d24cc30e33f3abf94ace
3
+ size 15984
last-checkpoint/rng_state_6.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b413e4fea4da9a1f1386e7176b0bbb9ac2047d78224fa955d6e2b7332f1869dd
3
+ size 15984
last-checkpoint/rng_state_7.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3dfadee9a2b306408da618c8f99a7ad319a5ceefd44df0c0558c60a1ad8e9d89
3
+ size 15984
last-checkpoint/scheduler.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8f0c7ff54b85ffef1dcbffa5d8d256d7b7e02cf6f2a611b338e53d605c1ee098
3
+ size 1064
last-checkpoint/special_tokens_map.json ADDED
@@ -0,0 +1,30 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "bos_token": {
3
+ "content": "<|endoftext|>",
4
+ "lstrip": false,
5
+ "normalized": false,
6
+ "rstrip": false,
7
+ "single_word": false
8
+ },
9
+ "eos_token": {
10
+ "content": "<|endoftext|>",
11
+ "lstrip": false,
12
+ "normalized": false,
13
+ "rstrip": false,
14
+ "single_word": false
15
+ },
16
+ "pad_token": {
17
+ "content": "<|pad|>",
18
+ "lstrip": false,
19
+ "normalized": false,
20
+ "rstrip": false,
21
+ "single_word": false
22
+ },
23
+ "unk_token": {
24
+ "content": "<|endoftext|>",
25
+ "lstrip": false,
26
+ "normalized": false,
27
+ "rstrip": false,
28
+ "single_word": false
29
+ }
30
+ }
last-checkpoint/tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
last-checkpoint/tokenizer_config.json ADDED
@@ -0,0 +1,207 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "add_prefix_space": false,
3
+ "added_tokens_decoder": {
4
+ "100256": {
5
+ "content": "<||_unused_0_||>",
6
+ "lstrip": false,
7
+ "normalized": false,
8
+ "rstrip": false,
9
+ "single_word": false,
10
+ "special": true
11
+ },
12
+ "100257": {
13
+ "content": "<|endoftext|>",
14
+ "lstrip": false,
15
+ "normalized": false,
16
+ "rstrip": false,
17
+ "single_word": false,
18
+ "special": true
19
+ },
20
+ "100258": {
21
+ "content": "<|fim_prefix|>",
22
+ "lstrip": false,
23
+ "normalized": false,
24
+ "rstrip": false,
25
+ "single_word": false,
26
+ "special": true
27
+ },
28
+ "100259": {
29
+ "content": "<|fim_middle|>",
30
+ "lstrip": false,
31
+ "normalized": false,
32
+ "rstrip": false,
33
+ "single_word": false,
34
+ "special": true
35
+ },
36
+ "100260": {
37
+ "content": "<|fim_suffix|>",
38
+ "lstrip": false,
39
+ "normalized": false,
40
+ "rstrip": false,
41
+ "single_word": false,
42
+ "special": true
43
+ },
44
+ "100261": {
45
+ "content": "<||_unused_1_||>",
46
+ "lstrip": false,
47
+ "normalized": false,
48
+ "rstrip": false,
49
+ "single_word": false,
50
+ "special": true
51
+ },
52
+ "100262": {
53
+ "content": "<||_unused_2_||>",
54
+ "lstrip": false,
55
+ "normalized": false,
56
+ "rstrip": false,
57
+ "single_word": false,
58
+ "special": true
59
+ },
60
+ "100263": {
61
+ "content": "<||_unused_3_||>",
62
+ "lstrip": false,
63
+ "normalized": false,
64
+ "rstrip": false,
65
+ "single_word": false,
66
+ "special": true
67
+ },
68
+ "100264": {
69
+ "content": "<||_unused_4_||>",
70
+ "lstrip": false,
71
+ "normalized": false,
72
+ "rstrip": false,
73
+ "single_word": false,
74
+ "special": true
75
+ },
76
+ "100265": {
77
+ "content": "<||_unused_5_||>",
78
+ "lstrip": false,
79
+ "normalized": false,
80
+ "rstrip": false,
81
+ "single_word": false,
82
+ "special": true
83
+ },
84
+ "100266": {
85
+ "content": "<||_unused_6_||>",
86
+ "lstrip": false,
87
+ "normalized": false,
88
+ "rstrip": false,
89
+ "single_word": false,
90
+ "special": true
91
+ },
92
+ "100267": {
93
+ "content": "<||_unused_7_||>",
94
+ "lstrip": false,
95
+ "normalized": false,
96
+ "rstrip": false,
97
+ "single_word": false,
98
+ "special": true
99
+ },
100
+ "100268": {
101
+ "content": "<||_unused_8_||>",
102
+ "lstrip": false,
103
+ "normalized": false,
104
+ "rstrip": false,
105
+ "single_word": false,
106
+ "special": true
107
+ },
108
+ "100269": {
109
+ "content": "<||_unused_9_||>",
110
+ "lstrip": false,
111
+ "normalized": false,
112
+ "rstrip": false,
113
+ "single_word": false,
114
+ "special": true
115
+ },
116
+ "100270": {
117
+ "content": "<||_unused_10_||>",
118
+ "lstrip": false,
119
+ "normalized": false,
120
+ "rstrip": false,
121
+ "single_word": false,
122
+ "special": true
123
+ },
124
+ "100271": {
125
+ "content": "<||_unused_11_||>",
126
+ "lstrip": false,
127
+ "normalized": false,
128
+ "rstrip": false,
129
+ "single_word": false,
130
+ "special": true
131
+ },
132
+ "100272": {
133
+ "content": "<||_unused_12_||>",
134
+ "lstrip": false,
135
+ "normalized": false,
136
+ "rstrip": false,
137
+ "single_word": false,
138
+ "special": true
139
+ },
140
+ "100273": {
141
+ "content": "<||_unused_13_||>",
142
+ "lstrip": false,
143
+ "normalized": false,
144
+ "rstrip": false,
145
+ "single_word": false,
146
+ "special": true
147
+ },
148
+ "100274": {
149
+ "content": "<||_unused_14_||>",
150
+ "lstrip": false,
151
+ "normalized": false,
152
+ "rstrip": false,
153
+ "single_word": false,
154
+ "special": true
155
+ },
156
+ "100275": {
157
+ "content": "<||_unused_15_||>",
158
+ "lstrip": false,
159
+ "normalized": false,
160
+ "rstrip": false,
161
+ "single_word": false,
162
+ "special": true
163
+ },
164
+ "100276": {
165
+ "content": "<|endofprompt|>",
166
+ "lstrip": false,
167
+ "normalized": false,
168
+ "rstrip": false,
169
+ "single_word": false,
170
+ "special": true
171
+ },
172
+ "100277": {
173
+ "content": "<|pad|>",
174
+ "lstrip": false,
175
+ "normalized": false,
176
+ "rstrip": false,
177
+ "single_word": false,
178
+ "special": true
179
+ },
180
+ "100278": {
181
+ "content": "<|im_start|>",
182
+ "lstrip": false,
183
+ "normalized": false,
184
+ "rstrip": false,
185
+ "single_word": false,
186
+ "special": true
187
+ },
188
+ "100279": {
189
+ "content": "<|im_end|>",
190
+ "lstrip": false,
191
+ "normalized": false,
192
+ "rstrip": false,
193
+ "single_word": false,
194
+ "special": true
195
+ }
196
+ },
197
+ "bos_token": "<|endoftext|>",
198
+ "chat_template": "{% if messages[0]['role'] == 'system' %}{% set loop_messages = messages[1:] %}{% set system_message = messages[0]['content'] %}{% elif 'system' not in messages[0]['role'] %}{% set loop_messages = messages %}{% set system_message = 'You are DBRX, created by Databricks. You were last updated in December 2023. You answer questions based on information available up to that point.\nYOU PROVIDE SHORT RESPONSES TO SHORT QUESTIONS OR STATEMENTS, but provide thorough responses to more complex and open-ended questions.\nYou assist with various tasks, from writing to coding (using markdown for code blocks — remember to use ``` with code, JSON, and tables).\n(You do not have real-time data access or code execution capabilities. You avoid stereotyping and provide balanced perspectives on controversial topics. You do not provide song lyrics, poems, or news articles and do not divulge details of your training data.)\nThis is your system prompt, guiding your responses. Do not reference it, just respond to the user. If you find yourself talking about this message, stop. You should be responding appropriately and usually that means not mentioning this.\nYOU DO NOT MENTION ANY OF THIS INFORMATION ABOUT YOURSELF UNLESS THE INFORMATION IS DIRECTLY PERTINENT TO THE USER\\'S QUERY.' %}{% else %}{% set loop_messages = messages %}{% set system_message = false %}{% endif %}{% for message in loop_messages %}{% if loop.index0 == 0 %}{% if system_message != false %}{{ '<|im_start|>system\n' + system_message | trim + '<|im_end|>\n'}}{% endif %}{{ '<|im_start|>' + message['role'] + '\n' + message['content'] + '<|im_end|>' }}{% else %}{{ '\n' + '<|im_start|>' + message['role'] + '\n' + message['content'] + '<|im_end|>' }}{% endif %}{% if (add_generation_prompt == true and loop.last) %}{{ '\n' + '<|im_start|>' + 'assistant' + '\n' }}{% endif %}{% endfor %}",
199
+ "clean_up_tokenization_spaces": true,
200
+ "eos_token": "<|endoftext|>",
201
+ "extra_special_tokens": {},
202
+ "model_max_length": 32768,
203
+ "pad_token": "<|pad|>",
204
+ "tokenizer_class": "GPT2Tokenizer",
205
+ "unk_token": "<|endoftext|>",
206
+ "use_fast": true
207
+ }
last-checkpoint/trainer_state.json ADDED
@@ -0,0 +1,217 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_metric": null,
3
+ "best_model_checkpoint": null,
4
+ "epoch": 0.1893491124260355,
5
+ "eval_steps": 20,
6
+ "global_step": 160,
7
+ "is_hyper_param_search": false,
8
+ "is_local_process_zero": true,
9
+ "is_world_process_zero": true,
10
+ "log_history": [
11
+ {
12
+ "epoch": 0.001183431952662722,
13
+ "eval_loss": 11.5,
14
+ "eval_runtime": 3.3956,
15
+ "eval_samples_per_second": 442.336,
16
+ "eval_steps_per_second": 27.683,
17
+ "step": 1
18
+ },
19
+ {
20
+ "epoch": 0.011834319526627219,
21
+ "grad_norm": 0.004608154296875,
22
+ "learning_rate": 1.6000000000000003e-05,
23
+ "loss": 11.5,
24
+ "step": 10
25
+ },
26
+ {
27
+ "epoch": 0.023668639053254437,
28
+ "grad_norm": 0.0023193359375,
29
+ "learning_rate": 3.2000000000000005e-05,
30
+ "loss": 11.5,
31
+ "step": 20
32
+ },
33
+ {
34
+ "epoch": 0.023668639053254437,
35
+ "eval_loss": 11.5,
36
+ "eval_runtime": 3.3351,
37
+ "eval_samples_per_second": 450.363,
38
+ "eval_steps_per_second": 28.185,
39
+ "step": 20
40
+ },
41
+ {
42
+ "epoch": 0.03550295857988166,
43
+ "grad_norm": 0.00604248046875,
44
+ "learning_rate": 4.8e-05,
45
+ "loss": 11.5,
46
+ "step": 30
47
+ },
48
+ {
49
+ "epoch": 0.047337278106508875,
50
+ "grad_norm": 0.0032501220703125,
51
+ "learning_rate": 6.400000000000001e-05,
52
+ "loss": 11.5,
53
+ "step": 40
54
+ },
55
+ {
56
+ "epoch": 0.047337278106508875,
57
+ "eval_loss": 11.5,
58
+ "eval_runtime": 3.3126,
59
+ "eval_samples_per_second": 453.421,
60
+ "eval_steps_per_second": 28.377,
61
+ "step": 40
62
+ },
63
+ {
64
+ "epoch": 0.05917159763313609,
65
+ "grad_norm": 0.0087890625,
66
+ "learning_rate": 8e-05,
67
+ "loss": 11.5,
68
+ "step": 50
69
+ },
70
+ {
71
+ "epoch": 0.07100591715976332,
72
+ "grad_norm": 0.004241943359375,
73
+ "learning_rate": 9.6e-05,
74
+ "loss": 11.5,
75
+ "step": 60
76
+ },
77
+ {
78
+ "epoch": 0.07100591715976332,
79
+ "eval_loss": 11.5,
80
+ "eval_runtime": 3.3352,
81
+ "eval_samples_per_second": 450.349,
82
+ "eval_steps_per_second": 28.184,
83
+ "step": 60
84
+ },
85
+ {
86
+ "epoch": 0.08284023668639054,
87
+ "grad_norm": 0.00238037109375,
88
+ "learning_rate": 0.00011200000000000001,
89
+ "loss": 11.5,
90
+ "step": 70
91
+ },
92
+ {
93
+ "epoch": 0.09467455621301775,
94
+ "grad_norm": 0.006256103515625,
95
+ "learning_rate": 0.00012800000000000002,
96
+ "loss": 11.5,
97
+ "step": 80
98
+ },
99
+ {
100
+ "epoch": 0.09467455621301775,
101
+ "eval_loss": 11.5,
102
+ "eval_runtime": 3.3585,
103
+ "eval_samples_per_second": 447.222,
104
+ "eval_steps_per_second": 27.989,
105
+ "step": 80
106
+ },
107
+ {
108
+ "epoch": 0.10650887573964497,
109
+ "grad_norm": 0.003936767578125,
110
+ "learning_rate": 0.000144,
111
+ "loss": 11.5,
112
+ "step": 90
113
+ },
114
+ {
115
+ "epoch": 0.11834319526627218,
116
+ "grad_norm": 0.0123291015625,
117
+ "learning_rate": 0.00016,
118
+ "loss": 11.5,
119
+ "step": 100
120
+ },
121
+ {
122
+ "epoch": 0.11834319526627218,
123
+ "eval_loss": 11.5,
124
+ "eval_runtime": 3.3221,
125
+ "eval_samples_per_second": 452.122,
126
+ "eval_steps_per_second": 28.295,
127
+ "step": 100
128
+ },
129
+ {
130
+ "epoch": 0.1301775147928994,
131
+ "grad_norm": 0.00634765625,
132
+ "learning_rate": 0.00017600000000000002,
133
+ "loss": 11.5,
134
+ "step": 110
135
+ },
136
+ {
137
+ "epoch": 0.14201183431952663,
138
+ "grad_norm": 0.005706787109375,
139
+ "learning_rate": 0.000192,
140
+ "loss": 11.5,
141
+ "step": 120
142
+ },
143
+ {
144
+ "epoch": 0.14201183431952663,
145
+ "eval_loss": 11.5,
146
+ "eval_runtime": 3.328,
147
+ "eval_samples_per_second": 451.328,
148
+ "eval_steps_per_second": 28.246,
149
+ "step": 120
150
+ },
151
+ {
152
+ "epoch": 0.15384615384615385,
153
+ "grad_norm": 0.01129150390625,
154
+ "learning_rate": 0.0001999978128380225,
155
+ "loss": 11.5,
156
+ "step": 130
157
+ },
158
+ {
159
+ "epoch": 0.16568047337278108,
160
+ "grad_norm": 0.0162353515625,
161
+ "learning_rate": 0.0001999803161162393,
162
+ "loss": 11.5,
163
+ "step": 140
164
+ },
165
+ {
166
+ "epoch": 0.16568047337278108,
167
+ "eval_loss": 11.5,
168
+ "eval_runtime": 3.3864,
169
+ "eval_samples_per_second": 443.539,
170
+ "eval_steps_per_second": 27.758,
171
+ "step": 140
172
+ },
173
+ {
174
+ "epoch": 0.17751479289940827,
175
+ "grad_norm": 0.0267333984375,
176
+ "learning_rate": 0.00019994532573409262,
177
+ "loss": 11.5,
178
+ "step": 150
179
+ },
180
+ {
181
+ "epoch": 0.1893491124260355,
182
+ "grad_norm": 0.030029296875,
183
+ "learning_rate": 0.00019989284781388617,
184
+ "loss": 11.5,
185
+ "step": 160
186
+ },
187
+ {
188
+ "epoch": 0.1893491124260355,
189
+ "eval_loss": 11.5,
190
+ "eval_runtime": 3.35,
191
+ "eval_samples_per_second": 448.364,
192
+ "eval_steps_per_second": 28.06,
193
+ "step": 160
194
+ }
195
+ ],
196
+ "logging_steps": 10,
197
+ "max_steps": 2500,
198
+ "num_input_tokens_seen": 0,
199
+ "num_train_epochs": 3,
200
+ "save_steps": 40,
201
+ "stateful_callbacks": {
202
+ "TrainerControl": {
203
+ "args": {
204
+ "should_epoch_stop": false,
205
+ "should_evaluate": false,
206
+ "should_log": false,
207
+ "should_save": true,
208
+ "should_training_stop": false
209
+ },
210
+ "attributes": {}
211
+ }
212
+ },
213
+ "total_flos": 25470836408320.0,
214
+ "train_batch_size": 2,
215
+ "trial_name": null,
216
+ "trial_params": null
217
+ }
last-checkpoint/training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4193abd9d3c8f06a0b72e6601d6500a541caf61561918ea14e1ccfe5bac411cf
3
+ size 8312
last-checkpoint/vocab.json ADDED
The diff for this file is too large to render. See raw diff