nRuaif commited on
Commit
d666458
·
1 Parent(s): c4fc286

Upload folder using huggingface_hub

Browse files
Untitled.ipynb CHANGED
@@ -2,11 +2,646 @@
2
  "cells": [
3
  {
4
  "cell_type": "code",
5
- "execution_count": null,
6
  "id": "127668ee-44f0-4438-9337-e7c4a486aea3",
7
  "metadata": {},
8
- "outputs": [],
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
9
  "source": [
 
10
  "from huggingface_hub import HfApi\n",
11
  "\n",
12
  "api = HfApi()\n",
@@ -16,14 +651,22 @@
16
  "\n",
17
  "api.upload_folder(\n",
18
  "\n",
19
- " folder_path=r\"C:\\dataset\\New folder\",\n",
20
  "\n",
21
- " repo_id=\"MinervaAI/Random-roleplay-instruction\",\n",
22
  "\n",
23
- " repo_type=\"dataset\",\n",
24
  "\n",
25
  ")"
26
  ]
 
 
 
 
 
 
 
 
27
  }
28
  ],
29
  "metadata": {
 
2
  "cells": [
3
  {
4
  "cell_type": "code",
5
+ "execution_count": 3,
6
  "id": "127668ee-44f0-4438-9337-e7c4a486aea3",
7
  "metadata": {},
8
+ "outputs": [
9
+ {
10
+ "name": "stdout",
11
+ "output_type": "stream",
12
+ "text": [
13
+ "/workspace/axolotl\n"
14
+ ]
15
+ },
16
+ {
17
+ "data": {
18
+ "application/vnd.jupyter.widget-view+json": {
19
+ "model_id": "5f308b1ddb654821afe650b708175cc2",
20
+ "version_major": 2,
21
+ "version_minor": 0
22
+ },
23
+ "text/plain": [
24
+ "adapter_model.bin: 0%| | 0.00/62.8M [00:00<?, ?B/s]"
25
+ ]
26
+ },
27
+ "metadata": {},
28
+ "output_type": "display_data"
29
+ },
30
+ {
31
+ "data": {
32
+ "application/vnd.jupyter.widget-view+json": {
33
+ "model_id": "6aaef8ad6f75454991e62a4d1e8ff05e",
34
+ "version_major": 2,
35
+ "version_minor": 0
36
+ },
37
+ "text/plain": [
38
+ "adapter_model.bin: 0%| | 0.00/62.8M [00:00<?, ?B/s]"
39
+ ]
40
+ },
41
+ "metadata": {},
42
+ "output_type": "display_data"
43
+ },
44
+ {
45
+ "data": {
46
+ "application/vnd.jupyter.widget-view+json": {
47
+ "model_id": "43f479d5951b4c08848897dbda7a9483",
48
+ "version_major": 2,
49
+ "version_minor": 0
50
+ },
51
+ "text/plain": [
52
+ "scheduler.pt: 0%| | 0.00/627 [00:00<?, ?B/s]"
53
+ ]
54
+ },
55
+ "metadata": {},
56
+ "output_type": "display_data"
57
+ },
58
+ {
59
+ "data": {
60
+ "application/vnd.jupyter.widget-view+json": {
61
+ "model_id": "491a2b982ebe4269a0a1b302938fd82f",
62
+ "version_major": 2,
63
+ "version_minor": 0
64
+ },
65
+ "text/plain": [
66
+ "Upload 43 LFS files: 0%| | 0/43 [00:00<?, ?it/s]"
67
+ ]
68
+ },
69
+ "metadata": {},
70
+ "output_type": "display_data"
71
+ },
72
+ {
73
+ "data": {
74
+ "application/vnd.jupyter.widget-view+json": {
75
+ "model_id": "de17bf15a4df42fea8781b73b667c4e7",
76
+ "version_major": 2,
77
+ "version_minor": 0
78
+ },
79
+ "text/plain": [
80
+ "rng_state.pth: 0%| | 0.00/14.6k [00:00<?, ?B/s]"
81
+ ]
82
+ },
83
+ "metadata": {},
84
+ "output_type": "display_data"
85
+ },
86
+ {
87
+ "data": {
88
+ "application/vnd.jupyter.widget-view+json": {
89
+ "model_id": "ca5035dfd1e146138cdea7b14d3af6c8",
90
+ "version_major": 2,
91
+ "version_minor": 0
92
+ },
93
+ "text/plain": [
94
+ "optimizer.pt: 0%| | 0.00/251M [00:00<?, ?B/s]"
95
+ ]
96
+ },
97
+ "metadata": {},
98
+ "output_type": "display_data"
99
+ },
100
+ {
101
+ "data": {
102
+ "application/vnd.jupyter.widget-view+json": {
103
+ "model_id": "47a61e2634844e1ca0c556626ff249c2",
104
+ "version_major": 2,
105
+ "version_minor": 0
106
+ },
107
+ "text/plain": [
108
+ "training_args.bin: 0%| | 0.00/4.22k [00:00<?, ?B/s]"
109
+ ]
110
+ },
111
+ "metadata": {},
112
+ "output_type": "display_data"
113
+ },
114
+ {
115
+ "data": {
116
+ "application/vnd.jupyter.widget-view+json": {
117
+ "model_id": "6a9b2c58d70b4ecfa879aaf3caea5edc",
118
+ "version_major": 2,
119
+ "version_minor": 0
120
+ },
121
+ "text/plain": [
122
+ "adapter_model.bin: 0%| | 0.00/62.8M [00:00<?, ?B/s]"
123
+ ]
124
+ },
125
+ "metadata": {},
126
+ "output_type": "display_data"
127
+ },
128
+ {
129
+ "data": {
130
+ "application/vnd.jupyter.widget-view+json": {
131
+ "model_id": "1d48ef30c59244f09a69b208b000ceee",
132
+ "version_major": 2,
133
+ "version_minor": 0
134
+ },
135
+ "text/plain": [
136
+ "adapter_model.bin: 0%| | 0.00/62.8M [00:00<?, ?B/s]"
137
+ ]
138
+ },
139
+ "metadata": {},
140
+ "output_type": "display_data"
141
+ },
142
+ {
143
+ "data": {
144
+ "application/vnd.jupyter.widget-view+json": {
145
+ "model_id": "94b212b03565444e8f7cedf2688feb24",
146
+ "version_major": 2,
147
+ "version_minor": 0
148
+ },
149
+ "text/plain": [
150
+ "optimizer.pt: 0%| | 0.00/251M [00:00<?, ?B/s]"
151
+ ]
152
+ },
153
+ "metadata": {},
154
+ "output_type": "display_data"
155
+ },
156
+ {
157
+ "data": {
158
+ "application/vnd.jupyter.widget-view+json": {
159
+ "model_id": "0ba7f609f57b4cc5af970e364bb3ee27",
160
+ "version_major": 2,
161
+ "version_minor": 0
162
+ },
163
+ "text/plain": [
164
+ "rng_state.pth: 0%| | 0.00/14.6k [00:00<?, ?B/s]"
165
+ ]
166
+ },
167
+ "metadata": {},
168
+ "output_type": "display_data"
169
+ },
170
+ {
171
+ "data": {
172
+ "application/vnd.jupyter.widget-view+json": {
173
+ "model_id": "8be371a76bb14e2caa2ec672c112e23e",
174
+ "version_major": 2,
175
+ "version_minor": 0
176
+ },
177
+ "text/plain": [
178
+ "scheduler.pt: 0%| | 0.00/627 [00:00<?, ?B/s]"
179
+ ]
180
+ },
181
+ "metadata": {},
182
+ "output_type": "display_data"
183
+ },
184
+ {
185
+ "data": {
186
+ "application/vnd.jupyter.widget-view+json": {
187
+ "model_id": "555efd66b20c49eab3edd2518af34f57",
188
+ "version_major": 2,
189
+ "version_minor": 0
190
+ },
191
+ "text/plain": [
192
+ "training_args.bin: 0%| | 0.00/4.22k [00:00<?, ?B/s]"
193
+ ]
194
+ },
195
+ "metadata": {},
196
+ "output_type": "display_data"
197
+ },
198
+ {
199
+ "data": {
200
+ "application/vnd.jupyter.widget-view+json": {
201
+ "model_id": "dd48f67cda094c958349a2f0a3107a3e",
202
+ "version_major": 2,
203
+ "version_minor": 0
204
+ },
205
+ "text/plain": [
206
+ "adapter_model.bin: 0%| | 0.00/62.8M [00:00<?, ?B/s]"
207
+ ]
208
+ },
209
+ "metadata": {},
210
+ "output_type": "display_data"
211
+ },
212
+ {
213
+ "data": {
214
+ "application/vnd.jupyter.widget-view+json": {
215
+ "model_id": "bdf4c85a50c54a1d8ee0c4805538cedc",
216
+ "version_major": 2,
217
+ "version_minor": 0
218
+ },
219
+ "text/plain": [
220
+ "adapter_model.bin: 0%| | 0.00/62.8M [00:00<?, ?B/s]"
221
+ ]
222
+ },
223
+ "metadata": {},
224
+ "output_type": "display_data"
225
+ },
226
+ {
227
+ "data": {
228
+ "application/vnd.jupyter.widget-view+json": {
229
+ "model_id": "6294a1cf6111457ab2722f5570eaf5e9",
230
+ "version_major": 2,
231
+ "version_minor": 0
232
+ },
233
+ "text/plain": [
234
+ "optimizer.pt: 0%| | 0.00/251M [00:00<?, ?B/s]"
235
+ ]
236
+ },
237
+ "metadata": {},
238
+ "output_type": "display_data"
239
+ },
240
+ {
241
+ "data": {
242
+ "application/vnd.jupyter.widget-view+json": {
243
+ "model_id": "8d444fba036549dc8d5a86c9678c6f10",
244
+ "version_major": 2,
245
+ "version_minor": 0
246
+ },
247
+ "text/plain": [
248
+ "rng_state.pth: 0%| | 0.00/14.6k [00:00<?, ?B/s]"
249
+ ]
250
+ },
251
+ "metadata": {},
252
+ "output_type": "display_data"
253
+ },
254
+ {
255
+ "data": {
256
+ "application/vnd.jupyter.widget-view+json": {
257
+ "model_id": "c042cf944dde4b6ab62fb374b4525c6e",
258
+ "version_major": 2,
259
+ "version_minor": 0
260
+ },
261
+ "text/plain": [
262
+ "scheduler.pt: 0%| | 0.00/627 [00:00<?, ?B/s]"
263
+ ]
264
+ },
265
+ "metadata": {},
266
+ "output_type": "display_data"
267
+ },
268
+ {
269
+ "data": {
270
+ "application/vnd.jupyter.widget-view+json": {
271
+ "model_id": "1068ab04d0274c03904d387bdcdf5de7",
272
+ "version_major": 2,
273
+ "version_minor": 0
274
+ },
275
+ "text/plain": [
276
+ "training_args.bin: 0%| | 0.00/4.22k [00:00<?, ?B/s]"
277
+ ]
278
+ },
279
+ "metadata": {},
280
+ "output_type": "display_data"
281
+ },
282
+ {
283
+ "data": {
284
+ "application/vnd.jupyter.widget-view+json": {
285
+ "model_id": "2a95d0b7842748dabd9fc1f8c18deb39",
286
+ "version_major": 2,
287
+ "version_minor": 0
288
+ },
289
+ "text/plain": [
290
+ "adapter_model.bin: 0%| | 0.00/62.8M [00:00<?, ?B/s]"
291
+ ]
292
+ },
293
+ "metadata": {},
294
+ "output_type": "display_data"
295
+ },
296
+ {
297
+ "data": {
298
+ "application/vnd.jupyter.widget-view+json": {
299
+ "model_id": "f7797fab17a3420f95232a252c8d772c",
300
+ "version_major": 2,
301
+ "version_minor": 0
302
+ },
303
+ "text/plain": [
304
+ "adapter_model.bin: 0%| | 0.00/62.8M [00:00<?, ?B/s]"
305
+ ]
306
+ },
307
+ "metadata": {},
308
+ "output_type": "display_data"
309
+ },
310
+ {
311
+ "data": {
312
+ "application/vnd.jupyter.widget-view+json": {
313
+ "model_id": "1bafc37d5dca444bb7c6ef711f7861ae",
314
+ "version_major": 2,
315
+ "version_minor": 0
316
+ },
317
+ "text/plain": [
318
+ "optimizer.pt: 0%| | 0.00/251M [00:00<?, ?B/s]"
319
+ ]
320
+ },
321
+ "metadata": {},
322
+ "output_type": "display_data"
323
+ },
324
+ {
325
+ "data": {
326
+ "application/vnd.jupyter.widget-view+json": {
327
+ "model_id": "293fda7d9cd14487b770342136ed25d0",
328
+ "version_major": 2,
329
+ "version_minor": 0
330
+ },
331
+ "text/plain": [
332
+ "rng_state.pth: 0%| | 0.00/14.6k [00:00<?, ?B/s]"
333
+ ]
334
+ },
335
+ "metadata": {},
336
+ "output_type": "display_data"
337
+ },
338
+ {
339
+ "data": {
340
+ "application/vnd.jupyter.widget-view+json": {
341
+ "model_id": "c36233f77fb14c2698087dd20522d4eb",
342
+ "version_major": 2,
343
+ "version_minor": 0
344
+ },
345
+ "text/plain": [
346
+ "scheduler.pt: 0%| | 0.00/627 [00:00<?, ?B/s]"
347
+ ]
348
+ },
349
+ "metadata": {},
350
+ "output_type": "display_data"
351
+ },
352
+ {
353
+ "data": {
354
+ "application/vnd.jupyter.widget-view+json": {
355
+ "model_id": "3cef58ea84be4f3dae5e848ff5a6f27c",
356
+ "version_major": 2,
357
+ "version_minor": 0
358
+ },
359
+ "text/plain": [
360
+ "training_args.bin: 0%| | 0.00/4.22k [00:00<?, ?B/s]"
361
+ ]
362
+ },
363
+ "metadata": {},
364
+ "output_type": "display_data"
365
+ },
366
+ {
367
+ "data": {
368
+ "application/vnd.jupyter.widget-view+json": {
369
+ "model_id": "e952241ba9be4c9fa8c9b2eeb1e3d9e4",
370
+ "version_major": 2,
371
+ "version_minor": 0
372
+ },
373
+ "text/plain": [
374
+ "adapter_model.bin: 0%| | 0.00/62.8M [00:00<?, ?B/s]"
375
+ ]
376
+ },
377
+ "metadata": {},
378
+ "output_type": "display_data"
379
+ },
380
+ {
381
+ "data": {
382
+ "application/vnd.jupyter.widget-view+json": {
383
+ "model_id": "c0dce134d40942ab999de97456fe9b32",
384
+ "version_major": 2,
385
+ "version_minor": 0
386
+ },
387
+ "text/plain": [
388
+ "adapter_model.bin: 0%| | 0.00/62.8M [00:00<?, ?B/s]"
389
+ ]
390
+ },
391
+ "metadata": {},
392
+ "output_type": "display_data"
393
+ },
394
+ {
395
+ "data": {
396
+ "application/vnd.jupyter.widget-view+json": {
397
+ "model_id": "23a3b1c6a43f497b815f163b35460238",
398
+ "version_major": 2,
399
+ "version_minor": 0
400
+ },
401
+ "text/plain": [
402
+ "optimizer.pt: 0%| | 0.00/251M [00:00<?, ?B/s]"
403
+ ]
404
+ },
405
+ "metadata": {},
406
+ "output_type": "display_data"
407
+ },
408
+ {
409
+ "data": {
410
+ "application/vnd.jupyter.widget-view+json": {
411
+ "model_id": "1b971333e98e46ba9a10b38349a51f8f",
412
+ "version_major": 2,
413
+ "version_minor": 0
414
+ },
415
+ "text/plain": [
416
+ "rng_state.pth: 0%| | 0.00/14.6k [00:00<?, ?B/s]"
417
+ ]
418
+ },
419
+ "metadata": {},
420
+ "output_type": "display_data"
421
+ },
422
+ {
423
+ "data": {
424
+ "application/vnd.jupyter.widget-view+json": {
425
+ "model_id": "bf03b21ac07a4980aecb4de18e18827c",
426
+ "version_major": 2,
427
+ "version_minor": 0
428
+ },
429
+ "text/plain": [
430
+ "scheduler.pt: 0%| | 0.00/627 [00:00<?, ?B/s]"
431
+ ]
432
+ },
433
+ "metadata": {},
434
+ "output_type": "display_data"
435
+ },
436
+ {
437
+ "data": {
438
+ "application/vnd.jupyter.widget-view+json": {
439
+ "model_id": "9fb30b3564964dc796e0499f15d99c34",
440
+ "version_major": 2,
441
+ "version_minor": 0
442
+ },
443
+ "text/plain": [
444
+ "training_args.bin: 0%| | 0.00/4.22k [00:00<?, ?B/s]"
445
+ ]
446
+ },
447
+ "metadata": {},
448
+ "output_type": "display_data"
449
+ },
450
+ {
451
+ "data": {
452
+ "application/vnd.jupyter.widget-view+json": {
453
+ "model_id": "64204effaf14419f9519fa67dbcdffac",
454
+ "version_major": 2,
455
+ "version_minor": 0
456
+ },
457
+ "text/plain": [
458
+ "adapter_model.bin: 0%| | 0.00/62.8M [00:00<?, ?B/s]"
459
+ ]
460
+ },
461
+ "metadata": {},
462
+ "output_type": "display_data"
463
+ },
464
+ {
465
+ "data": {
466
+ "application/vnd.jupyter.widget-view+json": {
467
+ "model_id": "d904d221326142dab018eef1e192ece5",
468
+ "version_major": 2,
469
+ "version_minor": 0
470
+ },
471
+ "text/plain": [
472
+ "adapter_model.bin: 0%| | 0.00/62.8M [00:00<?, ?B/s]"
473
+ ]
474
+ },
475
+ "metadata": {},
476
+ "output_type": "display_data"
477
+ },
478
+ {
479
+ "data": {
480
+ "application/vnd.jupyter.widget-view+json": {
481
+ "model_id": "b378d36e28dd4d3b8624b25461fd74a6",
482
+ "version_major": 2,
483
+ "version_minor": 0
484
+ },
485
+ "text/plain": [
486
+ "optimizer.pt: 0%| | 0.00/251M [00:00<?, ?B/s]"
487
+ ]
488
+ },
489
+ "metadata": {},
490
+ "output_type": "display_data"
491
+ },
492
+ {
493
+ "data": {
494
+ "application/vnd.jupyter.widget-view+json": {
495
+ "model_id": "df37b47bf87746de81f69960f9cfe4e8",
496
+ "version_major": 2,
497
+ "version_minor": 0
498
+ },
499
+ "text/plain": [
500
+ "rng_state.pth: 0%| | 0.00/14.6k [00:00<?, ?B/s]"
501
+ ]
502
+ },
503
+ "metadata": {},
504
+ "output_type": "display_data"
505
+ },
506
+ {
507
+ "data": {
508
+ "application/vnd.jupyter.widget-view+json": {
509
+ "model_id": "20da2b3cbbe44b2e8da8435d693d28f2",
510
+ "version_major": 2,
511
+ "version_minor": 0
512
+ },
513
+ "text/plain": [
514
+ "scheduler.pt: 0%| | 0.00/627 [00:00<?, ?B/s]"
515
+ ]
516
+ },
517
+ "metadata": {},
518
+ "output_type": "display_data"
519
+ },
520
+ {
521
+ "data": {
522
+ "application/vnd.jupyter.widget-view+json": {
523
+ "model_id": "953085469ce34c21b5d9f55849780e25",
524
+ "version_major": 2,
525
+ "version_minor": 0
526
+ },
527
+ "text/plain": [
528
+ "training_args.bin: 0%| | 0.00/4.22k [00:00<?, ?B/s]"
529
+ ]
530
+ },
531
+ "metadata": {},
532
+ "output_type": "display_data"
533
+ },
534
+ {
535
+ "data": {
536
+ "application/vnd.jupyter.widget-view+json": {
537
+ "model_id": "4b4a15ec387b4ec3a2bcb652daa81651",
538
+ "version_major": 2,
539
+ "version_minor": 0
540
+ },
541
+ "text/plain": [
542
+ "adapter_model.bin: 0%| | 0.00/62.8M [00:00<?, ?B/s]"
543
+ ]
544
+ },
545
+ "metadata": {},
546
+ "output_type": "display_data"
547
+ },
548
+ {
549
+ "data": {
550
+ "application/vnd.jupyter.widget-view+json": {
551
+ "model_id": "b3beba6e06114e45891d074c1df6bfc8",
552
+ "version_major": 2,
553
+ "version_minor": 0
554
+ },
555
+ "text/plain": [
556
+ "adapter_model.bin: 0%| | 0.00/62.8M [00:00<?, ?B/s]"
557
+ ]
558
+ },
559
+ "metadata": {},
560
+ "output_type": "display_data"
561
+ },
562
+ {
563
+ "data": {
564
+ "application/vnd.jupyter.widget-view+json": {
565
+ "model_id": "902a27cfae9843b5968b145adf334838",
566
+ "version_major": 2,
567
+ "version_minor": 0
568
+ },
569
+ "text/plain": [
570
+ "optimizer.pt: 0%| | 0.00/251M [00:00<?, ?B/s]"
571
+ ]
572
+ },
573
+ "metadata": {},
574
+ "output_type": "display_data"
575
+ },
576
+ {
577
+ "data": {
578
+ "application/vnd.jupyter.widget-view+json": {
579
+ "model_id": "34bf777998ea4384841e047233995bd4",
580
+ "version_major": 2,
581
+ "version_minor": 0
582
+ },
583
+ "text/plain": [
584
+ "rng_state.pth: 0%| | 0.00/14.6k [00:00<?, ?B/s]"
585
+ ]
586
+ },
587
+ "metadata": {},
588
+ "output_type": "display_data"
589
+ },
590
+ {
591
+ "data": {
592
+ "application/vnd.jupyter.widget-view+json": {
593
+ "model_id": "2be773b9220844fdbaf85ae04bcf6b5c",
594
+ "version_major": 2,
595
+ "version_minor": 0
596
+ },
597
+ "text/plain": [
598
+ "scheduler.pt: 0%| | 0.00/627 [00:00<?, ?B/s]"
599
+ ]
600
+ },
601
+ "metadata": {},
602
+ "output_type": "display_data"
603
+ },
604
+ {
605
+ "data": {
606
+ "application/vnd.jupyter.widget-view+json": {
607
+ "model_id": "b651756e28e8438a921ab786a3b4ca68",
608
+ "version_major": 2,
609
+ "version_minor": 0
610
+ },
611
+ "text/plain": [
612
+ "training_args.bin: 0%| | 0.00/4.22k [00:00<?, ?B/s]"
613
+ ]
614
+ },
615
+ "metadata": {},
616
+ "output_type": "display_data"
617
+ },
618
+ {
619
+ "data": {
620
+ "application/vnd.jupyter.widget-view+json": {
621
+ "model_id": "382b072e12f347de8318abac597ae873",
622
+ "version_major": 2,
623
+ "version_minor": 0
624
+ },
625
+ "text/plain": [
626
+ "tokenizer.model: 0%| | 0.00/500k [00:00<?, ?B/s]"
627
+ ]
628
+ },
629
+ "metadata": {},
630
+ "output_type": "display_data"
631
+ },
632
+ {
633
+ "data": {
634
+ "text/plain": [
635
+ "'https://huggingface.co/nRuaif/Blind-test01/tree/main/'"
636
+ ]
637
+ },
638
+ "execution_count": 3,
639
+ "metadata": {},
640
+ "output_type": "execute_result"
641
+ }
642
+ ],
643
  "source": [
644
+ "%cd /workspace/axolotl\n",
645
  "from huggingface_hub import HfApi\n",
646
  "\n",
647
  "api = HfApi()\n",
 
651
  "\n",
652
  "api.upload_folder(\n",
653
  "\n",
654
+ " folder_path=r\"qlora-out-kimiko-test2\",\n",
655
  "\n",
656
+ " repo_id=\"nRuaif/Blind-test01\",\n",
657
  "\n",
658
+ " repo_type=\"model\",\n",
659
  "\n",
660
  ")"
661
  ]
662
+ },
663
+ {
664
+ "cell_type": "code",
665
+ "execution_count": null,
666
+ "id": "96533a13-2427-461d-ae0c-6dc81a157da4",
667
+ "metadata": {},
668
+ "outputs": [],
669
+ "source": []
670
  }
671
  ],
672
  "metadata": {
checkpoint-80/README.md ADDED
@@ -0,0 +1,21 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ library_name: peft
3
+ ---
4
+ ## Training procedure
5
+
6
+
7
+ The following `bitsandbytes` quantization config was used during training:
8
+ - quant_method: bitsandbytes
9
+ - load_in_8bit: False
10
+ - load_in_4bit: True
11
+ - llm_int8_threshold: 6.0
12
+ - llm_int8_skip_modules: None
13
+ - llm_int8_enable_fp32_cpu_offload: False
14
+ - llm_int8_has_fp16_weight: False
15
+ - bnb_4bit_quant_type: nf4
16
+ - bnb_4bit_use_double_quant: True
17
+ - bnb_4bit_compute_dtype: bfloat16
18
+ ### Framework versions
19
+
20
+
21
+ - PEFT 0.6.0.dev0
checkpoint-80/adapter_config.json ADDED
@@ -0,0 +1,26 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "auto_mapping": null,
3
+ "base_model_name_or_path": "NousResearch/Llama-2-13b-hf",
4
+ "bias": "none",
5
+ "fan_in_fan_out": null,
6
+ "inference_mode": true,
7
+ "init_lora_weights": true,
8
+ "layers_pattern": null,
9
+ "layers_to_transform": null,
10
+ "lora_alpha": 16,
11
+ "lora_dropout": 0.05,
12
+ "modules_to_save": null,
13
+ "peft_type": "LORA",
14
+ "r": 8,
15
+ "revision": null,
16
+ "target_modules": [
17
+ "q_proj",
18
+ "v_proj",
19
+ "up_proj",
20
+ "gate_proj",
21
+ "o_proj",
22
+ "k_proj",
23
+ "down_proj"
24
+ ],
25
+ "task_type": "CAUSAL_LM"
26
+ }
checkpoint-80/adapter_model.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b02b3dfbd0bca856c6ce4476f4c62193d7eba1ce714131f93fb51a2b17c45a46
3
+ size 62788109
checkpoint-80/adapter_model/README.md ADDED
@@ -0,0 +1,21 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ library_name: peft
3
+ ---
4
+ ## Training procedure
5
+
6
+
7
+ The following `bitsandbytes` quantization config was used during training:
8
+ - quant_method: bitsandbytes
9
+ - load_in_8bit: False
10
+ - load_in_4bit: True
11
+ - llm_int8_threshold: 6.0
12
+ - llm_int8_skip_modules: None
13
+ - llm_int8_enable_fp32_cpu_offload: False
14
+ - llm_int8_has_fp16_weight: False
15
+ - bnb_4bit_quant_type: nf4
16
+ - bnb_4bit_use_double_quant: True
17
+ - bnb_4bit_compute_dtype: bfloat16
18
+ ### Framework versions
19
+
20
+
21
+ - PEFT 0.6.0.dev0
checkpoint-80/adapter_model/adapter_config.json ADDED
@@ -0,0 +1,26 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "auto_mapping": null,
3
+ "base_model_name_or_path": "NousResearch/Llama-2-13b-hf",
4
+ "bias": "none",
5
+ "fan_in_fan_out": null,
6
+ "inference_mode": true,
7
+ "init_lora_weights": true,
8
+ "layers_pattern": null,
9
+ "layers_to_transform": null,
10
+ "lora_alpha": 16,
11
+ "lora_dropout": 0.05,
12
+ "modules_to_save": null,
13
+ "peft_type": "LORA",
14
+ "r": 8,
15
+ "revision": null,
16
+ "target_modules": [
17
+ "q_proj",
18
+ "v_proj",
19
+ "up_proj",
20
+ "gate_proj",
21
+ "o_proj",
22
+ "k_proj",
23
+ "down_proj"
24
+ ],
25
+ "task_type": "CAUSAL_LM"
26
+ }
checkpoint-80/adapter_model/adapter_model.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b02b3dfbd0bca856c6ce4476f4c62193d7eba1ce714131f93fb51a2b17c45a46
3
+ size 62788109
checkpoint-80/optimizer.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b3a5c22256bf7a260d63bca07fd35b019b55a13585d63f93eb2c1e660ce15f82
3
+ size 250681597
checkpoint-80/rng_state.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:45bcedc0ae82cd3fd70ae5011bceda317cd35ef848603a65502df2f3c0b4a744
3
+ size 14575
checkpoint-80/scheduler.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8da1a0d72c758025f2d5ff7e4ce93941f6437740a985863dfd164eb2e8ec130c
3
+ size 627
checkpoint-80/trainer_state.json ADDED
@@ -0,0 +1,563 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_metric": 2.0174262523651123,
3
+ "best_model_checkpoint": "./qlora-out-kimiko-test2/checkpoint-80",
4
+ "epoch": 2.0678513731825525,
5
+ "eval_steps": 10,
6
+ "global_step": 80,
7
+ "is_hyper_param_search": false,
8
+ "is_local_process_zero": true,
9
+ "is_world_process_zero": true,
10
+ "log_history": [
11
+ {
12
+ "epoch": 0.03,
13
+ "learning_rate": 1e-05,
14
+ "loss": 1.5707,
15
+ "step": 1
16
+ },
17
+ {
18
+ "epoch": 0.05,
19
+ "learning_rate": 2e-05,
20
+ "loss": 1.5621,
21
+ "step": 2
22
+ },
23
+ {
24
+ "epoch": 0.08,
25
+ "learning_rate": 3e-05,
26
+ "loss": 1.4812,
27
+ "step": 3
28
+ },
29
+ {
30
+ "epoch": 0.1,
31
+ "learning_rate": 4e-05,
32
+ "loss": 1.5197,
33
+ "step": 4
34
+ },
35
+ {
36
+ "epoch": 0.13,
37
+ "learning_rate": 5e-05,
38
+ "loss": 1.5567,
39
+ "step": 5
40
+ },
41
+ {
42
+ "epoch": 0.16,
43
+ "learning_rate": 5e-05,
44
+ "loss": 1.4645,
45
+ "step": 6
46
+ },
47
+ {
48
+ "epoch": 0.18,
49
+ "learning_rate": 5e-05,
50
+ "loss": 1.6122,
51
+ "step": 7
52
+ },
53
+ {
54
+ "epoch": 0.21,
55
+ "learning_rate": 5e-05,
56
+ "loss": 1.5596,
57
+ "step": 8
58
+ },
59
+ {
60
+ "epoch": 0.23,
61
+ "learning_rate": 5e-05,
62
+ "loss": 1.5608,
63
+ "step": 9
64
+ },
65
+ {
66
+ "epoch": 0.26,
67
+ "learning_rate": 5e-05,
68
+ "loss": 1.5456,
69
+ "step": 10
70
+ },
71
+ {
72
+ "epoch": 0.26,
73
+ "eval_loss": 2.098437547683716,
74
+ "eval_runtime": 119.6161,
75
+ "eval_samples_per_second": 1.555,
76
+ "eval_steps_per_second": 0.777,
77
+ "step": 10
78
+ },
79
+ {
80
+ "epoch": 0.28,
81
+ "learning_rate": 5e-05,
82
+ "loss": 1.5645,
83
+ "step": 11
84
+ },
85
+ {
86
+ "epoch": 0.31,
87
+ "learning_rate": 5e-05,
88
+ "loss": 1.538,
89
+ "step": 12
90
+ },
91
+ {
92
+ "epoch": 0.34,
93
+ "learning_rate": 5e-05,
94
+ "loss": 1.6388,
95
+ "step": 13
96
+ },
97
+ {
98
+ "epoch": 0.36,
99
+ "learning_rate": 5e-05,
100
+ "loss": 1.4943,
101
+ "step": 14
102
+ },
103
+ {
104
+ "epoch": 0.39,
105
+ "learning_rate": 5e-05,
106
+ "loss": 1.5469,
107
+ "step": 15
108
+ },
109
+ {
110
+ "epoch": 0.41,
111
+ "learning_rate": 5e-05,
112
+ "loss": 1.6149,
113
+ "step": 16
114
+ },
115
+ {
116
+ "epoch": 0.44,
117
+ "learning_rate": 5e-05,
118
+ "loss": 1.5345,
119
+ "step": 17
120
+ },
121
+ {
122
+ "epoch": 0.47,
123
+ "learning_rate": 5e-05,
124
+ "loss": 1.4903,
125
+ "step": 18
126
+ },
127
+ {
128
+ "epoch": 0.49,
129
+ "learning_rate": 5e-05,
130
+ "loss": 1.5499,
131
+ "step": 19
132
+ },
133
+ {
134
+ "epoch": 0.52,
135
+ "learning_rate": 5e-05,
136
+ "loss": 1.5934,
137
+ "step": 20
138
+ },
139
+ {
140
+ "epoch": 0.52,
141
+ "eval_loss": 2.066981077194214,
142
+ "eval_runtime": 119.5781,
143
+ "eval_samples_per_second": 1.555,
144
+ "eval_steps_per_second": 0.778,
145
+ "step": 20
146
+ },
147
+ {
148
+ "epoch": 0.54,
149
+ "learning_rate": 5e-05,
150
+ "loss": 1.4554,
151
+ "step": 21
152
+ },
153
+ {
154
+ "epoch": 0.57,
155
+ "learning_rate": 5e-05,
156
+ "loss": 1.5512,
157
+ "step": 22
158
+ },
159
+ {
160
+ "epoch": 0.59,
161
+ "learning_rate": 5e-05,
162
+ "loss": 1.4636,
163
+ "step": 23
164
+ },
165
+ {
166
+ "epoch": 0.62,
167
+ "learning_rate": 5e-05,
168
+ "loss": 1.5398,
169
+ "step": 24
170
+ },
171
+ {
172
+ "epoch": 0.65,
173
+ "learning_rate": 5e-05,
174
+ "loss": 1.5623,
175
+ "step": 25
176
+ },
177
+ {
178
+ "epoch": 0.67,
179
+ "learning_rate": 5e-05,
180
+ "loss": 1.4658,
181
+ "step": 26
182
+ },
183
+ {
184
+ "epoch": 0.7,
185
+ "learning_rate": 5e-05,
186
+ "loss": 1.4723,
187
+ "step": 27
188
+ },
189
+ {
190
+ "epoch": 0.72,
191
+ "learning_rate": 5e-05,
192
+ "loss": 1.432,
193
+ "step": 28
194
+ },
195
+ {
196
+ "epoch": 0.75,
197
+ "learning_rate": 5e-05,
198
+ "loss": 1.4814,
199
+ "step": 29
200
+ },
201
+ {
202
+ "epoch": 0.78,
203
+ "learning_rate": 5e-05,
204
+ "loss": 1.4924,
205
+ "step": 30
206
+ },
207
+ {
208
+ "epoch": 0.78,
209
+ "eval_loss": 2.046339750289917,
210
+ "eval_runtime": 119.5771,
211
+ "eval_samples_per_second": 1.555,
212
+ "eval_steps_per_second": 0.778,
213
+ "step": 30
214
+ },
215
+ {
216
+ "epoch": 0.8,
217
+ "learning_rate": 5e-05,
218
+ "loss": 1.5809,
219
+ "step": 31
220
+ },
221
+ {
222
+ "epoch": 0.83,
223
+ "learning_rate": 5e-05,
224
+ "loss": 1.4803,
225
+ "step": 32
226
+ },
227
+ {
228
+ "epoch": 0.85,
229
+ "learning_rate": 5e-05,
230
+ "loss": 1.4878,
231
+ "step": 33
232
+ },
233
+ {
234
+ "epoch": 0.88,
235
+ "learning_rate": 5e-05,
236
+ "loss": 1.3871,
237
+ "step": 34
238
+ },
239
+ {
240
+ "epoch": 0.9,
241
+ "learning_rate": 5e-05,
242
+ "loss": 1.5151,
243
+ "step": 35
244
+ },
245
+ {
246
+ "epoch": 0.93,
247
+ "learning_rate": 5e-05,
248
+ "loss": 1.4212,
249
+ "step": 36
250
+ },
251
+ {
252
+ "epoch": 0.96,
253
+ "learning_rate": 5e-05,
254
+ "loss": 1.6284,
255
+ "step": 37
256
+ },
257
+ {
258
+ "epoch": 0.98,
259
+ "learning_rate": 5e-05,
260
+ "loss": 1.5002,
261
+ "step": 38
262
+ },
263
+ {
264
+ "epoch": 1.01,
265
+ "learning_rate": 5e-05,
266
+ "loss": 1.4452,
267
+ "step": 39
268
+ },
269
+ {
270
+ "epoch": 1.03,
271
+ "learning_rate": 5e-05,
272
+ "loss": 1.4399,
273
+ "step": 40
274
+ },
275
+ {
276
+ "epoch": 1.03,
277
+ "eval_loss": 2.0354697704315186,
278
+ "eval_runtime": 119.5875,
279
+ "eval_samples_per_second": 1.555,
280
+ "eval_steps_per_second": 0.778,
281
+ "step": 40
282
+ },
283
+ {
284
+ "epoch": 1.06,
285
+ "learning_rate": 5e-05,
286
+ "loss": 1.465,
287
+ "step": 41
288
+ },
289
+ {
290
+ "epoch": 1.09,
291
+ "learning_rate": 5e-05,
292
+ "loss": 1.4199,
293
+ "step": 42
294
+ },
295
+ {
296
+ "epoch": 1.11,
297
+ "learning_rate": 5e-05,
298
+ "loss": 1.5403,
299
+ "step": 43
300
+ },
301
+ {
302
+ "epoch": 1.14,
303
+ "learning_rate": 5e-05,
304
+ "loss": 1.4499,
305
+ "step": 44
306
+ },
307
+ {
308
+ "epoch": 1.16,
309
+ "learning_rate": 5e-05,
310
+ "loss": 1.5751,
311
+ "step": 45
312
+ },
313
+ {
314
+ "epoch": 1.19,
315
+ "learning_rate": 5e-05,
316
+ "loss": 1.4809,
317
+ "step": 46
318
+ },
319
+ {
320
+ "epoch": 1.21,
321
+ "learning_rate": 5e-05,
322
+ "loss": 1.5022,
323
+ "step": 47
324
+ },
325
+ {
326
+ "epoch": 1.24,
327
+ "learning_rate": 5e-05,
328
+ "loss": 1.4663,
329
+ "step": 48
330
+ },
331
+ {
332
+ "epoch": 1.27,
333
+ "learning_rate": 5e-05,
334
+ "loss": 1.4435,
335
+ "step": 49
336
+ },
337
+ {
338
+ "epoch": 1.29,
339
+ "learning_rate": 5e-05,
340
+ "loss": 1.4246,
341
+ "step": 50
342
+ },
343
+ {
344
+ "epoch": 1.29,
345
+ "eval_loss": 2.0276732444763184,
346
+ "eval_runtime": 119.5811,
347
+ "eval_samples_per_second": 1.555,
348
+ "eval_steps_per_second": 0.778,
349
+ "step": 50
350
+ },
351
+ {
352
+ "epoch": 1.32,
353
+ "learning_rate": 5e-05,
354
+ "loss": 1.4877,
355
+ "step": 51
356
+ },
357
+ {
358
+ "epoch": 1.34,
359
+ "learning_rate": 5e-05,
360
+ "loss": 1.4066,
361
+ "step": 52
362
+ },
363
+ {
364
+ "epoch": 1.37,
365
+ "learning_rate": 5e-05,
366
+ "loss": 1.3559,
367
+ "step": 53
368
+ },
369
+ {
370
+ "epoch": 1.4,
371
+ "learning_rate": 5e-05,
372
+ "loss": 1.5591,
373
+ "step": 54
374
+ },
375
+ {
376
+ "epoch": 1.42,
377
+ "learning_rate": 5e-05,
378
+ "loss": 1.4942,
379
+ "step": 55
380
+ },
381
+ {
382
+ "epoch": 1.45,
383
+ "learning_rate": 5e-05,
384
+ "loss": 1.4685,
385
+ "step": 56
386
+ },
387
+ {
388
+ "epoch": 1.47,
389
+ "learning_rate": 5e-05,
390
+ "loss": 1.4165,
391
+ "step": 57
392
+ },
393
+ {
394
+ "epoch": 1.5,
395
+ "learning_rate": 5e-05,
396
+ "loss": 1.3995,
397
+ "step": 58
398
+ },
399
+ {
400
+ "epoch": 1.53,
401
+ "learning_rate": 5e-05,
402
+ "loss": 1.3931,
403
+ "step": 59
404
+ },
405
+ {
406
+ "epoch": 1.55,
407
+ "learning_rate": 5e-05,
408
+ "loss": 1.4234,
409
+ "step": 60
410
+ },
411
+ {
412
+ "epoch": 1.55,
413
+ "eval_loss": 2.0228564739227295,
414
+ "eval_runtime": 119.5715,
415
+ "eval_samples_per_second": 1.556,
416
+ "eval_steps_per_second": 0.778,
417
+ "step": 60
418
+ },
419
+ {
420
+ "epoch": 1.58,
421
+ "learning_rate": 5e-05,
422
+ "loss": 1.4732,
423
+ "step": 61
424
+ },
425
+ {
426
+ "epoch": 1.6,
427
+ "learning_rate": 5e-05,
428
+ "loss": 1.4349,
429
+ "step": 62
430
+ },
431
+ {
432
+ "epoch": 1.63,
433
+ "learning_rate": 5e-05,
434
+ "loss": 1.4548,
435
+ "step": 63
436
+ },
437
+ {
438
+ "epoch": 1.65,
439
+ "learning_rate": 5e-05,
440
+ "loss": 1.48,
441
+ "step": 64
442
+ },
443
+ {
444
+ "epoch": 1.68,
445
+ "learning_rate": 5e-05,
446
+ "loss": 1.3789,
447
+ "step": 65
448
+ },
449
+ {
450
+ "epoch": 1.71,
451
+ "learning_rate": 5e-05,
452
+ "loss": 1.3915,
453
+ "step": 66
454
+ },
455
+ {
456
+ "epoch": 1.73,
457
+ "learning_rate": 5e-05,
458
+ "loss": 1.3789,
459
+ "step": 67
460
+ },
461
+ {
462
+ "epoch": 1.76,
463
+ "learning_rate": 5e-05,
464
+ "loss": 1.5206,
465
+ "step": 68
466
+ },
467
+ {
468
+ "epoch": 1.78,
469
+ "learning_rate": 5e-05,
470
+ "loss": 1.4851,
471
+ "step": 69
472
+ },
473
+ {
474
+ "epoch": 1.81,
475
+ "learning_rate": 5e-05,
476
+ "loss": 1.5251,
477
+ "step": 70
478
+ },
479
+ {
480
+ "epoch": 1.81,
481
+ "eval_loss": 2.0199856758117676,
482
+ "eval_runtime": 119.5994,
483
+ "eval_samples_per_second": 1.555,
484
+ "eval_steps_per_second": 0.778,
485
+ "step": 70
486
+ },
487
+ {
488
+ "epoch": 1.84,
489
+ "learning_rate": 5e-05,
490
+ "loss": 1.4152,
491
+ "step": 71
492
+ },
493
+ {
494
+ "epoch": 1.86,
495
+ "learning_rate": 5e-05,
496
+ "loss": 1.4262,
497
+ "step": 72
498
+ },
499
+ {
500
+ "epoch": 1.89,
501
+ "learning_rate": 5e-05,
502
+ "loss": 1.5563,
503
+ "step": 73
504
+ },
505
+ {
506
+ "epoch": 1.91,
507
+ "learning_rate": 5e-05,
508
+ "loss": 1.42,
509
+ "step": 74
510
+ },
511
+ {
512
+ "epoch": 1.94,
513
+ "learning_rate": 5e-05,
514
+ "loss": 1.4407,
515
+ "step": 75
516
+ },
517
+ {
518
+ "epoch": 1.96,
519
+ "learning_rate": 5e-05,
520
+ "loss": 1.3953,
521
+ "step": 76
522
+ },
523
+ {
524
+ "epoch": 1.99,
525
+ "learning_rate": 5e-05,
526
+ "loss": 1.4186,
527
+ "step": 77
528
+ },
529
+ {
530
+ "epoch": 2.02,
531
+ "learning_rate": 5e-05,
532
+ "loss": 1.3863,
533
+ "step": 78
534
+ },
535
+ {
536
+ "epoch": 2.04,
537
+ "learning_rate": 5e-05,
538
+ "loss": 1.3907,
539
+ "step": 79
540
+ },
541
+ {
542
+ "epoch": 2.07,
543
+ "learning_rate": 5e-05,
544
+ "loss": 1.5147,
545
+ "step": 80
546
+ },
547
+ {
548
+ "epoch": 2.07,
549
+ "eval_loss": 2.0174262523651123,
550
+ "eval_runtime": 119.6245,
551
+ "eval_samples_per_second": 1.555,
552
+ "eval_steps_per_second": 0.777,
553
+ "step": 80
554
+ }
555
+ ],
556
+ "logging_steps": 1,
557
+ "max_steps": 114,
558
+ "num_train_epochs": 3,
559
+ "save_steps": 10,
560
+ "total_flos": 3.667626583051469e+17,
561
+ "trial_name": null,
562
+ "trial_params": null
563
+ }
checkpoint-80/training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ca362c6e6a1bbe2523b2190a501c92d6dbb3db6186bef551619d83852cca3df1
3
+ size 4219