DavidGF commited on
Commit
ff0e674
·
verified ·
1 Parent(s): 73f6898

Delete trainer_state.json

Browse files
Files changed (1) hide show
  1. trainer_state.json +0 -614
trainer_state.json DELETED
@@ -1,614 +0,0 @@
1
- {
2
- "best_metric": null,
3
- "best_model_checkpoint": null,
4
- "epoch": 5.217500628614534,
5
- "eval_steps": 500,
6
- "global_step": 41500,
7
- "is_hyper_param_search": false,
8
- "is_local_process_zero": true,
9
- "is_world_process_zero": true,
10
- "log_history": [
11
- {
12
- "epoch": 0.06286145335680161,
13
- "grad_norm": 2.0346406017779373e-06,
14
- "learning_rate": 1.9820395847551996e-05,
15
- "loss": 0.1532,
16
- "step": 500
17
- },
18
- {
19
- "epoch": 0.12572290671360323,
20
- "grad_norm": 3.3444328437326476e-05,
21
- "learning_rate": 1.9640791695103994e-05,
22
- "loss": 0.116,
23
- "step": 1000
24
- },
25
- {
26
- "epoch": 0.18858436007040483,
27
- "grad_norm": 0.05133463069796562,
28
- "learning_rate": 1.9461187542655988e-05,
29
- "loss": 0.0844,
30
- "step": 1500
31
- },
32
- {
33
- "epoch": 0.25144581342720645,
34
- "grad_norm": 6.244395626708865e-05,
35
- "learning_rate": 1.9281583390207982e-05,
36
- "loss": 0.0183,
37
- "step": 2000
38
- },
39
- {
40
- "epoch": 0.31430726678400805,
41
- "grad_norm": 2.1667106011591386e-06,
42
- "learning_rate": 1.9101979237759977e-05,
43
- "loss": 0.0422,
44
- "step": 2500
45
- },
46
- {
47
- "epoch": 0.37716872014080965,
48
- "grad_norm": 0.003454663557931781,
49
- "learning_rate": 1.8922375085311975e-05,
50
- "loss": 0.0682,
51
- "step": 3000
52
- },
53
- {
54
- "epoch": 0.44003017349761125,
55
- "grad_norm": 9.719051740830764e-05,
56
- "learning_rate": 1.874277093286397e-05,
57
- "loss": 0.0394,
58
- "step": 3500
59
- },
60
- {
61
- "epoch": 0.5028916268544129,
62
- "grad_norm": 557.3906860351562,
63
- "learning_rate": 1.8563166780415963e-05,
64
- "loss": 0.0471,
65
- "step": 4000
66
- },
67
- {
68
- "epoch": 0.5657530802112145,
69
- "grad_norm": 0.07539618015289307,
70
- "learning_rate": 1.8383562627967958e-05,
71
- "loss": 0.018,
72
- "step": 4500
73
- },
74
- {
75
- "epoch": 0.6286145335680161,
76
- "grad_norm": 0.0003254573093727231,
77
- "learning_rate": 1.8203958475519955e-05,
78
- "loss": 0.0369,
79
- "step": 5000
80
- },
81
- {
82
- "epoch": 0.6914759869248177,
83
- "grad_norm": 2.24390605580993e-05,
84
- "learning_rate": 1.802435432307195e-05,
85
- "loss": 0.0571,
86
- "step": 5500
87
- },
88
- {
89
- "epoch": 0.7543374402816193,
90
- "grad_norm": 1.3767588825430721e-05,
91
- "learning_rate": 1.7844750170623944e-05,
92
- "loss": 0.0391,
93
- "step": 6000
94
- },
95
- {
96
- "epoch": 0.8171988936384209,
97
- "grad_norm": 0.004321901593357325,
98
- "learning_rate": 1.7665146018175942e-05,
99
- "loss": 0.0186,
100
- "step": 6500
101
- },
102
- {
103
- "epoch": 0.8800603469952225,
104
- "grad_norm": 2.2530954083777033e-05,
105
- "learning_rate": 1.7485541865727936e-05,
106
- "loss": 0.0138,
107
- "step": 7000
108
- },
109
- {
110
- "epoch": 0.9429218003520241,
111
- "grad_norm": 0.001634115818887949,
112
- "learning_rate": 1.7305937713279934e-05,
113
- "loss": 0.0164,
114
- "step": 7500
115
- },
116
- {
117
- "epoch": 1.0057832537088258,
118
- "grad_norm": 3.9654673855693545e-06,
119
- "learning_rate": 1.7126333560831928e-05,
120
- "loss": 0.0266,
121
- "step": 8000
122
- },
123
- {
124
- "epoch": 1.0686447070656273,
125
- "grad_norm": 1.5307747389670112e-06,
126
- "learning_rate": 1.6946729408383923e-05,
127
- "loss": 0.0064,
128
- "step": 8500
129
- },
130
- {
131
- "epoch": 1.131506160422429,
132
- "grad_norm": 0.004159221425652504,
133
- "learning_rate": 1.676712525593592e-05,
134
- "loss": 0.0376,
135
- "step": 9000
136
- },
137
- {
138
- "epoch": 1.1943676137792305,
139
- "grad_norm": 0.0006104345084168017,
140
- "learning_rate": 1.6587521103487915e-05,
141
- "loss": 0.0346,
142
- "step": 9500
143
- },
144
- {
145
- "epoch": 1.2572290671360322,
146
- "grad_norm": 4.2246105294907466e-05,
147
- "learning_rate": 1.640791695103991e-05,
148
- "loss": 0.0219,
149
- "step": 10000
150
- },
151
- {
152
- "epoch": 1.3200905204928337,
153
- "grad_norm": 4.186299065622734e-06,
154
- "learning_rate": 1.6228312798591907e-05,
155
- "loss": 0.0135,
156
- "step": 10500
157
- },
158
- {
159
- "epoch": 1.3829519738496354,
160
- "grad_norm": 0.00025950674898922443,
161
- "learning_rate": 1.60487086461439e-05,
162
- "loss": 0.0324,
163
- "step": 11000
164
- },
165
- {
166
- "epoch": 1.4458134272064371,
167
- "grad_norm": 5.111323480377905e-05,
168
- "learning_rate": 1.5869104493695896e-05,
169
- "loss": 0.0221,
170
- "step": 11500
171
- },
172
- {
173
- "epoch": 1.5086748805632386,
174
- "grad_norm": 0.0006640023784711957,
175
- "learning_rate": 1.5689500341247893e-05,
176
- "loss": 0.019,
177
- "step": 12000
178
- },
179
- {
180
- "epoch": 1.57153633392004,
181
- "grad_norm": 5.866236278961878e-06,
182
- "learning_rate": 1.5509896188799888e-05,
183
- "loss": 0.0243,
184
- "step": 12500
185
- },
186
- {
187
- "epoch": 1.6343977872768418,
188
- "grad_norm": 5.0747665227390826e-05,
189
- "learning_rate": 1.5330292036351882e-05,
190
- "loss": 0.0199,
191
- "step": 13000
192
- },
193
- {
194
- "epoch": 1.6972592406336435,
195
- "grad_norm": 0.0017497913213446736,
196
- "learning_rate": 1.5150687883903876e-05,
197
- "loss": 0.0142,
198
- "step": 13500
199
- },
200
- {
201
- "epoch": 1.760120693990445,
202
- "grad_norm": 4.546816853689961e-05,
203
- "learning_rate": 1.4971083731455874e-05,
204
- "loss": 0.0336,
205
- "step": 14000
206
- },
207
- {
208
- "epoch": 1.8229821473472465,
209
- "grad_norm": 0.005370904225856066,
210
- "learning_rate": 1.4791479579007868e-05,
211
- "loss": 0.0269,
212
- "step": 14500
213
- },
214
- {
215
- "epoch": 1.8858436007040482,
216
- "grad_norm": 0.0037405244074761868,
217
- "learning_rate": 1.4611875426559863e-05,
218
- "loss": 0.009,
219
- "step": 15000
220
- },
221
- {
222
- "epoch": 1.94870505406085,
223
- "grad_norm": 0.003770838025957346,
224
- "learning_rate": 1.4432271274111857e-05,
225
- "loss": 0.0115,
226
- "step": 15500
227
- },
228
- {
229
- "epoch": 2.0115665074176516,
230
- "grad_norm": 0.10231239348649979,
231
- "learning_rate": 1.4252667121663855e-05,
232
- "loss": 0.0155,
233
- "step": 16000
234
- },
235
- {
236
- "epoch": 2.074427960774453,
237
- "grad_norm": 0.00043545279186218977,
238
- "learning_rate": 1.407306296921585e-05,
239
- "loss": 0.0161,
240
- "step": 16500
241
- },
242
- {
243
- "epoch": 2.1372894141312546,
244
- "grad_norm": 0.0013906797394156456,
245
- "learning_rate": 1.3893458816767844e-05,
246
- "loss": 0.0111,
247
- "step": 17000
248
- },
249
- {
250
- "epoch": 2.2001508674880563,
251
- "grad_norm": 0.03360624238848686,
252
- "learning_rate": 1.3713854664319841e-05,
253
- "loss": 0.0043,
254
- "step": 17500
255
- },
256
- {
257
- "epoch": 2.263012320844858,
258
- "grad_norm": 2.512960600142833e-05,
259
- "learning_rate": 1.3534250511871836e-05,
260
- "loss": 0.0075,
261
- "step": 18000
262
- },
263
- {
264
- "epoch": 2.3258737742016597,
265
- "grad_norm": 0.0004499895148910582,
266
- "learning_rate": 1.335464635942383e-05,
267
- "loss": 0.0021,
268
- "step": 18500
269
- },
270
- {
271
- "epoch": 2.388735227558461,
272
- "grad_norm": 0.00035007469705305994,
273
- "learning_rate": 1.3175042206975826e-05,
274
- "loss": 0.0093,
275
- "step": 19000
276
- },
277
- {
278
- "epoch": 2.4515966809152627,
279
- "grad_norm": 0.001043146476149559,
280
- "learning_rate": 1.2995438054527822e-05,
281
- "loss": 0.0098,
282
- "step": 19500
283
- },
284
- {
285
- "epoch": 2.5144581342720644,
286
- "grad_norm": 0.0009557796292938292,
287
- "learning_rate": 1.2815833902079816e-05,
288
- "loss": 0.0111,
289
- "step": 20000
290
- },
291
- {
292
- "epoch": 2.5773195876288657,
293
- "grad_norm": 0.00020876774215139449,
294
- "learning_rate": 1.2636229749631813e-05,
295
- "loss": 0.0,
296
- "step": 20500
297
- },
298
- {
299
- "epoch": 2.6401810409856674,
300
- "grad_norm": 0.00011447193537605926,
301
- "learning_rate": 1.2456625597183807e-05,
302
- "loss": 0.0119,
303
- "step": 21000
304
- },
305
- {
306
- "epoch": 2.703042494342469,
307
- "grad_norm": 0.00028604810358956456,
308
- "learning_rate": 1.2277021444735805e-05,
309
- "loss": 0.0112,
310
- "step": 21500
311
- },
312
- {
313
- "epoch": 2.765903947699271,
314
- "grad_norm": 2.5067993192351423e-05,
315
- "learning_rate": 1.2097417292287799e-05,
316
- "loss": 0.0094,
317
- "step": 22000
318
- },
319
- {
320
- "epoch": 2.8287654010560725,
321
- "grad_norm": 0.00034995036548934877,
322
- "learning_rate": 1.1917813139839793e-05,
323
- "loss": 0.0088,
324
- "step": 22500
325
- },
326
- {
327
- "epoch": 2.8916268544128743,
328
- "grad_norm": 1.091743524739286e-05,
329
- "learning_rate": 1.1738208987391791e-05,
330
- "loss": 0.0,
331
- "step": 23000
332
- },
333
- {
334
- "epoch": 2.9544883077696755,
335
- "grad_norm": 0.00016331751248799264,
336
- "learning_rate": 1.1558604834943785e-05,
337
- "loss": 0.0161,
338
- "step": 23500
339
- },
340
- {
341
- "epoch": 3.0173497611264772,
342
- "grad_norm": 4.763499600812793e-05,
343
- "learning_rate": 1.137900068249578e-05,
344
- "loss": 0.005,
345
- "step": 24000
346
- },
347
- {
348
- "epoch": 3.080211214483279,
349
- "grad_norm": 0.00027739102370105684,
350
- "learning_rate": 1.1199396530047774e-05,
351
- "loss": 0.008,
352
- "step": 24500
353
- },
354
- {
355
- "epoch": 3.1430726678400807,
356
- "grad_norm": 0.001488861278630793,
357
- "learning_rate": 1.1019792377599772e-05,
358
- "loss": 0.0047,
359
- "step": 25000
360
- },
361
- {
362
- "epoch": 3.205934121196882,
363
- "grad_norm": 0.00010821361502166837,
364
- "learning_rate": 1.0840188225151766e-05,
365
- "loss": 0.001,
366
- "step": 25500
367
- },
368
- {
369
- "epoch": 3.2687955745536836,
370
- "grad_norm": 7.778286089887843e-05,
371
- "learning_rate": 1.066058407270376e-05,
372
- "loss": 0.006,
373
- "step": 26000
374
- },
375
- {
376
- "epoch": 3.3316570279104853,
377
- "grad_norm": 0.003971356898546219,
378
- "learning_rate": 1.0480979920255758e-05,
379
- "loss": 0.0037,
380
- "step": 26500
381
- },
382
- {
383
- "epoch": 3.394518481267287,
384
- "grad_norm": 0.0018279771320521832,
385
- "learning_rate": 1.0301375767807753e-05,
386
- "loss": 0.007,
387
- "step": 27000
388
- },
389
- {
390
- "epoch": 3.4573799346240883,
391
- "grad_norm": 2.947276516351849e-05,
392
- "learning_rate": 1.0121771615359747e-05,
393
- "loss": 0.0033,
394
- "step": 27500
395
- },
396
- {
397
- "epoch": 3.52024138798089,
398
- "grad_norm": 0.0054627698846161366,
399
- "learning_rate": 9.942167462911743e-06,
400
- "loss": 0.0029,
401
- "step": 28000
402
- },
403
- {
404
- "epoch": 3.5831028413376917,
405
- "grad_norm": 6.94687114446424e-05,
406
- "learning_rate": 9.762563310463739e-06,
407
- "loss": 0.0023,
408
- "step": 28500
409
- },
410
- {
411
- "epoch": 3.6459642946944935,
412
- "grad_norm": 2.092416980303824e-05,
413
- "learning_rate": 9.582959158015735e-06,
414
- "loss": 0.0043,
415
- "step": 29000
416
- },
417
- {
418
- "epoch": 3.7088257480512947,
419
- "grad_norm": 0.00010166186257265508,
420
- "learning_rate": 9.40335500556773e-06,
421
- "loss": 0.0067,
422
- "step": 29500
423
- },
424
- {
425
- "epoch": 3.7716872014080964,
426
- "grad_norm": 2.6766716473503038e-05,
427
- "learning_rate": 9.223750853119726e-06,
428
- "loss": 0.0052,
429
- "step": 30000
430
- },
431
- {
432
- "epoch": 3.834548654764898,
433
- "grad_norm": 2.3570633857161738e-05,
434
- "learning_rate": 9.04414670067172e-06,
435
- "loss": 0.0026,
436
- "step": 30500
437
- },
438
- {
439
- "epoch": 3.8974101081217,
440
- "grad_norm": 0.00028839215519838035,
441
- "learning_rate": 8.864542548223716e-06,
442
- "loss": 0.015,
443
- "step": 31000
444
- },
445
- {
446
- "epoch": 3.9602715614785016,
447
- "grad_norm": 5.503819693331025e-07,
448
- "learning_rate": 8.68493839577571e-06,
449
- "loss": 0.0032,
450
- "step": 31500
451
- },
452
- {
453
- "epoch": 4.023133014835303,
454
- "grad_norm": 0.019892163574695587,
455
- "learning_rate": 8.505334243327706e-06,
456
- "loss": 0.0068,
457
- "step": 32000
458
- },
459
- {
460
- "epoch": 4.085994468192105,
461
- "grad_norm": 9.153469727607444e-05,
462
- "learning_rate": 8.325730090879702e-06,
463
- "loss": 0.0047,
464
- "step": 32500
465
- },
466
- {
467
- "epoch": 4.148855921548906,
468
- "grad_norm": 0.0040658023208379745,
469
- "learning_rate": 8.146125938431697e-06,
470
- "loss": 0.0019,
471
- "step": 33000
472
- },
473
- {
474
- "epoch": 4.2117173749057075,
475
- "grad_norm": 2.282762579852715e-05,
476
- "learning_rate": 7.966521785983693e-06,
477
- "loss": 0.0035,
478
- "step": 33500
479
- },
480
- {
481
- "epoch": 4.274578828262509,
482
- "grad_norm": 5.513780706678517e-06,
483
- "learning_rate": 7.786917633535687e-06,
484
- "loss": 0.005,
485
- "step": 34000
486
- },
487
- {
488
- "epoch": 4.337440281619311,
489
- "grad_norm": 0.008914977312088013,
490
- "learning_rate": 7.607313481087683e-06,
491
- "loss": 0.0045,
492
- "step": 34500
493
- },
494
- {
495
- "epoch": 4.400301734976113,
496
- "grad_norm": 6.84509941493161e-05,
497
- "learning_rate": 7.4277093286396784e-06,
498
- "loss": 0.0003,
499
- "step": 35000
500
- },
501
- {
502
- "epoch": 4.463163188332914,
503
- "grad_norm": 1.599622737558093e-05,
504
- "learning_rate": 7.2481051761916745e-06,
505
- "loss": 0.0,
506
- "step": 35500
507
- },
508
- {
509
- "epoch": 4.526024641689716,
510
- "grad_norm": 1.801977305149194e-05,
511
- "learning_rate": 7.068501023743669e-06,
512
- "loss": 0.0,
513
- "step": 36000
514
- },
515
- {
516
- "epoch": 4.588886095046518,
517
- "grad_norm": 1.085722124116728e-05,
518
- "learning_rate": 6.888896871295665e-06,
519
- "loss": 0.0044,
520
- "step": 36500
521
- },
522
- {
523
- "epoch": 4.6517475484033195,
524
- "grad_norm": 2.5041081244125962e-05,
525
- "learning_rate": 6.70929271884766e-06,
526
- "loss": 0.0087,
527
- "step": 37000
528
- },
529
- {
530
- "epoch": 4.71460900176012,
531
- "grad_norm": 0.0013843434862792492,
532
- "learning_rate": 6.529688566399655e-06,
533
- "loss": 0.0052,
534
- "step": 37500
535
- },
536
- {
537
- "epoch": 4.777470455116922,
538
- "grad_norm": 0.00015471581718884408,
539
- "learning_rate": 6.350084413951651e-06,
540
- "loss": 0.0015,
541
- "step": 38000
542
- },
543
- {
544
- "epoch": 4.840331908473724,
545
- "grad_norm": 0.00010056547034764662,
546
- "learning_rate": 6.1704802615036465e-06,
547
- "loss": 0.0001,
548
- "step": 38500
549
- },
550
- {
551
- "epoch": 4.9031933618305255,
552
- "grad_norm": 2.1669136913260445e-05,
553
- "learning_rate": 5.990876109055643e-06,
554
- "loss": 0.0,
555
- "step": 39000
556
- },
557
- {
558
- "epoch": 4.966054815187327,
559
- "grad_norm": 3.164667577948421e-05,
560
- "learning_rate": 5.811271956607637e-06,
561
- "loss": 0.0,
562
- "step": 39500
563
- },
564
- {
565
- "epoch": 5.028916268544129,
566
- "grad_norm": 3.5525452403817326e-05,
567
- "learning_rate": 5.631667804159633e-06,
568
- "loss": 0.0,
569
- "step": 40000
570
- },
571
- {
572
- "epoch": 5.091777721900931,
573
- "grad_norm": 1.85646513273241e-05,
574
- "learning_rate": 5.452063651711627e-06,
575
- "loss": 0.0,
576
- "step": 40500
577
- },
578
- {
579
- "epoch": 5.154639175257732,
580
- "grad_norm": 1.2565506040118635e-05,
581
- "learning_rate": 5.272459499263623e-06,
582
- "loss": 0.0,
583
- "step": 41000
584
- },
585
- {
586
- "epoch": 5.217500628614534,
587
- "grad_norm": 3.7605339002766414e-06,
588
- "learning_rate": 5.092855346815619e-06,
589
- "loss": 0.0,
590
- "step": 41500
591
- }
592
- ],
593
- "logging_steps": 500,
594
- "max_steps": 55678,
595
- "num_input_tokens_seen": 0,
596
- "num_train_epochs": 7,
597
- "save_steps": 500,
598
- "stateful_callbacks": {
599
- "TrainerControl": {
600
- "args": {
601
- "should_epoch_stop": false,
602
- "should_evaluate": false,
603
- "should_log": false,
604
- "should_save": true,
605
- "should_training_stop": false
606
- },
607
- "attributes": {}
608
- }
609
- },
610
- "total_flos": 2.3565764455911014e+17,
611
- "train_batch_size": 6,
612
- "trial_name": null,
613
- "trial_params": null
614
- }