CocoRoF commited on
Commit
d20677c
·
verified ·
1 Parent(s): 0c6c875

Training in progress, step 13748, checkpoint

Browse files
last-checkpoint/model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:9cf83fcc35cbeccda29e41e5a239a30c9878fff2ed12f6202687d1cc78a53d33
3
  size 737582948
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9429983e59f652175f71152fba6eaf3af3a03dcccaed4b1c0446ada02b2b54e6
3
  size 737582948
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:a822201578594008224879f74b2b4f9407c3b6d910a5d9f8150dfb57e55f9839
3
  size 1475256250
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8e8f963dd44ad4b5a4ff6a887f814dc448e58639e52eefe4e323265991e6b18d
3
  size 1475256250
last-checkpoint/rng_state_0.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:aab9b88403a4870612524aeb22edbec848f22712de7dc2dc2c2e5d5a61fd5fa2
3
  size 15920
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:75615b5e6cc125bb94988b3c50b73a5f8c3305643e30a3d5b2f3189a2032ba16
3
  size 15920
last-checkpoint/rng_state_1.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:ac04a7bb6b195f0524aef30982df54700583a952ce3f364f2ebb726a2283cb83
3
  size 15920
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b2e8e6885d573427d2de37a77bf587fa112946ff22d3ea4df32210439a557a5b
3
  size 15920
last-checkpoint/rng_state_2.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:200a8689b4395a1bbe5786c6e017cc00f974df43d64b67ff1fd65d86cee2eac5
3
- size 15984
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8cc6d8d5bb2a96a1ebfb5cf92fac012f69410a414ce89ccd7c5ae11f14e596fa
3
+ size 15920
last-checkpoint/rng_state_3.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:f0b66504d38448386cda55d409ea77b9c193882e1d9e50fc8e75e03505e47e07
3
  size 15920
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3b1ad1f84976b61e4cfaae51278742d669e0df2692aced4131064ecd61c1edf2
3
  size 15920
last-checkpoint/rng_state_4.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:24bd01e4934833714e50683db598cf170e089ac7345314487bab1e91a26e5fd6
3
  size 15920
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:35fafd6395e4cb387bb75fb28a0482502f9e17f6c3b0e3e256daf180373b3f0b
3
  size 15920
last-checkpoint/rng_state_5.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:10568452053cd042809865062f1c3b6117b4f9cb2a36138830ab329649de4c5d
3
  size 15920
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:48c89bb33c92eb59bfef32b9537aa0cfa50296c7262cfdb9eb91256dc5b5e9f3
3
  size 15920
last-checkpoint/rng_state_6.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:028fbfed0ea057f7cba505b85aabb9f8f3300e1ca5de4c9738be9eb5fb7f4bfb
3
  size 15920
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:32dfdc872866fda5b64b7229bac1e43cf4fe2356a4c82d10a2502643547790ec
3
  size 15920
last-checkpoint/rng_state_7.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:c5340b47a03039876c313687a97ae0b0c4c4f3b0059c1b4266e40d507f87b999
3
  size 15920
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9a48787eaef9585df14b508d1097c445291248a545d320eeaf26f46b061d496a
3
  size 15920
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:da5a3d0f3b47665abb770a3493b11c8bcfaffceef746f758a329ed422fd0cb0c
3
  size 1000
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2813db161368db76429d904a036e1161875e895320a4ce21cc6fa1fdd51aa271
3
  size 1000
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 6.616079810339067,
5
  "eval_steps": 500,
6
- "global_step": 13000,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -9107,6 +9107,524 @@
9107
  "learning_rate": 4.981531808618395e-05,
9108
  "loss": 0.6818,
9109
  "step": 13000
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
9110
  }
9111
  ],
9112
  "logging_steps": 10,
@@ -9121,7 +9639,7 @@
9121
  "should_evaluate": false,
9122
  "should_log": false,
9123
  "should_save": true,
9124
- "should_training_stop": false
9125
  },
9126
  "attributes": {}
9127
  }
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 6.99692914763958,
5
  "eval_steps": 500,
6
+ "global_step": 13748,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
9107
  "learning_rate": 4.981531808618395e-05,
9108
  "loss": 0.6818,
9109
  "step": 13000
9110
+ },
9111
+ {
9112
+ "epoch": 6.621171379019555,
9113
+ "grad_norm": 0.13289377093315125,
9114
+ "learning_rate": 4.981517602317332e-05,
9115
+ "loss": 0.6811,
9116
+ "step": 13010
9117
+ },
9118
+ {
9119
+ "epoch": 6.626262947700043,
9120
+ "grad_norm": 0.18308168649673462,
9121
+ "learning_rate": 4.9815033960162695e-05,
9122
+ "loss": 0.678,
9123
+ "step": 13020
9124
+ },
9125
+ {
9126
+ "epoch": 6.631354516380531,
9127
+ "grad_norm": 0.12425180524587631,
9128
+ "learning_rate": 4.981489189715207e-05,
9129
+ "loss": 0.6816,
9130
+ "step": 13030
9131
+ },
9132
+ {
9133
+ "epoch": 6.636446085061019,
9134
+ "grad_norm": 0.13754673302173615,
9135
+ "learning_rate": 4.981474983414144e-05,
9136
+ "loss": 0.6773,
9137
+ "step": 13040
9138
+ },
9139
+ {
9140
+ "epoch": 6.641537653741508,
9141
+ "grad_norm": 0.15316608548164368,
9142
+ "learning_rate": 4.9814607771130814e-05,
9143
+ "loss": 0.6765,
9144
+ "step": 13050
9145
+ },
9146
+ {
9147
+ "epoch": 6.646629222421995,
9148
+ "grad_norm": 0.136078342795372,
9149
+ "learning_rate": 4.981446570812018e-05,
9150
+ "loss": 0.6767,
9151
+ "step": 13060
9152
+ },
9153
+ {
9154
+ "epoch": 6.651720791102484,
9155
+ "grad_norm": 0.12898576259613037,
9156
+ "learning_rate": 4.9814323645109554e-05,
9157
+ "loss": 0.6786,
9158
+ "step": 13070
9159
+ },
9160
+ {
9161
+ "epoch": 6.656812359782972,
9162
+ "grad_norm": 0.11854422837495804,
9163
+ "learning_rate": 4.981418158209893e-05,
9164
+ "loss": 0.6806,
9165
+ "step": 13080
9166
+ },
9167
+ {
9168
+ "epoch": 6.66190392846346,
9169
+ "grad_norm": 0.1517888456583023,
9170
+ "learning_rate": 4.98140395190883e-05,
9171
+ "loss": 0.6829,
9172
+ "step": 13090
9173
+ },
9174
+ {
9175
+ "epoch": 6.666995497143948,
9176
+ "grad_norm": 0.1091533899307251,
9177
+ "learning_rate": 4.9813897456077666e-05,
9178
+ "loss": 0.6774,
9179
+ "step": 13100
9180
+ },
9181
+ {
9182
+ "epoch": 6.672087065824436,
9183
+ "grad_norm": 0.13526228070259094,
9184
+ "learning_rate": 4.981375539306704e-05,
9185
+ "loss": 0.6747,
9186
+ "step": 13110
9187
+ },
9188
+ {
9189
+ "epoch": 6.677178634504925,
9190
+ "grad_norm": 0.144491046667099,
9191
+ "learning_rate": 4.981361333005641e-05,
9192
+ "loss": 0.6787,
9193
+ "step": 13120
9194
+ },
9195
+ {
9196
+ "epoch": 6.682270203185412,
9197
+ "grad_norm": 0.16958777606487274,
9198
+ "learning_rate": 4.9813471267045786e-05,
9199
+ "loss": 0.6744,
9200
+ "step": 13130
9201
+ },
9202
+ {
9203
+ "epoch": 6.687361771865901,
9204
+ "grad_norm": 0.14115367829799652,
9205
+ "learning_rate": 4.981332920403516e-05,
9206
+ "loss": 0.6791,
9207
+ "step": 13140
9208
+ },
9209
+ {
9210
+ "epoch": 6.692453340546389,
9211
+ "grad_norm": 0.11081673204898834,
9212
+ "learning_rate": 4.981318714102453e-05,
9213
+ "loss": 0.6795,
9214
+ "step": 13150
9215
+ },
9216
+ {
9217
+ "epoch": 6.6975449092268775,
9218
+ "grad_norm": 0.14843027293682098,
9219
+ "learning_rate": 4.9813045078013905e-05,
9220
+ "loss": 0.6807,
9221
+ "step": 13160
9222
+ },
9223
+ {
9224
+ "epoch": 6.702636477907365,
9225
+ "grad_norm": 0.12543180584907532,
9226
+ "learning_rate": 4.981290301500328e-05,
9227
+ "loss": 0.6778,
9228
+ "step": 13170
9229
+ },
9230
+ {
9231
+ "epoch": 6.707728046587853,
9232
+ "grad_norm": 0.13169404864311218,
9233
+ "learning_rate": 4.981276095199265e-05,
9234
+ "loss": 0.675,
9235
+ "step": 13180
9236
+ },
9237
+ {
9238
+ "epoch": 6.712819615268342,
9239
+ "grad_norm": 0.15343239903450012,
9240
+ "learning_rate": 4.9812618888982024e-05,
9241
+ "loss": 0.6819,
9242
+ "step": 13190
9243
+ },
9244
+ {
9245
+ "epoch": 6.7179111839488295,
9246
+ "grad_norm": 0.13029424846172333,
9247
+ "learning_rate": 4.981247682597139e-05,
9248
+ "loss": 0.6778,
9249
+ "step": 13200
9250
+ },
9251
+ {
9252
+ "epoch": 6.723002752629318,
9253
+ "grad_norm": 0.11084284633398056,
9254
+ "learning_rate": 4.9812334762960764e-05,
9255
+ "loss": 0.6824,
9256
+ "step": 13210
9257
+ },
9258
+ {
9259
+ "epoch": 6.728094321309806,
9260
+ "grad_norm": 0.11253423988819122,
9261
+ "learning_rate": 4.981219269995014e-05,
9262
+ "loss": 0.6798,
9263
+ "step": 13220
9264
+ },
9265
+ {
9266
+ "epoch": 6.7331858899902945,
9267
+ "grad_norm": 0.1311793029308319,
9268
+ "learning_rate": 4.981205063693951e-05,
9269
+ "loss": 0.6814,
9270
+ "step": 13230
9271
+ },
9272
+ {
9273
+ "epoch": 6.738277458670782,
9274
+ "grad_norm": 0.12919209897518158,
9275
+ "learning_rate": 4.981190857392888e-05,
9276
+ "loss": 0.6768,
9277
+ "step": 13240
9278
+ },
9279
+ {
9280
+ "epoch": 6.743369027351271,
9281
+ "grad_norm": 0.12355062365531921,
9282
+ "learning_rate": 4.9811766510918256e-05,
9283
+ "loss": 0.6799,
9284
+ "step": 13250
9285
+ },
9286
+ {
9287
+ "epoch": 6.748460596031759,
9288
+ "grad_norm": 0.1338970810174942,
9289
+ "learning_rate": 4.981162444790763e-05,
9290
+ "loss": 0.6771,
9291
+ "step": 13260
9292
+ },
9293
+ {
9294
+ "epoch": 6.7535521647122465,
9295
+ "grad_norm": 0.14117179811000824,
9296
+ "learning_rate": 4.9811482384897e-05,
9297
+ "loss": 0.6799,
9298
+ "step": 13270
9299
+ },
9300
+ {
9301
+ "epoch": 6.758643733392735,
9302
+ "grad_norm": 0.1848529875278473,
9303
+ "learning_rate": 4.9811340321886375e-05,
9304
+ "loss": 0.6755,
9305
+ "step": 13280
9306
+ },
9307
+ {
9308
+ "epoch": 6.763735302073223,
9309
+ "grad_norm": 0.1720336526632309,
9310
+ "learning_rate": 4.981119825887575e-05,
9311
+ "loss": 0.67,
9312
+ "step": 13290
9313
+ },
9314
+ {
9315
+ "epoch": 6.768826870753712,
9316
+ "grad_norm": 0.1607787162065506,
9317
+ "learning_rate": 4.981105619586512e-05,
9318
+ "loss": 0.6827,
9319
+ "step": 13300
9320
+ },
9321
+ {
9322
+ "epoch": 6.773918439434199,
9323
+ "grad_norm": 0.14998158812522888,
9324
+ "learning_rate": 4.981091413285449e-05,
9325
+ "loss": 0.6759,
9326
+ "step": 13310
9327
+ },
9328
+ {
9329
+ "epoch": 6.779010008114687,
9330
+ "grad_norm": 0.11763730645179749,
9331
+ "learning_rate": 4.981077206984386e-05,
9332
+ "loss": 0.6747,
9333
+ "step": 13320
9334
+ },
9335
+ {
9336
+ "epoch": 6.784101576795176,
9337
+ "grad_norm": 0.12859204411506653,
9338
+ "learning_rate": 4.9810630006833234e-05,
9339
+ "loss": 0.6785,
9340
+ "step": 13330
9341
+ },
9342
+ {
9343
+ "epoch": 6.7891931454756635,
9344
+ "grad_norm": 0.12227821350097656,
9345
+ "learning_rate": 4.98104879438226e-05,
9346
+ "loss": 0.6794,
9347
+ "step": 13340
9348
+ },
9349
+ {
9350
+ "epoch": 6.794284714156152,
9351
+ "grad_norm": 0.11308576911687851,
9352
+ "learning_rate": 4.9810345880811974e-05,
9353
+ "loss": 0.6777,
9354
+ "step": 13350
9355
+ },
9356
+ {
9357
+ "epoch": 6.79937628283664,
9358
+ "grad_norm": 0.12252433598041534,
9359
+ "learning_rate": 4.981020381780135e-05,
9360
+ "loss": 0.6778,
9361
+ "step": 13360
9362
+ },
9363
+ {
9364
+ "epoch": 6.804467851517129,
9365
+ "grad_norm": 0.11951456218957901,
9366
+ "learning_rate": 4.981006175479072e-05,
9367
+ "loss": 0.6778,
9368
+ "step": 13370
9369
+ },
9370
+ {
9371
+ "epoch": 6.809559420197616,
9372
+ "grad_norm": 0.13758736848831177,
9373
+ "learning_rate": 4.980991969178009e-05,
9374
+ "loss": 0.6757,
9375
+ "step": 13380
9376
+ },
9377
+ {
9378
+ "epoch": 6.814650988878105,
9379
+ "grad_norm": 0.15930655598640442,
9380
+ "learning_rate": 4.9809777628769466e-05,
9381
+ "loss": 0.675,
9382
+ "step": 13390
9383
+ },
9384
+ {
9385
+ "epoch": 6.819742557558593,
9386
+ "grad_norm": 0.16790159046649933,
9387
+ "learning_rate": 4.980963556575884e-05,
9388
+ "loss": 0.6685,
9389
+ "step": 13400
9390
+ },
9391
+ {
9392
+ "epoch": 6.824834126239081,
9393
+ "grad_norm": 0.1681044101715088,
9394
+ "learning_rate": 4.980949350274821e-05,
9395
+ "loss": 0.683,
9396
+ "step": 13410
9397
+ },
9398
+ {
9399
+ "epoch": 6.829925694919569,
9400
+ "grad_norm": 0.1336173415184021,
9401
+ "learning_rate": 4.9809351439737585e-05,
9402
+ "loss": 0.6746,
9403
+ "step": 13420
9404
+ },
9405
+ {
9406
+ "epoch": 6.835017263600057,
9407
+ "grad_norm": 0.11793011426925659,
9408
+ "learning_rate": 4.980920937672696e-05,
9409
+ "loss": 0.6789,
9410
+ "step": 13430
9411
+ },
9412
+ {
9413
+ "epoch": 6.840108832280546,
9414
+ "grad_norm": 0.14056985080242157,
9415
+ "learning_rate": 4.980906731371633e-05,
9416
+ "loss": 0.6797,
9417
+ "step": 13440
9418
+ },
9419
+ {
9420
+ "epoch": 6.845200400961033,
9421
+ "grad_norm": 0.11312086880207062,
9422
+ "learning_rate": 4.9808925250705705e-05,
9423
+ "loss": 0.6777,
9424
+ "step": 13450
9425
+ },
9426
+ {
9427
+ "epoch": 6.850291969641522,
9428
+ "grad_norm": 0.14550986886024475,
9429
+ "learning_rate": 4.980878318769507e-05,
9430
+ "loss": 0.6792,
9431
+ "step": 13460
9432
+ },
9433
+ {
9434
+ "epoch": 6.85538353832201,
9435
+ "grad_norm": 0.13276565074920654,
9436
+ "learning_rate": 4.9808641124684444e-05,
9437
+ "loss": 0.6797,
9438
+ "step": 13470
9439
+ },
9440
+ {
9441
+ "epoch": 6.8604751070024985,
9442
+ "grad_norm": 0.1404767632484436,
9443
+ "learning_rate": 4.980849906167382e-05,
9444
+ "loss": 0.6767,
9445
+ "step": 13480
9446
+ },
9447
+ {
9448
+ "epoch": 6.865566675682986,
9449
+ "grad_norm": 0.11344119906425476,
9450
+ "learning_rate": 4.980835699866319e-05,
9451
+ "loss": 0.6779,
9452
+ "step": 13490
9453
+ },
9454
+ {
9455
+ "epoch": 6.870658244363474,
9456
+ "grad_norm": 0.18248707056045532,
9457
+ "learning_rate": 4.9808214935652563e-05,
9458
+ "loss": 0.6819,
9459
+ "step": 13500
9460
+ },
9461
+ {
9462
+ "epoch": 6.875749813043963,
9463
+ "grad_norm": 0.13696008920669556,
9464
+ "learning_rate": 4.9808072872641937e-05,
9465
+ "loss": 0.6789,
9466
+ "step": 13510
9467
+ },
9468
+ {
9469
+ "epoch": 6.8808413817244505,
9470
+ "grad_norm": 0.1089053824543953,
9471
+ "learning_rate": 4.98079308096313e-05,
9472
+ "loss": 0.6833,
9473
+ "step": 13520
9474
+ },
9475
+ {
9476
+ "epoch": 6.885932950404939,
9477
+ "grad_norm": 0.13730046153068542,
9478
+ "learning_rate": 4.9807788746620676e-05,
9479
+ "loss": 0.685,
9480
+ "step": 13530
9481
+ },
9482
+ {
9483
+ "epoch": 6.891024519085427,
9484
+ "grad_norm": 0.11708593368530273,
9485
+ "learning_rate": 4.980764668361005e-05,
9486
+ "loss": 0.6797,
9487
+ "step": 13540
9488
+ },
9489
+ {
9490
+ "epoch": 6.896116087765915,
9491
+ "grad_norm": 0.14479976892471313,
9492
+ "learning_rate": 4.980750462059942e-05,
9493
+ "loss": 0.6779,
9494
+ "step": 13550
9495
+ },
9496
+ {
9497
+ "epoch": 6.901207656446403,
9498
+ "grad_norm": 0.13402192294597626,
9499
+ "learning_rate": 4.9807362557588795e-05,
9500
+ "loss": 0.6775,
9501
+ "step": 13560
9502
+ },
9503
+ {
9504
+ "epoch": 6.906299225126891,
9505
+ "grad_norm": 0.1378648430109024,
9506
+ "learning_rate": 4.980722049457817e-05,
9507
+ "loss": 0.6799,
9508
+ "step": 13570
9509
+ },
9510
+ {
9511
+ "epoch": 6.91139079380738,
9512
+ "grad_norm": 0.1424325555562973,
9513
+ "learning_rate": 4.980707843156754e-05,
9514
+ "loss": 0.6777,
9515
+ "step": 13580
9516
+ },
9517
+ {
9518
+ "epoch": 6.9164823624878675,
9519
+ "grad_norm": 0.12795968353748322,
9520
+ "learning_rate": 4.9806936368556915e-05,
9521
+ "loss": 0.6756,
9522
+ "step": 13590
9523
+ },
9524
+ {
9525
+ "epoch": 6.921573931168356,
9526
+ "grad_norm": 0.16961532831192017,
9527
+ "learning_rate": 4.980679430554628e-05,
9528
+ "loss": 0.6762,
9529
+ "step": 13600
9530
+ },
9531
+ {
9532
+ "epoch": 6.926665499848844,
9533
+ "grad_norm": 0.16084560751914978,
9534
+ "learning_rate": 4.9806652242535654e-05,
9535
+ "loss": 0.6783,
9536
+ "step": 13610
9537
+ },
9538
+ {
9539
+ "epoch": 6.931757068529333,
9540
+ "grad_norm": 0.1510113775730133,
9541
+ "learning_rate": 4.980651017952503e-05,
9542
+ "loss": 0.676,
9543
+ "step": 13620
9544
+ },
9545
+ {
9546
+ "epoch": 6.93684863720982,
9547
+ "grad_norm": 0.1436864286661148,
9548
+ "learning_rate": 4.98063681165144e-05,
9549
+ "loss": 0.6769,
9550
+ "step": 13630
9551
+ },
9552
+ {
9553
+ "epoch": 6.941940205890308,
9554
+ "grad_norm": 0.14651361107826233,
9555
+ "learning_rate": 4.980622605350377e-05,
9556
+ "loss": 0.6786,
9557
+ "step": 13640
9558
+ },
9559
+ {
9560
+ "epoch": 6.947031774570797,
9561
+ "grad_norm": 0.12080514430999756,
9562
+ "learning_rate": 4.9806083990493146e-05,
9563
+ "loss": 0.6719,
9564
+ "step": 13650
9565
+ },
9566
+ {
9567
+ "epoch": 6.952123343251285,
9568
+ "grad_norm": 0.18036852777004242,
9569
+ "learning_rate": 4.980594192748252e-05,
9570
+ "loss": 0.6776,
9571
+ "step": 13660
9572
+ },
9573
+ {
9574
+ "epoch": 6.957214911931773,
9575
+ "grad_norm": 0.15538708865642548,
9576
+ "learning_rate": 4.980579986447189e-05,
9577
+ "loss": 0.677,
9578
+ "step": 13670
9579
+ },
9580
+ {
9581
+ "epoch": 6.962306480612261,
9582
+ "grad_norm": 0.14524763822555542,
9583
+ "learning_rate": 4.9805657801461266e-05,
9584
+ "loss": 0.6725,
9585
+ "step": 13680
9586
+ },
9587
+ {
9588
+ "epoch": 6.96739804929275,
9589
+ "grad_norm": 0.13171471655368805,
9590
+ "learning_rate": 4.980551573845064e-05,
9591
+ "loss": 0.6814,
9592
+ "step": 13690
9593
+ },
9594
+ {
9595
+ "epoch": 6.972489617973237,
9596
+ "grad_norm": 0.14730645716190338,
9597
+ "learning_rate": 4.980537367544001e-05,
9598
+ "loss": 0.6828,
9599
+ "step": 13700
9600
+ },
9601
+ {
9602
+ "epoch": 6.977581186653726,
9603
+ "grad_norm": 0.1142466589808464,
9604
+ "learning_rate": 4.980523161242938e-05,
9605
+ "loss": 0.677,
9606
+ "step": 13710
9607
+ },
9608
+ {
9609
+ "epoch": 6.982672755334214,
9610
+ "grad_norm": 0.11980883777141571,
9611
+ "learning_rate": 4.980508954941875e-05,
9612
+ "loss": 0.6847,
9613
+ "step": 13720
9614
+ },
9615
+ {
9616
+ "epoch": 6.987764324014702,
9617
+ "grad_norm": 0.10882198065519333,
9618
+ "learning_rate": 4.9804947486408125e-05,
9619
+ "loss": 0.6749,
9620
+ "step": 13730
9621
+ },
9622
+ {
9623
+ "epoch": 6.99285589269519,
9624
+ "grad_norm": 0.1418180912733078,
9625
+ "learning_rate": 4.980480542339749e-05,
9626
+ "loss": 0.675,
9627
+ "step": 13740
9628
  }
9629
  ],
9630
  "logging_steps": 10,
 
9639
  "should_evaluate": false,
9640
  "should_log": false,
9641
  "should_save": true,
9642
+ "should_training_stop": true
9643
  },
9644
  "attributes": {}
9645
  }