sedrickkeh commited on
Commit
308a2a5
·
verified ·
1 Parent(s): fcd7f95

Training in progress, epoch 4

Browse files
model-00001-of-00004.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:f50390804ec22af67466cea5be841e5fcc213913a974e8e2968b18bd93e223c8
3
  size 4877660776
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:90214a308f1066f6fae0c40eb1f4fb35f2c44956db608a21192d60c5e34b5774
3
  size 4877660776
model-00002-of-00004.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:5561e5b93e921440378a9a4f3f486f1acfecfd26657c70596434d86d5538179d
3
  size 4932751008
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8e67615b89faf4c4d9eee0a1bdf9c1976bd6c0dd866544526a467f6411d8a132
3
  size 4932751008
model-00003-of-00004.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:5246dd7b20006f8a58c7006fc0e6f4b4fbf464ff773ad89421c9226e606275f5
3
  size 4330865200
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:637c688cc4120b87b2712715ab55eb0624c681537e3cfd5349b965dbb225f0d2
3
  size 4330865200
model-00004-of-00004.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:67f62aa3e50daec43fa0f74428ac2226af73b37f153a15a1ccba847c80f19a0a
3
  size 1089994880
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:23cc1f0b90dc807d2fbcaadf72ce630f6d5753bbc82e1512d5a86baa89ee8744
3
  size 1089994880
trainer_log.jsonl CHANGED
@@ -312,3 +312,80 @@
312
  {"current_steps": 312, "total_steps": 390, "loss": 0.332, "lr": 2.339555568810221e-06, "epoch": 3.9936, "percentage": 80.0, "elapsed_time": "6:19:53", "remaining_time": "1:34:58"}
313
  {"current_steps": 313, "total_steps": 390, "loss": 0.5482, "lr": 2.28233106965885e-06, "epoch": 4.0064, "percentage": 80.26, "elapsed_time": "6:21:58", "remaining_time": "1:33:58"}
314
  {"current_steps": 314, "total_steps": 390, "loss": 0.2848, "lr": 2.2257248271023424e-06, "epoch": 4.0192, "percentage": 80.51, "elapsed_time": "6:23:11", "remaining_time": "1:32:44"}
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
312
  {"current_steps": 312, "total_steps": 390, "loss": 0.332, "lr": 2.339555568810221e-06, "epoch": 3.9936, "percentage": 80.0, "elapsed_time": "6:19:53", "remaining_time": "1:34:58"}
313
  {"current_steps": 313, "total_steps": 390, "loss": 0.5482, "lr": 2.28233106965885e-06, "epoch": 4.0064, "percentage": 80.26, "elapsed_time": "6:21:58", "remaining_time": "1:33:58"}
314
  {"current_steps": 314, "total_steps": 390, "loss": 0.2848, "lr": 2.2257248271023424e-06, "epoch": 4.0192, "percentage": 80.51, "elapsed_time": "6:23:11", "remaining_time": "1:32:44"}
315
+ {"current_steps": 315, "total_steps": 390, "loss": 0.2906, "lr": 2.1697413758237785e-06, "epoch": 4.032, "percentage": 80.77, "elapsed_time": "6:24:32", "remaining_time": "1:31:33"}
316
+ {"current_steps": 316, "total_steps": 390, "loss": 0.3198, "lr": 2.114385200614912e-06, "epoch": 4.0448, "percentage": 81.03, "elapsed_time": "6:25:42", "remaining_time": "1:30:19"}
317
+ {"current_steps": 317, "total_steps": 390, "loss": 0.3037, "lr": 2.0596607360168897e-06, "epoch": 4.0576, "percentage": 81.28, "elapsed_time": "6:26:52", "remaining_time": "1:29:05"}
318
+ {"current_steps": 318, "total_steps": 390, "loss": 0.3194, "lr": 2.0055723659649907e-06, "epoch": 4.0704, "percentage": 81.54, "elapsed_time": "6:28:02", "remaining_time": "1:27:51"}
319
+ {"current_steps": 319, "total_steps": 390, "loss": 0.328, "lr": 1.952124423437447e-06, "epoch": 4.0832, "percentage": 81.79, "elapsed_time": "6:29:17", "remaining_time": "1:26:38"}
320
+ {"current_steps": 320, "total_steps": 390, "loss": 0.2627, "lr": 1.8993211901083353e-06, "epoch": 4.096, "percentage": 82.05, "elapsed_time": "6:30:21", "remaining_time": "1:25:23"}
321
+ {"current_steps": 321, "total_steps": 390, "loss": 0.3274, "lr": 1.8471668960045575e-06, "epoch": 4.1088, "percentage": 82.31, "elapsed_time": "6:31:37", "remaining_time": "1:24:10"}
322
+ {"current_steps": 322, "total_steps": 390, "loss": 0.3063, "lr": 1.7956657191669969e-06, "epoch": 4.1216, "percentage": 82.56, "elapsed_time": "6:32:50", "remaining_time": "1:22:57"}
323
+ {"current_steps": 323, "total_steps": 390, "loss": 0.2982, "lr": 1.7448217853158e-06, "epoch": 4.1344, "percentage": 82.82, "elapsed_time": "6:34:01", "remaining_time": "1:21:43"}
324
+ {"current_steps": 324, "total_steps": 390, "loss": 0.2949, "lr": 1.6946391675198838e-06, "epoch": 4.1472, "percentage": 83.08, "elapsed_time": "6:35:10", "remaining_time": "1:20:29"}
325
+ {"current_steps": 325, "total_steps": 390, "loss": 0.295, "lr": 1.6451218858706374e-06, "epoch": 4.16, "percentage": 83.33, "elapsed_time": "6:36:23", "remaining_time": "1:19:16"}
326
+ {"current_steps": 326, "total_steps": 390, "loss": 0.3322, "lr": 1.5962739071598709e-06, "epoch": 4.1728, "percentage": 83.59, "elapsed_time": "6:37:36", "remaining_time": "1:18:03"}
327
+ {"current_steps": 327, "total_steps": 390, "loss": 0.316, "lr": 1.5480991445620541e-06, "epoch": 4.1856, "percentage": 83.85, "elapsed_time": "6:38:47", "remaining_time": "1:16:49"}
328
+ {"current_steps": 328, "total_steps": 390, "loss": 0.301, "lr": 1.500601457320814e-06, "epoch": 4.1984, "percentage": 84.1, "elapsed_time": "6:40:03", "remaining_time": "1:15:37"}
329
+ {"current_steps": 329, "total_steps": 390, "loss": 0.3041, "lr": 1.453784650439798e-06, "epoch": 4.2112, "percentage": 84.36, "elapsed_time": "6:41:27", "remaining_time": "1:14:26"}
330
+ {"current_steps": 330, "total_steps": 390, "loss": 0.2811, "lr": 1.407652474377832e-06, "epoch": 4.224, "percentage": 84.62, "elapsed_time": "6:42:37", "remaining_time": "1:13:12"}
331
+ {"current_steps": 331, "total_steps": 390, "loss": 0.2917, "lr": 1.3622086247484989e-06, "epoch": 4.2368, "percentage": 84.87, "elapsed_time": "6:43:48", "remaining_time": "1:11:58"}
332
+ {"current_steps": 332, "total_steps": 390, "loss": 0.2873, "lr": 1.3174567420240647e-06, "epoch": 4.2496, "percentage": 85.13, "elapsed_time": "6:45:04", "remaining_time": "1:10:45"}
333
+ {"current_steps": 333, "total_steps": 390, "loss": 0.2657, "lr": 1.273400411243857e-06, "epoch": 4.2624, "percentage": 85.38, "elapsed_time": "6:46:05", "remaining_time": "1:09:30"}
334
+ {"current_steps": 334, "total_steps": 390, "loss": 0.2832, "lr": 1.2300431617270669e-06, "epoch": 4.2752, "percentage": 85.64, "elapsed_time": "6:47:20", "remaining_time": "1:08:17"}
335
+ {"current_steps": 335, "total_steps": 390, "loss": 0.3077, "lr": 1.1873884667900125e-06, "epoch": 4.288, "percentage": 85.9, "elapsed_time": "6:48:34", "remaining_time": "1:07:04"}
336
+ {"current_steps": 336, "total_steps": 390, "loss": 0.3122, "lr": 1.1454397434679022e-06, "epoch": 4.3008, "percentage": 86.15, "elapsed_time": "6:49:47", "remaining_time": "1:05:51"}
337
+ {"current_steps": 337, "total_steps": 390, "loss": 0.2802, "lr": 1.1042003522410882e-06, "epoch": 4.3136, "percentage": 86.41, "elapsed_time": "6:50:50", "remaining_time": "1:04:36"}
338
+ {"current_steps": 338, "total_steps": 390, "loss": 0.3426, "lr": 1.0636735967658785e-06, "epoch": 4.3264, "percentage": 86.67, "elapsed_time": "6:51:59", "remaining_time": "1:03:23"}
339
+ {"current_steps": 339, "total_steps": 390, "loss": 0.2929, "lr": 1.0238627236098619e-06, "epoch": 4.3392, "percentage": 86.92, "elapsed_time": "6:53:09", "remaining_time": "1:02:09"}
340
+ {"current_steps": 340, "total_steps": 390, "loss": 0.33, "lr": 9.8477092199184e-07, "epoch": 4.352, "percentage": 87.18, "elapsed_time": "6:54:29", "remaining_time": "1:00:57"}
341
+ {"current_steps": 341, "total_steps": 390, "loss": 0.2955, "lr": 9.464013235263458e-07, "epoch": 4.3648, "percentage": 87.44, "elapsed_time": "6:55:51", "remaining_time": "0:59:45"}
342
+ {"current_steps": 342, "total_steps": 390, "loss": 0.2672, "lr": 9.08757001972762e-07, "epoch": 4.3776, "percentage": 87.69, "elapsed_time": "6:56:55", "remaining_time": "0:58:30"}
343
+ {"current_steps": 343, "total_steps": 390, "loss": 0.3898, "lr": 8.71840972989092e-07, "epoch": 4.3904, "percentage": 87.95, "elapsed_time": "6:58:09", "remaining_time": "0:57:17"}
344
+ {"current_steps": 344, "total_steps": 390, "loss": 0.2633, "lr": 8.356561938903707e-07, "epoch": 4.4032, "percentage": 88.21, "elapsed_time": "6:59:20", "remaining_time": "0:56:04"}
345
+ {"current_steps": 345, "total_steps": 390, "loss": 0.2961, "lr": 8.002055634117578e-07, "epoch": 4.416, "percentage": 88.46, "elapsed_time": "7:00:33", "remaining_time": "0:54:51"}
346
+ {"current_steps": 346, "total_steps": 390, "loss": 0.2982, "lr": 7.654919214763357e-07, "epoch": 4.4288, "percentage": 88.72, "elapsed_time": "7:01:45", "remaining_time": "0:53:38"}
347
+ {"current_steps": 347, "total_steps": 390, "loss": 0.3052, "lr": 7.315180489675822e-07, "epoch": 4.4416, "percentage": 88.97, "elapsed_time": "7:02:57", "remaining_time": "0:52:24"}
348
+ {"current_steps": 348, "total_steps": 390, "loss": 0.2831, "lr": 6.98286667506618e-07, "epoch": 4.4544, "percentage": 89.23, "elapsed_time": "7:04:10", "remaining_time": "0:51:11"}
349
+ {"current_steps": 349, "total_steps": 390, "loss": 0.3096, "lr": 6.658004392341633e-07, "epoch": 4.4672, "percentage": 89.49, "elapsed_time": "7:05:25", "remaining_time": "0:49:58"}
350
+ {"current_steps": 350, "total_steps": 390, "loss": 0.3138, "lr": 6.340619665972847e-07, "epoch": 4.48, "percentage": 89.74, "elapsed_time": "7:06:42", "remaining_time": "0:48:45"}
351
+ {"current_steps": 351, "total_steps": 390, "loss": 0.2916, "lr": 6.030737921409169e-07, "epoch": 4.4928, "percentage": 90.0, "elapsed_time": "7:07:54", "remaining_time": "0:47:32"}
352
+ {"current_steps": 352, "total_steps": 390, "loss": 0.3105, "lr": 5.728383983041696e-07, "epoch": 4.5056, "percentage": 90.26, "elapsed_time": "7:08:57", "remaining_time": "0:46:18"}
353
+ {"current_steps": 353, "total_steps": 390, "loss": 0.2893, "lr": 5.43358207221476e-07, "epoch": 4.5184, "percentage": 90.51, "elapsed_time": "7:10:12", "remaining_time": "0:45:05"}
354
+ {"current_steps": 354, "total_steps": 390, "loss": 0.2767, "lr": 5.146355805285452e-07, "epoch": 4.5312, "percentage": 90.77, "elapsed_time": "7:11:27", "remaining_time": "0:43:52"}
355
+ {"current_steps": 355, "total_steps": 390, "loss": 0.2973, "lr": 4.866728191731829e-07, "epoch": 4.5440000000000005, "percentage": 91.03, "elapsed_time": "7:12:33", "remaining_time": "0:42:38"}
356
+ {"current_steps": 356, "total_steps": 390, "loss": 0.2797, "lr": 4.594721632309551e-07, "epoch": 4.5568, "percentage": 91.28, "elapsed_time": "7:13:37", "remaining_time": "0:41:24"}
357
+ {"current_steps": 357, "total_steps": 390, "loss": 0.2959, "lr": 4.3303579172574884e-07, "epoch": 4.5696, "percentage": 91.54, "elapsed_time": "7:14:51", "remaining_time": "0:40:11"}
358
+ {"current_steps": 358, "total_steps": 390, "loss": 0.2958, "lr": 4.0736582245519795e-07, "epoch": 4.5824, "percentage": 91.79, "elapsed_time": "7:15:58", "remaining_time": "0:38:58"}
359
+ {"current_steps": 359, "total_steps": 390, "loss": 0.294, "lr": 3.824643118210403e-07, "epoch": 4.5952, "percentage": 92.05, "elapsed_time": "7:17:14", "remaining_time": "0:37:45"}
360
+ {"current_steps": 360, "total_steps": 390, "loss": 0.2637, "lr": 3.5833325466437697e-07, "epoch": 4.608, "percentage": 92.31, "elapsed_time": "7:18:15", "remaining_time": "0:36:31"}
361
+ {"current_steps": 361, "total_steps": 390, "loss": 0.3378, "lr": 3.349745841058605e-07, "epoch": 4.6208, "percentage": 92.56, "elapsed_time": "7:19:26", "remaining_time": "0:35:18"}
362
+ {"current_steps": 362, "total_steps": 390, "loss": 0.2696, "lr": 3.1239017139084725e-07, "epoch": 4.6336, "percentage": 92.82, "elapsed_time": "7:20:34", "remaining_time": "0:34:04"}
363
+ {"current_steps": 363, "total_steps": 390, "loss": 0.322, "lr": 2.905818257394799e-07, "epoch": 4.6464, "percentage": 93.08, "elapsed_time": "7:21:56", "remaining_time": "0:32:52"}
364
+ {"current_steps": 364, "total_steps": 390, "loss": 0.2954, "lr": 2.6955129420176193e-07, "epoch": 4.6592, "percentage": 93.33, "elapsed_time": "7:23:14", "remaining_time": "0:31:39"}
365
+ {"current_steps": 365, "total_steps": 390, "loss": 0.3194, "lr": 2.493002615175977e-07, "epoch": 4.672, "percentage": 93.59, "elapsed_time": "7:24:21", "remaining_time": "0:30:26"}
366
+ {"current_steps": 366, "total_steps": 390, "loss": 0.2718, "lr": 2.2983034998182997e-07, "epoch": 4.6848, "percentage": 93.85, "elapsed_time": "7:25:31", "remaining_time": "0:29:12"}
367
+ {"current_steps": 367, "total_steps": 390, "loss": 0.2741, "lr": 2.11143119314281e-07, "epoch": 4.6975999999999996, "percentage": 94.1, "elapsed_time": "7:26:37", "remaining_time": "0:27:59"}
368
+ {"current_steps": 368, "total_steps": 390, "loss": 0.3066, "lr": 1.9324006653480332e-07, "epoch": 4.7104, "percentage": 94.36, "elapsed_time": "7:27:53", "remaining_time": "0:26:46"}
369
+ {"current_steps": 369, "total_steps": 390, "loss": 0.2758, "lr": 1.761226258433524e-07, "epoch": 4.7232, "percentage": 94.62, "elapsed_time": "7:29:12", "remaining_time": "0:25:33"}
370
+ {"current_steps": 370, "total_steps": 390, "loss": 0.3005, "lr": 1.5979216850509848e-07, "epoch": 4.736, "percentage": 94.87, "elapsed_time": "7:30:26", "remaining_time": "0:24:20"}
371
+ {"current_steps": 371, "total_steps": 390, "loss": 0.3216, "lr": 1.4425000274057577e-07, "epoch": 4.7488, "percentage": 95.13, "elapsed_time": "7:31:39", "remaining_time": "0:23:07"}
372
+ {"current_steps": 372, "total_steps": 390, "loss": 0.3125, "lr": 1.2949737362087156e-07, "epoch": 4.7616, "percentage": 95.38, "elapsed_time": "7:32:54", "remaining_time": "0:21:54"}
373
+ {"current_steps": 373, "total_steps": 390, "loss": 0.2455, "lr": 1.1553546296789952e-07, "epoch": 4.7744, "percentage": 95.64, "elapsed_time": "7:33:56", "remaining_time": "0:20:41"}
374
+ {"current_steps": 374, "total_steps": 390, "loss": 0.3125, "lr": 1.0236538925971429e-07, "epoch": 4.7872, "percentage": 95.9, "elapsed_time": "7:35:06", "remaining_time": "0:19:28"}
375
+ {"current_steps": 375, "total_steps": 390, "loss": 0.35, "lr": 8.99882075409153e-08, "epoch": 4.8, "percentage": 96.15, "elapsed_time": "7:36:20", "remaining_time": "0:18:15"}
376
+ {"current_steps": 376, "total_steps": 390, "loss": 0.2607, "lr": 7.840490933812783e-08, "epoch": 4.8128, "percentage": 96.41, "elapsed_time": "7:37:33", "remaining_time": "0:17:02"}
377
+ {"current_steps": 377, "total_steps": 390, "loss": 0.2915, "lr": 6.761642258056977e-08, "epoch": 4.8256, "percentage": 96.67, "elapsed_time": "7:38:48", "remaining_time": "0:15:49"}
378
+ {"current_steps": 378, "total_steps": 390, "loss": 0.2974, "lr": 5.7623611525721155e-08, "epoch": 4.8384, "percentage": 96.92, "elapsed_time": "7:40:04", "remaining_time": "0:14:36"}
379
+ {"current_steps": 379, "total_steps": 390, "loss": 0.2867, "lr": 4.8427276690081735e-08, "epoch": 4.8512, "percentage": 97.18, "elapsed_time": "7:41:19", "remaining_time": "0:13:23"}
380
+ {"current_steps": 380, "total_steps": 390, "loss": 0.3096, "lr": 4.002815478505007e-08, "epoch": 4.864, "percentage": 97.44, "elapsed_time": "7:42:34", "remaining_time": "0:12:10"}
381
+ {"current_steps": 381, "total_steps": 390, "loss": 0.2806, "lr": 3.242691865790071e-08, "epoch": 4.8768, "percentage": 97.69, "elapsed_time": "7:43:49", "remaining_time": "0:10:57"}
382
+ {"current_steps": 382, "total_steps": 390, "loss": 0.2824, "lr": 2.5624177237884017e-08, "epoch": 4.8896, "percentage": 97.95, "elapsed_time": "7:45:07", "remaining_time": "0:09:44"}
383
+ {"current_steps": 383, "total_steps": 390, "loss": 0.3351, "lr": 1.962047548744961e-08, "epoch": 4.9024, "percentage": 98.21, "elapsed_time": "7:46:28", "remaining_time": "0:08:31"}
384
+ {"current_steps": 384, "total_steps": 390, "loss": 0.2763, "lr": 1.4416294358582383e-08, "epoch": 4.9152000000000005, "percentage": 98.46, "elapsed_time": "7:47:39", "remaining_time": "0:07:18"}
385
+ {"current_steps": 385, "total_steps": 390, "loss": 0.2982, "lr": 1.0012050754277802e-08, "epoch": 4.928, "percentage": 98.72, "elapsed_time": "7:48:44", "remaining_time": "0:06:05"}
386
+ {"current_steps": 386, "total_steps": 390, "loss": 0.3105, "lr": 6.40809749514637e-09, "epoch": 4.9408, "percentage": 98.97, "elapsed_time": "7:49:55", "remaining_time": "0:04:52"}
387
+ {"current_steps": 387, "total_steps": 390, "loss": 0.2789, "lr": 3.6047232911462506e-09, "epoch": 4.9536, "percentage": 99.23, "elapsed_time": "7:51:04", "remaining_time": "0:03:39"}
388
+ {"current_steps": 388, "total_steps": 390, "loss": 0.3004, "lr": 1.6021527184528761e-09, "epoch": 4.9664, "percentage": 99.49, "elapsed_time": "7:52:16", "remaining_time": "0:02:26"}
389
+ {"current_steps": 389, "total_steps": 390, "loss": 0.3077, "lr": 4.005462014766703e-10, "epoch": 4.9792, "percentage": 99.74, "elapsed_time": "7:53:29", "remaining_time": "0:01:13"}
390
+ {"current_steps": 390, "total_steps": 390, "loss": 0.3067, "lr": 0.0, "epoch": 4.992, "percentage": 100.0, "elapsed_time": "7:54:39", "remaining_time": "0:00:00"}
391
+ {"current_steps": 390, "total_steps": 390, "epoch": 4.992, "percentage": 100.0, "elapsed_time": "7:56:27", "remaining_time": "0:00:00"}