ryanmarten commited on
Commit
1e839fa
·
verified ·
1 Parent(s): fe5d5d0

Training in progress, epoch 4

Browse files
model-00001-of-00004.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:b273a0de18b9bb0cb048920cc2a72787bafd7a3ccdf058aab5a70a06ffc343ba
3
  size 4877660776
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f0671fb4d7c51bc97b0116e37d0d338bf06ab77f5fe1638f037ad53aa49c2a43
3
  size 4877660776
model-00002-of-00004.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:5f0964e9b7e94f40c49caf3f905de595d217dc7f3ba43f72095980ae6e3b9ba0
3
  size 4932751008
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2acad3d6093a2cd73475e693bdc5516bb1080de646188317059dfcd9dae2a7b9
3
  size 4932751008
model-00003-of-00004.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:e7e6a78499e1f29772b52f73f72ba170238306010e53f97c483bb5e7a026cac9
3
  size 4330865200
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0d7a7295b86cd3ffac412d8728ffd34106b047f562aac2270c808e661c2e12af
3
  size 4330865200
model-00004-of-00004.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:2ffe9e5199eecafc43b8d27d2f4ce6d452850b480912bcceb83df30f772279b6
3
  size 1089994880
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7921455a3e40567d3acab78d6659af72c95b3c1a75915e937987d948d70e640d
3
  size 1089994880
trainer_log.jsonl CHANGED
@@ -311,3 +311,81 @@
311
  {"current_steps": 311, "total_steps": 390, "loss": 0.4358, "lr": 4.794787480690597e-06, "epoch": 3.9808, "percentage": 79.74, "elapsed_time": "13:21:57", "remaining_time": "3:23:42"}
312
  {"current_steps": 312, "total_steps": 390, "loss": 0.4334, "lr": 4.679111137620442e-06, "epoch": 3.9936, "percentage": 80.0, "elapsed_time": "13:24:52", "remaining_time": "3:21:13"}
313
  {"current_steps": 313, "total_steps": 390, "loss": 0.4189, "lr": 4.5646621393177e-06, "epoch": 4.0064, "percentage": 80.26, "elapsed_time": "13:28:24", "remaining_time": "3:18:52"}
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
311
  {"current_steps": 311, "total_steps": 390, "loss": 0.4358, "lr": 4.794787480690597e-06, "epoch": 3.9808, "percentage": 79.74, "elapsed_time": "13:21:57", "remaining_time": "3:23:42"}
312
  {"current_steps": 312, "total_steps": 390, "loss": 0.4334, "lr": 4.679111137620442e-06, "epoch": 3.9936, "percentage": 80.0, "elapsed_time": "13:24:52", "remaining_time": "3:21:13"}
313
  {"current_steps": 313, "total_steps": 390, "loss": 0.4189, "lr": 4.5646621393177e-06, "epoch": 4.0064, "percentage": 80.26, "elapsed_time": "13:28:24", "remaining_time": "3:18:52"}
314
+ {"current_steps": 314, "total_steps": 390, "loss": 0.3899, "lr": 4.451449654204685e-06, "epoch": 4.0192, "percentage": 80.51, "elapsed_time": "13:31:06", "remaining_time": "3:16:19"}
315
+ {"current_steps": 315, "total_steps": 390, "loss": 0.3972, "lr": 4.339482751647557e-06, "epoch": 4.032, "percentage": 80.77, "elapsed_time": "13:33:42", "remaining_time": "3:13:44"}
316
+ {"current_steps": 316, "total_steps": 390, "loss": 0.3989, "lr": 4.228770401229824e-06, "epoch": 4.0448, "percentage": 81.03, "elapsed_time": "13:36:22", "remaining_time": "3:11:10"}
317
+ {"current_steps": 317, "total_steps": 390, "loss": 0.3896, "lr": 4.119321472033779e-06, "epoch": 4.0576, "percentage": 81.28, "elapsed_time": "13:38:52", "remaining_time": "3:08:34"}
318
+ {"current_steps": 318, "total_steps": 390, "loss": 0.3868, "lr": 4.011144731929981e-06, "epoch": 4.0704, "percentage": 81.54, "elapsed_time": "13:41:27", "remaining_time": "3:05:59"}
319
+ {"current_steps": 319, "total_steps": 390, "loss": 0.4055, "lr": 3.904248846874894e-06, "epoch": 4.0832, "percentage": 81.79, "elapsed_time": "13:44:03", "remaining_time": "3:03:24"}
320
+ {"current_steps": 320, "total_steps": 390, "loss": 0.3862, "lr": 3.7986423802166705e-06, "epoch": 4.096, "percentage": 82.05, "elapsed_time": "13:46:49", "remaining_time": "3:00:52"}
321
+ {"current_steps": 321, "total_steps": 390, "loss": 0.4055, "lr": 3.694333792009115e-06, "epoch": 4.1088, "percentage": 82.31, "elapsed_time": "13:49:16", "remaining_time": "2:58:15"}
322
+ {"current_steps": 322, "total_steps": 390, "loss": 0.4132, "lr": 3.5913314383339937e-06, "epoch": 4.1216, "percentage": 82.56, "elapsed_time": "13:52:01", "remaining_time": "2:55:42"}
323
+ {"current_steps": 323, "total_steps": 390, "loss": 0.4181, "lr": 3.4896435706316e-06, "epoch": 4.1344, "percentage": 82.82, "elapsed_time": "13:54:36", "remaining_time": "2:53:07"}
324
+ {"current_steps": 324, "total_steps": 390, "loss": 0.3906, "lr": 3.3892783350397675e-06, "epoch": 4.1472, "percentage": 83.08, "elapsed_time": "13:57:12", "remaining_time": "2:50:32"}
325
+ {"current_steps": 325, "total_steps": 390, "loss": 0.413, "lr": 3.290243771741275e-06, "epoch": 4.16, "percentage": 83.33, "elapsed_time": "13:59:52", "remaining_time": "2:47:58"}
326
+ {"current_steps": 326, "total_steps": 390, "loss": 0.3986, "lr": 3.1925478143197418e-06, "epoch": 4.1728, "percentage": 83.59, "elapsed_time": "14:02:31", "remaining_time": "2:45:24"}
327
+ {"current_steps": 327, "total_steps": 390, "loss": 0.4018, "lr": 3.0961982891241083e-06, "epoch": 4.1856, "percentage": 83.85, "elapsed_time": "14:04:58", "remaining_time": "2:42:47"}
328
+ {"current_steps": 328, "total_steps": 390, "loss": 0.418, "lr": 3.001202914641628e-06, "epoch": 4.1984, "percentage": 84.1, "elapsed_time": "14:07:33", "remaining_time": "2:40:12"}
329
+ {"current_steps": 329, "total_steps": 390, "loss": 0.3914, "lr": 2.907569300879596e-06, "epoch": 4.2112, "percentage": 84.36, "elapsed_time": "14:10:00", "remaining_time": "2:37:36"}
330
+ {"current_steps": 330, "total_steps": 390, "loss": 0.4077, "lr": 2.815304948755664e-06, "epoch": 4.224, "percentage": 84.62, "elapsed_time": "14:12:35", "remaining_time": "2:35:01"}
331
+ {"current_steps": 331, "total_steps": 390, "loss": 0.3905, "lr": 2.7244172494969978e-06, "epoch": 4.2368, "percentage": 84.87, "elapsed_time": "14:15:09", "remaining_time": "2:32:25"}
332
+ {"current_steps": 332, "total_steps": 390, "loss": 0.4058, "lr": 2.6349134840481294e-06, "epoch": 4.2496, "percentage": 85.13, "elapsed_time": "14:17:44", "remaining_time": "2:29:50"}
333
+ {"current_steps": 333, "total_steps": 390, "loss": 0.3976, "lr": 2.546800822487714e-06, "epoch": 4.2624, "percentage": 85.38, "elapsed_time": "14:20:27", "remaining_time": "2:27:17"}
334
+ {"current_steps": 334, "total_steps": 390, "loss": 0.4079, "lr": 2.4600863234541338e-06, "epoch": 4.2752, "percentage": 85.64, "elapsed_time": "14:23:08", "remaining_time": "2:24:43"}
335
+ {"current_steps": 335, "total_steps": 390, "loss": 0.3865, "lr": 2.374776933580025e-06, "epoch": 4.288, "percentage": 85.9, "elapsed_time": "14:25:32", "remaining_time": "2:22:06"}
336
+ {"current_steps": 336, "total_steps": 390, "loss": 0.3757, "lr": 2.2908794869358044e-06, "epoch": 4.3008, "percentage": 86.15, "elapsed_time": "14:28:02", "remaining_time": "2:19:30"}
337
+ {"current_steps": 337, "total_steps": 390, "loss": 0.3834, "lr": 2.2084007044821764e-06, "epoch": 4.3136, "percentage": 86.41, "elapsed_time": "14:30:33", "remaining_time": "2:16:54"}
338
+ {"current_steps": 338, "total_steps": 390, "loss": 0.4222, "lr": 2.127347193531757e-06, "epoch": 4.3264, "percentage": 86.67, "elapsed_time": "14:33:03", "remaining_time": "2:14:18"}
339
+ {"current_steps": 339, "total_steps": 390, "loss": 0.4139, "lr": 2.0477254472197237e-06, "epoch": 4.3392, "percentage": 86.92, "elapsed_time": "14:35:48", "remaining_time": "2:11:45"}
340
+ {"current_steps": 340, "total_steps": 390, "loss": 0.3828, "lr": 1.96954184398368e-06, "epoch": 4.352, "percentage": 87.18, "elapsed_time": "14:38:27", "remaining_time": "2:09:11"}
341
+ {"current_steps": 341, "total_steps": 390, "loss": 0.4081, "lr": 1.8928026470526917e-06, "epoch": 4.3648, "percentage": 87.44, "elapsed_time": "14:41:00", "remaining_time": "2:06:35"}
342
+ {"current_steps": 342, "total_steps": 390, "loss": 0.41, "lr": 1.817514003945524e-06, "epoch": 4.3776, "percentage": 87.69, "elapsed_time": "14:43:37", "remaining_time": "2:04:01"}
343
+ {"current_steps": 343, "total_steps": 390, "loss": 0.4159, "lr": 1.743681945978184e-06, "epoch": 4.3904, "percentage": 87.95, "elapsed_time": "14:46:19", "remaining_time": "2:01:27"}
344
+ {"current_steps": 344, "total_steps": 390, "loss": 0.3781, "lr": 1.6713123877807413e-06, "epoch": 4.4032, "percentage": 88.21, "elapsed_time": "14:48:35", "remaining_time": "1:58:49"}
345
+ {"current_steps": 345, "total_steps": 390, "loss": 0.3849, "lr": 1.6004111268235156e-06, "epoch": 4.416, "percentage": 88.46, "elapsed_time": "14:51:00", "remaining_time": "1:56:13"}
346
+ {"current_steps": 346, "total_steps": 390, "loss": 0.397, "lr": 1.5309838429526714e-06, "epoch": 4.4288, "percentage": 88.72, "elapsed_time": "14:53:31", "remaining_time": "1:53:37"}
347
+ {"current_steps": 347, "total_steps": 390, "loss": 0.3974, "lr": 1.4630360979351644e-06, "epoch": 4.4416, "percentage": 88.97, "elapsed_time": "14:56:03", "remaining_time": "1:51:02"}
348
+ {"current_steps": 348, "total_steps": 390, "loss": 0.3987, "lr": 1.396573335013236e-06, "epoch": 4.4544, "percentage": 89.23, "elapsed_time": "14:58:23", "remaining_time": "1:48:25"}
349
+ {"current_steps": 349, "total_steps": 390, "loss": 0.3825, "lr": 1.3316008784683265e-06, "epoch": 4.4672, "percentage": 89.49, "elapsed_time": "15:00:59", "remaining_time": "1:45:50"}
350
+ {"current_steps": 350, "total_steps": 390, "loss": 0.3793, "lr": 1.2681239331945695e-06, "epoch": 4.48, "percentage": 89.74, "elapsed_time": "15:03:40", "remaining_time": "1:43:16"}
351
+ {"current_steps": 351, "total_steps": 390, "loss": 0.405, "lr": 1.2061475842818337e-06, "epoch": 4.4928, "percentage": 90.0, "elapsed_time": "15:06:08", "remaining_time": "1:40:40"}
352
+ {"current_steps": 352, "total_steps": 390, "loss": 0.3924, "lr": 1.1456767966083393e-06, "epoch": 4.5056, "percentage": 90.26, "elapsed_time": "15:08:34", "remaining_time": "1:38:05"}
353
+ {"current_steps": 353, "total_steps": 390, "loss": 0.4036, "lr": 1.086716414442952e-06, "epoch": 4.5184, "percentage": 90.51, "elapsed_time": "15:11:09", "remaining_time": "1:35:30"}
354
+ {"current_steps": 354, "total_steps": 390, "loss": 0.3954, "lr": 1.0292711610570904e-06, "epoch": 4.5312, "percentage": 90.77, "elapsed_time": "15:13:38", "remaining_time": "1:32:54"}
355
+ {"current_steps": 355, "total_steps": 390, "loss": 0.3966, "lr": 9.733456383463658e-07, "epoch": 4.5440000000000005, "percentage": 91.03, "elapsed_time": "15:16:13", "remaining_time": "1:30:19"}
356
+ {"current_steps": 356, "total_steps": 390, "loss": 0.4047, "lr": 9.189443264619102e-07, "epoch": 4.5568, "percentage": 91.28, "elapsed_time": "15:18:58", "remaining_time": "1:27:46"}
357
+ {"current_steps": 357, "total_steps": 390, "loss": 0.3863, "lr": 8.660715834514977e-07, "epoch": 4.5696, "percentage": 91.54, "elapsed_time": "15:21:37", "remaining_time": "1:25:11"}
358
+ {"current_steps": 358, "total_steps": 390, "loss": 0.4018, "lr": 8.147316449103959e-07, "epoch": 4.5824, "percentage": 91.79, "elapsed_time": "15:24:12", "remaining_time": "1:22:36"}
359
+ {"current_steps": 359, "total_steps": 390, "loss": 0.4058, "lr": 7.649286236420806e-07, "epoch": 4.5952, "percentage": 92.05, "elapsed_time": "15:26:43", "remaining_time": "1:20:01"}
360
+ {"current_steps": 360, "total_steps": 390, "loss": 0.397, "lr": 7.166665093287539e-07, "epoch": 4.608, "percentage": 92.31, "elapsed_time": "15:29:21", "remaining_time": "1:17:26"}
361
+ {"current_steps": 361, "total_steps": 390, "loss": 0.3889, "lr": 6.69949168211721e-07, "epoch": 4.6208, "percentage": 92.56, "elapsed_time": "15:31:54", "remaining_time": "1:14:51"}
362
+ {"current_steps": 362, "total_steps": 390, "loss": 0.4026, "lr": 6.247803427816945e-07, "epoch": 4.6336, "percentage": 92.82, "elapsed_time": "15:34:33", "remaining_time": "1:12:17"}
363
+ {"current_steps": 363, "total_steps": 390, "loss": 0.4046, "lr": 5.811636514789598e-07, "epoch": 4.6464, "percentage": 93.08, "elapsed_time": "15:37:19", "remaining_time": "1:09:43"}
364
+ {"current_steps": 364, "total_steps": 390, "loss": 0.3909, "lr": 5.391025884035239e-07, "epoch": 4.6592, "percentage": 93.33, "elapsed_time": "15:39:51", "remaining_time": "1:07:07"}
365
+ {"current_steps": 365, "total_steps": 390, "loss": 0.4033, "lr": 4.986005230351954e-07, "epoch": 4.672, "percentage": 93.59, "elapsed_time": "15:42:27", "remaining_time": "1:04:33"}
366
+ {"current_steps": 366, "total_steps": 390, "loss": 0.3883, "lr": 4.5966069996365993e-07, "epoch": 4.6848, "percentage": 93.85, "elapsed_time": "15:44:54", "remaining_time": "1:01:57"}
367
+ {"current_steps": 367, "total_steps": 390, "loss": 0.3932, "lr": 4.22286238628562e-07, "epoch": 4.6975999999999996, "percentage": 94.1, "elapsed_time": "15:47:46", "remaining_time": "0:59:23"}
368
+ {"current_steps": 368, "total_steps": 390, "loss": 0.4024, "lr": 3.8648013306960664e-07, "epoch": 4.7104, "percentage": 94.36, "elapsed_time": "15:50:15", "remaining_time": "0:56:48"}
369
+ {"current_steps": 369, "total_steps": 390, "loss": 0.3907, "lr": 3.522452516867048e-07, "epoch": 4.7232, "percentage": 94.62, "elapsed_time": "15:52:43", "remaining_time": "0:54:13"}
370
+ {"current_steps": 370, "total_steps": 390, "loss": 0.4016, "lr": 3.1958433701019697e-07, "epoch": 4.736, "percentage": 94.87, "elapsed_time": "15:55:19", "remaining_time": "0:51:38"}
371
+ {"current_steps": 371, "total_steps": 390, "loss": 0.3905, "lr": 2.8850000548115155e-07, "epoch": 4.7488, "percentage": 95.13, "elapsed_time": "15:58:00", "remaining_time": "0:49:03"}
372
+ {"current_steps": 372, "total_steps": 390, "loss": 0.4058, "lr": 2.5899474724174313e-07, "epoch": 4.7616, "percentage": 95.38, "elapsed_time": "16:00:32", "remaining_time": "0:46:28"}
373
+ {"current_steps": 373, "total_steps": 390, "loss": 0.405, "lr": 2.3107092593579905e-07, "epoch": 4.7744, "percentage": 95.64, "elapsed_time": "16:03:17", "remaining_time": "0:43:54"}
374
+ {"current_steps": 374, "total_steps": 390, "loss": 0.3893, "lr": 2.0473077851942858e-07, "epoch": 4.7872, "percentage": 95.9, "elapsed_time": "16:05:48", "remaining_time": "0:41:19"}
375
+ {"current_steps": 375, "total_steps": 390, "loss": 0.417, "lr": 1.799764150818306e-07, "epoch": 4.8, "percentage": 96.15, "elapsed_time": "16:08:21", "remaining_time": "0:38:44"}
376
+ {"current_steps": 376, "total_steps": 390, "loss": 0.3968, "lr": 1.5680981867625566e-07, "epoch": 4.8128, "percentage": 96.41, "elapsed_time": "16:10:50", "remaining_time": "0:36:08"}
377
+ {"current_steps": 377, "total_steps": 390, "loss": 0.3878, "lr": 1.3523284516113955e-07, "epoch": 4.8256, "percentage": 96.67, "elapsed_time": "16:13:09", "remaining_time": "0:33:33"}
378
+ {"current_steps": 378, "total_steps": 390, "loss": 0.404, "lr": 1.1524722305144231e-07, "epoch": 4.8384, "percentage": 96.92, "elapsed_time": "16:15:45", "remaining_time": "0:30:58"}
379
+ {"current_steps": 379, "total_steps": 390, "loss": 0.4, "lr": 9.685455338016347e-08, "epoch": 4.8512, "percentage": 97.18, "elapsed_time": "16:18:32", "remaining_time": "0:28:24"}
380
+ {"current_steps": 380, "total_steps": 390, "loss": 0.3883, "lr": 8.005630957010014e-08, "epoch": 4.864, "percentage": 97.44, "elapsed_time": "16:20:58", "remaining_time": "0:25:48"}
381
+ {"current_steps": 381, "total_steps": 390, "loss": 0.4201, "lr": 6.485383731580142e-08, "epoch": 4.8768, "percentage": 97.69, "elapsed_time": "16:23:35", "remaining_time": "0:23:14"}
382
+ {"current_steps": 382, "total_steps": 390, "loss": 0.3943, "lr": 5.1248354475768034e-08, "epoch": 4.8896, "percentage": 97.95, "elapsed_time": "16:25:58", "remaining_time": "0:20:38"}
383
+ {"current_steps": 383, "total_steps": 390, "loss": 0.4187, "lr": 3.924095097489922e-08, "epoch": 4.9024, "percentage": 98.21, "elapsed_time": "16:28:31", "remaining_time": "0:18:04"}
384
+ {"current_steps": 384, "total_steps": 390, "loss": 0.3954, "lr": 2.8832588717164766e-08, "epoch": 4.9152000000000005, "percentage": 98.46, "elapsed_time": "16:31:06", "remaining_time": "0:15:29"}
385
+ {"current_steps": 385, "total_steps": 390, "loss": 0.3986, "lr": 2.0024101508555604e-08, "epoch": 4.928, "percentage": 98.72, "elapsed_time": "16:33:38", "remaining_time": "0:12:54"}
386
+ {"current_steps": 386, "total_steps": 390, "loss": 0.3985, "lr": 1.281619499029274e-08, "epoch": 4.9408, "percentage": 98.97, "elapsed_time": "16:36:16", "remaining_time": "0:10:19"}
387
+ {"current_steps": 387, "total_steps": 390, "loss": 0.3888, "lr": 7.209446582292501e-09, "epoch": 4.9536, "percentage": 99.23, "elapsed_time": "16:38:46", "remaining_time": "0:07:44"}
388
+ {"current_steps": 388, "total_steps": 390, "loss": 0.3949, "lr": 3.2043054369057523e-09, "epoch": 4.9664, "percentage": 99.49, "elapsed_time": "16:41:20", "remaining_time": "0:05:09"}
389
+ {"current_steps": 389, "total_steps": 390, "loss": 0.394, "lr": 8.010924029533406e-10, "epoch": 4.9792, "percentage": 99.74, "elapsed_time": "16:43:46", "remaining_time": "0:02:34"}
390
+ {"current_steps": 390, "total_steps": 390, "loss": 0.4151, "lr": 0.0, "epoch": 4.992, "percentage": 100.0, "elapsed_time": "16:46:28", "remaining_time": "0:00:00"}
391
+ {"current_steps": 390, "total_steps": 390, "epoch": 4.992, "percentage": 100.0, "elapsed_time": "16:48:05", "remaining_time": "0:00:00"}