neginr commited on
Commit
05a531e
·
verified ·
1 Parent(s): 532edc6

Training in progress, epoch 3

Browse files
model-00001-of-00004.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:0d7d90190d1147db3ead4587e7430a9643b5efac59e4b1ee494b2640ac97d753
3
  size 4877660776
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:69bb19d57c91daf7eaf2b8e4242e49829957b3f887c232c93a6b5ac5c90100c0
3
  size 4877660776
model-00002-of-00004.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:1e798a3500fd6813ef5003c38f8483be4a987b8642706a9eafa8d1f8aac38c1a
3
  size 4932751008
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b68859705c4119a9948135fa1f8daf57d09c90dd64991116c97dac559357a67b
3
  size 4932751008
model-00003-of-00004.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:a506b4bdd7382ccadbb8bf486ed2d1dca063e132aad67909f6157a27f38ac13c
3
  size 4330865200
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:eb63d8848d066358f6bf0ad6a60194a310651546c5eb4ce950969716513fde4b
3
  size 4330865200
model-00004-of-00004.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:eaf624cfca7560786db93bba51f303281f18fba3f508df3b0aaac0d6230f35da
3
  size 1089994880
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8b2d805686cd93fa1e42678382cd091ef2af53b6cd36c8a23329080a503e72f2
3
  size 1089994880
trainer_log.jsonl CHANGED
@@ -243,3 +243,83 @@
243
  {"current_steps": 243, "total_steps": 390, "loss": 0.1605, "lr": 1.4953416182715566e-05, "epoch": 3.1104, "percentage": 62.31, "elapsed_time": "1:13:12", "remaining_time": "0:44:17"}
244
  {"current_steps": 244, "total_steps": 390, "loss": 0.1513, "lr": 1.478040494913926e-05, "epoch": 3.1232, "percentage": 62.56, "elapsed_time": "1:13:24", "remaining_time": "0:43:55"}
245
  {"current_steps": 245, "total_steps": 390, "loss": 0.1716, "lr": 1.460781185335713e-05, "epoch": 3.136, "percentage": 62.82, "elapsed_time": "1:13:43", "remaining_time": "0:43:37"}
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
243
  {"current_steps": 243, "total_steps": 390, "loss": 0.1605, "lr": 1.4953416182715566e-05, "epoch": 3.1104, "percentage": 62.31, "elapsed_time": "1:13:12", "remaining_time": "0:44:17"}
244
  {"current_steps": 244, "total_steps": 390, "loss": 0.1513, "lr": 1.478040494913926e-05, "epoch": 3.1232, "percentage": 62.56, "elapsed_time": "1:13:24", "remaining_time": "0:43:55"}
245
  {"current_steps": 245, "total_steps": 390, "loss": 0.1716, "lr": 1.460781185335713e-05, "epoch": 3.136, "percentage": 62.82, "elapsed_time": "1:13:43", "remaining_time": "0:43:37"}
246
+ {"current_steps": 246, "total_steps": 390, "loss": 0.1532, "lr": 1.443565072167095e-05, "epoch": 3.1488, "percentage": 63.08, "elapsed_time": "1:14:00", "remaining_time": "0:43:19"}
247
+ {"current_steps": 247, "total_steps": 390, "loss": 0.1513, "lr": 1.4263935345778202e-05, "epoch": 3.1616, "percentage": 63.33, "elapsed_time": "1:14:21", "remaining_time": "0:43:02"}
248
+ {"current_steps": 248, "total_steps": 390, "loss": 0.1511, "lr": 1.409267948166718e-05, "epoch": 3.1744, "percentage": 63.59, "elapsed_time": "1:14:39", "remaining_time": "0:42:44"}
249
+ {"current_steps": 249, "total_steps": 390, "loss": 0.1497, "lr": 1.3921896848515064e-05, "epoch": 3.1872, "percentage": 63.85, "elapsed_time": "1:14:59", "remaining_time": "0:42:27"}
250
+ {"current_steps": 250, "total_steps": 390, "loss": 0.1621, "lr": 1.3751601127588849e-05, "epoch": 3.2, "percentage": 64.1, "elapsed_time": "1:15:15", "remaining_time": "0:42:08"}
251
+ {"current_steps": 251, "total_steps": 390, "loss": 0.1368, "lr": 1.3581805961149371e-05, "epoch": 3.2128, "percentage": 64.36, "elapsed_time": "1:15:30", "remaining_time": "0:41:48"}
252
+ {"current_steps": 252, "total_steps": 390, "loss": 0.1612, "lr": 1.341252495135841e-05, "epoch": 3.2256, "percentage": 64.62, "elapsed_time": "1:15:45", "remaining_time": "0:41:29"}
253
+ {"current_steps": 253, "total_steps": 390, "loss": 0.1663, "lr": 1.324377165918906e-05, "epoch": 3.2384, "percentage": 64.87, "elapsed_time": "1:15:57", "remaining_time": "0:41:07"}
254
+ {"current_steps": 254, "total_steps": 390, "loss": 0.132, "lr": 1.3075559603339354e-05, "epoch": 3.2512, "percentage": 65.13, "elapsed_time": "1:16:17", "remaining_time": "0:40:51"}
255
+ {"current_steps": 255, "total_steps": 390, "loss": 0.1562, "lr": 1.2907902259149287e-05, "epoch": 3.2640000000000002, "percentage": 65.38, "elapsed_time": "1:16:34", "remaining_time": "0:40:32"}
256
+ {"current_steps": 256, "total_steps": 390, "loss": 0.1263, "lr": 1.274081305752135e-05, "epoch": 3.2768, "percentage": 65.64, "elapsed_time": "1:16:45", "remaining_time": "0:40:10"}
257
+ {"current_steps": 257, "total_steps": 390, "loss": 0.1567, "lr": 1.2574305383844528e-05, "epoch": 3.2896, "percentage": 65.9, "elapsed_time": "1:17:02", "remaining_time": "0:39:52"}
258
+ {"current_steps": 258, "total_steps": 390, "loss": 0.1414, "lr": 1.2408392576922075e-05, "epoch": 3.3024, "percentage": 66.15, "elapsed_time": "1:17:15", "remaining_time": "0:39:31"}
259
+ {"current_steps": 259, "total_steps": 390, "loss": 0.129, "lr": 1.2243087927902905e-05, "epoch": 3.3152, "percentage": 66.41, "elapsed_time": "1:17:28", "remaining_time": "0:39:10"}
260
+ {"current_steps": 260, "total_steps": 390, "loss": 0.1364, "lr": 1.2078404679216864e-05, "epoch": 3.328, "percentage": 66.67, "elapsed_time": "1:17:41", "remaining_time": "0:38:50"}
261
+ {"current_steps": 261, "total_steps": 390, "loss": 0.1631, "lr": 1.1914356023513904e-05, "epoch": 3.3407999999999998, "percentage": 66.92, "elapsed_time": "1:17:57", "remaining_time": "0:38:31"}
262
+ {"current_steps": 262, "total_steps": 390, "loss": 0.1673, "lr": 1.1750955102607193e-05, "epoch": 3.3536, "percentage": 67.18, "elapsed_time": "1:18:15", "remaining_time": "0:38:14"}
263
+ {"current_steps": 263, "total_steps": 390, "loss": 0.1544, "lr": 1.1588215006420374e-05, "epoch": 3.3664, "percentage": 67.44, "elapsed_time": "1:18:29", "remaining_time": "0:37:54"}
264
+ {"current_steps": 264, "total_steps": 390, "loss": 0.1512, "lr": 1.1426148771938915e-05, "epoch": 3.3792, "percentage": 67.69, "elapsed_time": "1:18:44", "remaining_time": "0:37:34"}
265
+ {"current_steps": 265, "total_steps": 390, "loss": 0.1464, "lr": 1.1264769382165748e-05, "epoch": 3.392, "percentage": 67.95, "elapsed_time": "1:19:03", "remaining_time": "0:37:17"}
266
+ {"current_steps": 266, "total_steps": 390, "loss": 0.1398, "lr": 1.110408976508118e-05, "epoch": 3.4048, "percentage": 68.21, "elapsed_time": "1:19:14", "remaining_time": "0:36:56"}
267
+ {"current_steps": 267, "total_steps": 390, "loss": 0.1293, "lr": 1.094412279260726e-05, "epoch": 3.4176, "percentage": 68.46, "elapsed_time": "1:19:30", "remaining_time": "0:36:37"}
268
+ {"current_steps": 268, "total_steps": 390, "loss": 0.1362, "lr": 1.0784881279576635e-05, "epoch": 3.4304, "percentage": 68.72, "elapsed_time": "1:19:44", "remaining_time": "0:36:18"}
269
+ {"current_steps": 269, "total_steps": 390, "loss": 0.139, "lr": 1.0626377982705929e-05, "epoch": 3.4432, "percentage": 68.97, "elapsed_time": "1:20:02", "remaining_time": "0:36:00"}
270
+ {"current_steps": 270, "total_steps": 390, "loss": 0.1216, "lr": 1.0468625599573842e-05, "epoch": 3.456, "percentage": 69.23, "elapsed_time": "1:20:18", "remaining_time": "0:35:41"}
271
+ {"current_steps": 271, "total_steps": 390, "loss": 0.137, "lr": 1.0311636767603952e-05, "epoch": 3.4688, "percentage": 69.49, "elapsed_time": "1:20:32", "remaining_time": "0:35:22"}
272
+ {"current_steps": 272, "total_steps": 390, "loss": 0.1383, "lr": 1.0155424063052306e-05, "epoch": 3.4816, "percentage": 69.74, "elapsed_time": "1:20:52", "remaining_time": "0:35:05"}
273
+ {"current_steps": 273, "total_steps": 390, "loss": 0.1586, "lr": 1.0000000000000006e-05, "epoch": 3.4944, "percentage": 70.0, "elapsed_time": "1:21:10", "remaining_time": "0:34:47"}
274
+ {"current_steps": 274, "total_steps": 390, "loss": 0.1778, "lr": 9.84537702935065e-06, "epoch": 3.5072, "percentage": 70.26, "elapsed_time": "1:21:27", "remaining_time": "0:34:29"}
275
+ {"current_steps": 275, "total_steps": 390, "loss": 0.1446, "lr": 9.691567537832964e-06, "epoch": 3.52, "percentage": 70.51, "elapsed_time": "1:21:43", "remaining_time": "0:34:10"}
276
+ {"current_steps": 276, "total_steps": 390, "loss": 0.1442, "lr": 9.538583847008452e-06, "epoch": 3.5328, "percentage": 70.77, "elapsed_time": "1:21:57", "remaining_time": "0:33:51"}
277
+ {"current_steps": 277, "total_steps": 390, "loss": 0.1486, "lr": 9.386438212284372e-06, "epoch": 3.5456, "percentage": 71.03, "elapsed_time": "1:22:13", "remaining_time": "0:33:32"}
278
+ {"current_steps": 278, "total_steps": 390, "loss": 0.1454, "lr": 9.235142821931928e-06, "epoch": 3.5584, "percentage": 71.28, "elapsed_time": "1:22:33", "remaining_time": "0:33:15"}
279
+ {"current_steps": 279, "total_steps": 390, "loss": 0.1407, "lr": 9.084709796109907e-06, "epoch": 3.5712, "percentage": 71.54, "elapsed_time": "1:22:48", "remaining_time": "0:32:56"}
280
+ {"current_steps": 280, "total_steps": 390, "loss": 0.1619, "lr": 8.93515118589373e-06, "epoch": 3.584, "percentage": 71.79, "elapsed_time": "1:23:07", "remaining_time": "0:32:39"}
281
+ {"current_steps": 281, "total_steps": 390, "loss": 0.156, "lr": 8.786478972310023e-06, "epoch": 3.5968, "percentage": 72.05, "elapsed_time": "1:23:23", "remaining_time": "0:32:21"}
282
+ {"current_steps": 282, "total_steps": 390, "loss": 0.1366, "lr": 8.638705065376887e-06, "epoch": 3.6096, "percentage": 72.31, "elapsed_time": "1:23:41", "remaining_time": "0:32:03"}
283
+ {"current_steps": 283, "total_steps": 390, "loss": 0.1436, "lr": 8.491841303149728e-06, "epoch": 3.6224, "percentage": 72.56, "elapsed_time": "1:23:59", "remaining_time": "0:31:45"}
284
+ {"current_steps": 284, "total_steps": 390, "loss": 0.1305, "lr": 8.345899450772975e-06, "epoch": 3.6352, "percentage": 72.82, "elapsed_time": "1:24:12", "remaining_time": "0:31:25"}
285
+ {"current_steps": 285, "total_steps": 390, "loss": 0.122, "lr": 8.200891199537549e-06, "epoch": 3.648, "percentage": 73.08, "elapsed_time": "1:24:33", "remaining_time": "0:31:09"}
286
+ {"current_steps": 286, "total_steps": 390, "loss": 0.1366, "lr": 8.056828165944282e-06, "epoch": 3.6608, "percentage": 73.33, "elapsed_time": "1:24:46", "remaining_time": "0:30:49"}
287
+ {"current_steps": 287, "total_steps": 390, "loss": 0.1358, "lr": 7.913721890773354e-06, "epoch": 3.6736, "percentage": 73.59, "elapsed_time": "1:24:58", "remaining_time": "0:30:29"}
288
+ {"current_steps": 288, "total_steps": 390, "loss": 0.1365, "lr": 7.771583838159756e-06, "epoch": 3.6864, "percentage": 73.85, "elapsed_time": "1:25:13", "remaining_time": "0:30:11"}
289
+ {"current_steps": 289, "total_steps": 390, "loss": 0.1323, "lr": 7.630425394674903e-06, "epoch": 3.6992000000000003, "percentage": 74.1, "elapsed_time": "1:25:28", "remaining_time": "0:29:52"}
290
+ {"current_steps": 290, "total_steps": 390, "loss": 0.1796, "lr": 7.49025786841445e-06, "epoch": 3.7119999999999997, "percentage": 74.36, "elapsed_time": "1:25:48", "remaining_time": "0:29:35"}
291
+ {"current_steps": 291, "total_steps": 390, "loss": 0.146, "lr": 7.3510924880924575e-06, "epoch": 3.7248, "percentage": 74.62, "elapsed_time": "1:26:02", "remaining_time": "0:29:16"}
292
+ {"current_steps": 292, "total_steps": 390, "loss": 0.1574, "lr": 7.212940402141808e-06, "epoch": 3.7376, "percentage": 74.87, "elapsed_time": "1:26:26", "remaining_time": "0:29:00"}
293
+ {"current_steps": 293, "total_steps": 390, "loss": 0.1655, "lr": 7.075812677821145e-06, "epoch": 3.7504, "percentage": 75.13, "elapsed_time": "1:26:50", "remaining_time": "0:28:44"}
294
+ {"current_steps": 294, "total_steps": 390, "loss": 0.1522, "lr": 6.939720300328303e-06, "epoch": 3.7632, "percentage": 75.38, "elapsed_time": "1:27:06", "remaining_time": "0:28:26"}
295
+ {"current_steps": 295, "total_steps": 390, "loss": 0.1397, "lr": 6.8046741719202385e-06, "epoch": 3.776, "percentage": 75.64, "elapsed_time": "1:27:20", "remaining_time": "0:28:07"}
296
+ {"current_steps": 296, "total_steps": 390, "loss": 0.1307, "lr": 6.67068511103971e-06, "epoch": 3.7888, "percentage": 75.9, "elapsed_time": "1:27:35", "remaining_time": "0:27:48"}
297
+ {"current_steps": 297, "total_steps": 390, "loss": 0.1387, "lr": 6.537763851448593e-06, "epoch": 3.8016, "percentage": 76.15, "elapsed_time": "1:27:48", "remaining_time": "0:27:29"}
298
+ {"current_steps": 298, "total_steps": 390, "loss": 0.14, "lr": 6.4059210413680175e-06, "epoch": 3.8144, "percentage": 76.41, "elapsed_time": "1:28:07", "remaining_time": "0:27:12"}
299
+ {"current_steps": 299, "total_steps": 390, "loss": 0.134, "lr": 6.275167242625331e-06, "epoch": 3.8272, "percentage": 76.67, "elapsed_time": "1:28:24", "remaining_time": "0:26:54"}
300
+ {"current_steps": 300, "total_steps": 390, "loss": 0.139, "lr": 6.145512929808013e-06, "epoch": 3.84, "percentage": 76.92, "elapsed_time": "1:28:36", "remaining_time": "0:26:34"}
301
+ {"current_steps": 301, "total_steps": 390, "loss": 0.1603, "lr": 6.016968489424572e-06, "epoch": 3.8528000000000002, "percentage": 77.18, "elapsed_time": "1:28:53", "remaining_time": "0:26:16"}
302
+ {"current_steps": 302, "total_steps": 390, "loss": 0.1389, "lr": 5.889544219072465e-06, "epoch": 3.8656, "percentage": 77.44, "elapsed_time": "1:29:07", "remaining_time": "0:25:58"}
303
+ {"current_steps": 303, "total_steps": 390, "loss": 0.1424, "lr": 5.7632503266131925e-06, "epoch": 3.8784, "percentage": 77.69, "elapsed_time": "1:29:24", "remaining_time": "0:25:40"}
304
+ {"current_steps": 304, "total_steps": 390, "loss": 0.153, "lr": 5.638096929354522e-06, "epoch": 3.8912, "percentage": 77.95, "elapsed_time": "1:29:43", "remaining_time": "0:25:22"}
305
+ {"current_steps": 305, "total_steps": 390, "loss": 0.1576, "lr": 5.514094053240035e-06, "epoch": 3.904, "percentage": 78.21, "elapsed_time": "1:29:58", "remaining_time": "0:25:04"}
306
+ {"current_steps": 306, "total_steps": 390, "loss": 0.1538, "lr": 5.39125163204594e-06, "epoch": 3.9168, "percentage": 78.46, "elapsed_time": "1:30:15", "remaining_time": "0:24:46"}
307
+ {"current_steps": 307, "total_steps": 390, "loss": 0.143, "lr": 5.269579506585259e-06, "epoch": 3.9295999999999998, "percentage": 78.72, "elapsed_time": "1:30:28", "remaining_time": "0:24:27"}
308
+ {"current_steps": 308, "total_steps": 390, "loss": 0.1532, "lr": 5.149087423919541e-06, "epoch": 3.9424, "percentage": 78.97, "elapsed_time": "1:30:46", "remaining_time": "0:24:10"}
309
+ {"current_steps": 309, "total_steps": 390, "loss": 0.1288, "lr": 5.029785036577976e-06, "epoch": 3.9552, "percentage": 79.23, "elapsed_time": "1:31:10", "remaining_time": "0:23:54"}
310
+ {"current_steps": 310, "total_steps": 390, "loss": 0.1446, "lr": 4.911681901784198e-06, "epoch": 3.968, "percentage": 79.49, "elapsed_time": "1:31:29", "remaining_time": "0:23:36"}
311
+ {"current_steps": 311, "total_steps": 390, "loss": 0.1503, "lr": 4.794787480690597e-06, "epoch": 3.9808, "percentage": 79.74, "elapsed_time": "1:31:44", "remaining_time": "0:23:18"}
312
+ {"current_steps": 312, "total_steps": 390, "loss": 0.1503, "lr": 4.679111137620442e-06, "epoch": 3.9936, "percentage": 80.0, "elapsed_time": "1:31:59", "remaining_time": "0:22:59"}
313
+ {"current_steps": 313, "total_steps": 390, "loss": 0.1334, "lr": 4.5646621393177e-06, "epoch": 4.0064, "percentage": 80.26, "elapsed_time": "1:34:11", "remaining_time": "0:23:10"}
314
+ {"current_steps": 314, "total_steps": 390, "loss": 0.0832, "lr": 4.451449654204685e-06, "epoch": 4.0192, "percentage": 80.51, "elapsed_time": "1:34:24", "remaining_time": "0:22:51"}
315
+ {"current_steps": 315, "total_steps": 390, "loss": 0.0975, "lr": 4.339482751647557e-06, "epoch": 4.032, "percentage": 80.77, "elapsed_time": "1:34:44", "remaining_time": "0:22:33"}
316
+ {"current_steps": 316, "total_steps": 390, "loss": 0.0798, "lr": 4.228770401229824e-06, "epoch": 4.0448, "percentage": 81.03, "elapsed_time": "1:34:57", "remaining_time": "0:22:14"}
317
+ {"current_steps": 317, "total_steps": 390, "loss": 0.0932, "lr": 4.119321472033779e-06, "epoch": 4.0576, "percentage": 81.28, "elapsed_time": "1:35:10", "remaining_time": "0:21:55"}
318
+ {"current_steps": 318, "total_steps": 390, "loss": 0.0899, "lr": 4.011144731929981e-06, "epoch": 4.0704, "percentage": 81.54, "elapsed_time": "1:35:26", "remaining_time": "0:21:36"}
319
+ {"current_steps": 319, "total_steps": 390, "loss": 0.0952, "lr": 3.904248846874894e-06, "epoch": 4.0832, "percentage": 81.79, "elapsed_time": "1:35:42", "remaining_time": "0:21:18"}
320
+ {"current_steps": 320, "total_steps": 390, "loss": 0.0874, "lr": 3.7986423802166705e-06, "epoch": 4.096, "percentage": 82.05, "elapsed_time": "1:36:01", "remaining_time": "0:21:00"}
321
+ {"current_steps": 321, "total_steps": 390, "loss": 0.1048, "lr": 3.694333792009115e-06, "epoch": 4.1088, "percentage": 82.31, "elapsed_time": "1:36:20", "remaining_time": "0:20:42"}
322
+ {"current_steps": 322, "total_steps": 390, "loss": 0.0779, "lr": 3.5913314383339937e-06, "epoch": 4.1216, "percentage": 82.56, "elapsed_time": "1:36:36", "remaining_time": "0:20:24"}
323
+ {"current_steps": 323, "total_steps": 390, "loss": 0.0952, "lr": 3.4896435706316e-06, "epoch": 4.1344, "percentage": 82.82, "elapsed_time": "1:36:48", "remaining_time": "0:20:04"}
324
+ {"current_steps": 324, "total_steps": 390, "loss": 0.0889, "lr": 3.3892783350397675e-06, "epoch": 4.1472, "percentage": 83.08, "elapsed_time": "1:37:03", "remaining_time": "0:19:46"}
325
+ {"current_steps": 325, "total_steps": 390, "loss": 0.0935, "lr": 3.290243771741275e-06, "epoch": 4.16, "percentage": 83.33, "elapsed_time": "1:37:20", "remaining_time": "0:19:28"}