ryanmarten commited on
Commit
d7a71cb
·
verified ·
1 Parent(s): aa5b60d

Training in progress, epoch 3

Browse files
model-00001-of-00004.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:228dd4d66403f7f21018868ecbf9bdbe7d500c1621e9f8fc85a090b0b384f006
3
  size 4877660776
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:fca25bf42b4bc5e1c25abd6a9b6368ede85157c830d283016131411e346393b6
3
  size 4877660776
model-00002-of-00004.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:2d2da15a387fa172767a0ccf07f2a69c1f05c3177900178d587c8a0cf6aea0b6
3
  size 4932751008
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:769418db572b72b7c16a9044c7c6261d7033ddf2011b16607014d894eeede3fc
3
  size 4932751008
model-00003-of-00004.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:e9ae187690896bf96dbea7f6135e83df346cc546df95c80a91a6632ec22165e7
3
  size 4330865200
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b193b14d03f15afaf7bf6910a027f8fd24ea5302ff9449e5967d3f6bfe2c3591
3
  size 4330865200
model-00004-of-00004.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:f58d139edb299bc7406d7349f6554fc677a5da59f7d25187174274deef50f87c
3
  size 1089994880
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4b3f5cbd95dbd8938e6a1914e2e0c94a6e39c707d416745276b73aea6dc23c8a
3
  size 1089994880
trainer_log.jsonl CHANGED
@@ -233,3 +233,82 @@
233
  {"current_steps": 233, "total_steps": 390, "loss": 0.4575, "lr": 1.6703461793876876e-05, "epoch": 2.9824, "percentage": 59.74, "elapsed_time": "9:44:39", "remaining_time": "6:33:57"}
234
  {"current_steps": 234, "total_steps": 390, "loss": 0.4974, "lr": 1.6527036446661396e-05, "epoch": 2.9952, "percentage": 60.0, "elapsed_time": "9:47:07", "remaining_time": "6:31:24"}
235
  {"current_steps": 235, "total_steps": 390, "loss": 0.441, "lr": 1.635088931591775e-05, "epoch": 3.008, "percentage": 60.26, "elapsed_time": "9:50:24", "remaining_time": "6:29:24"}
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
233
  {"current_steps": 233, "total_steps": 390, "loss": 0.4575, "lr": 1.6703461793876876e-05, "epoch": 2.9824, "percentage": 59.74, "elapsed_time": "9:44:39", "remaining_time": "6:33:57"}
234
  {"current_steps": 234, "total_steps": 390, "loss": 0.4974, "lr": 1.6527036446661396e-05, "epoch": 2.9952, "percentage": 60.0, "elapsed_time": "9:47:07", "remaining_time": "6:31:24"}
235
  {"current_steps": 235, "total_steps": 390, "loss": 0.441, "lr": 1.635088931591775e-05, "epoch": 3.008, "percentage": 60.26, "elapsed_time": "9:50:24", "remaining_time": "6:29:24"}
236
+ {"current_steps": 236, "total_steps": 390, "loss": 0.4183, "lr": 1.6175034512658753e-05, "epoch": 3.0208, "percentage": 60.51, "elapsed_time": "9:52:42", "remaining_time": "6:26:45"}
237
+ {"current_steps": 237, "total_steps": 390, "loss": 0.4075, "lr": 1.5999486124479115e-05, "epoch": 3.0336, "percentage": 60.77, "elapsed_time": "9:55:16", "remaining_time": "6:24:17"}
238
+ {"current_steps": 238, "total_steps": 390, "loss": 0.4159, "lr": 1.5824258214426833e-05, "epoch": 3.0464, "percentage": 61.03, "elapsed_time": "9:57:50", "remaining_time": "6:21:48"}
239
+ {"current_steps": 239, "total_steps": 390, "loss": 0.3737, "lr": 1.5649364819876655e-05, "epoch": 3.0592, "percentage": 61.28, "elapsed_time": "10:00:17", "remaining_time": "6:19:15"}
240
+ {"current_steps": 240, "total_steps": 390, "loss": 0.3745, "lr": 1.547481995140556e-05, "epoch": 3.072, "percentage": 61.54, "elapsed_time": "10:02:53", "remaining_time": "6:16:48"}
241
+ {"current_steps": 241, "total_steps": 390, "loss": 0.4006, "lr": 1.5300637591670357e-05, "epoch": 3.0848, "percentage": 61.79, "elapsed_time": "10:05:36", "remaining_time": "6:14:25"}
242
+ {"current_steps": 242, "total_steps": 390, "loss": 0.4315, "lr": 1.5126831694287564e-05, "epoch": 3.0976, "percentage": 62.05, "elapsed_time": "10:08:14", "remaining_time": "6:11:58"}
243
+ {"current_steps": 243, "total_steps": 390, "loss": 0.4102, "lr": 1.4953416182715566e-05, "epoch": 3.1104, "percentage": 62.31, "elapsed_time": "10:10:47", "remaining_time": "6:09:29"}
244
+ {"current_steps": 244, "total_steps": 390, "loss": 0.3974, "lr": 1.478040494913926e-05, "epoch": 3.1232, "percentage": 62.56, "elapsed_time": "10:13:06", "remaining_time": "6:06:51"}
245
+ {"current_steps": 245, "total_steps": 390, "loss": 0.4205, "lr": 1.460781185335713e-05, "epoch": 3.136, "percentage": 62.82, "elapsed_time": "10:15:51", "remaining_time": "6:04:29"}
246
+ {"current_steps": 246, "total_steps": 390, "loss": 0.3974, "lr": 1.443565072167095e-05, "epoch": 3.1488, "percentage": 63.08, "elapsed_time": "10:18:23", "remaining_time": "6:01:59"}
247
+ {"current_steps": 247, "total_steps": 390, "loss": 0.3948, "lr": 1.4263935345778202e-05, "epoch": 3.1616, "percentage": 63.33, "elapsed_time": "10:20:57", "remaining_time": "5:59:30"}
248
+ {"current_steps": 248, "total_steps": 390, "loss": 0.4054, "lr": 1.409267948166718e-05, "epoch": 3.1744, "percentage": 63.59, "elapsed_time": "10:23:26", "remaining_time": "5:56:58"}
249
+ {"current_steps": 249, "total_steps": 390, "loss": 0.4076, "lr": 1.3921896848515064e-05, "epoch": 3.1872, "percentage": 63.85, "elapsed_time": "10:25:55", "remaining_time": "5:54:26"}
250
+ {"current_steps": 250, "total_steps": 390, "loss": 0.3864, "lr": 1.3751601127588849e-05, "epoch": 3.2, "percentage": 64.1, "elapsed_time": "10:28:21", "remaining_time": "5:51:53"}
251
+ {"current_steps": 251, "total_steps": 390, "loss": 0.393, "lr": 1.3581805961149371e-05, "epoch": 3.2128, "percentage": 64.36, "elapsed_time": "10:30:52", "remaining_time": "5:49:22"}
252
+ {"current_steps": 252, "total_steps": 390, "loss": 0.406, "lr": 1.341252495135841e-05, "epoch": 3.2256, "percentage": 64.62, "elapsed_time": "10:33:30", "remaining_time": "5:46:55"}
253
+ {"current_steps": 253, "total_steps": 390, "loss": 0.3905, "lr": 1.324377165918906e-05, "epoch": 3.2384, "percentage": 64.87, "elapsed_time": "10:35:58", "remaining_time": "5:44:22"}
254
+ {"current_steps": 254, "total_steps": 390, "loss": 0.3874, "lr": 1.3075559603339354e-05, "epoch": 3.2512, "percentage": 65.13, "elapsed_time": "10:38:30", "remaining_time": "5:41:52"}
255
+ {"current_steps": 255, "total_steps": 390, "loss": 0.4292, "lr": 1.2907902259149287e-05, "epoch": 3.2640000000000002, "percentage": 65.38, "elapsed_time": "10:41:03", "remaining_time": "5:39:23"}
256
+ {"current_steps": 256, "total_steps": 390, "loss": 0.4023, "lr": 1.274081305752135e-05, "epoch": 3.2768, "percentage": 65.64, "elapsed_time": "10:43:36", "remaining_time": "5:36:53"}
257
+ {"current_steps": 257, "total_steps": 390, "loss": 0.4167, "lr": 1.2574305383844528e-05, "epoch": 3.2896, "percentage": 65.9, "elapsed_time": "10:46:19", "remaining_time": "5:34:28"}
258
+ {"current_steps": 258, "total_steps": 390, "loss": 0.414, "lr": 1.2408392576922075e-05, "epoch": 3.3024, "percentage": 66.15, "elapsed_time": "10:48:46", "remaining_time": "5:31:55"}
259
+ {"current_steps": 259, "total_steps": 390, "loss": 0.4129, "lr": 1.2243087927902905e-05, "epoch": 3.3152, "percentage": 66.41, "elapsed_time": "10:51:07", "remaining_time": "5:29:19"}
260
+ {"current_steps": 260, "total_steps": 390, "loss": 0.3996, "lr": 1.2078404679216864e-05, "epoch": 3.328, "percentage": 66.67, "elapsed_time": "10:53:17", "remaining_time": "5:26:38"}
261
+ {"current_steps": 261, "total_steps": 390, "loss": 0.3962, "lr": 1.1914356023513904e-05, "epoch": 3.3407999999999998, "percentage": 66.92, "elapsed_time": "10:55:41", "remaining_time": "5:24:04"}
262
+ {"current_steps": 262, "total_steps": 390, "loss": 0.4149, "lr": 1.1750955102607193e-05, "epoch": 3.3536, "percentage": 67.18, "elapsed_time": "10:58:09", "remaining_time": "5:21:32"}
263
+ {"current_steps": 263, "total_steps": 390, "loss": 0.3802, "lr": 1.1588215006420374e-05, "epoch": 3.3664, "percentage": 67.44, "elapsed_time": "11:00:36", "remaining_time": "5:19:00"}
264
+ {"current_steps": 264, "total_steps": 390, "loss": 0.4085, "lr": 1.1426148771938915e-05, "epoch": 3.3792, "percentage": 67.69, "elapsed_time": "11:03:11", "remaining_time": "5:16:31"}
265
+ {"current_steps": 265, "total_steps": 390, "loss": 0.4046, "lr": 1.1264769382165748e-05, "epoch": 3.392, "percentage": 67.95, "elapsed_time": "11:05:53", "remaining_time": "5:14:05"}
266
+ {"current_steps": 266, "total_steps": 390, "loss": 0.4258, "lr": 1.110408976508118e-05, "epoch": 3.4048, "percentage": 68.21, "elapsed_time": "11:08:35", "remaining_time": "5:11:40"}
267
+ {"current_steps": 267, "total_steps": 390, "loss": 0.4073, "lr": 1.094412279260726e-05, "epoch": 3.4176, "percentage": 68.46, "elapsed_time": "11:11:12", "remaining_time": "5:09:12"}
268
+ {"current_steps": 268, "total_steps": 390, "loss": 0.3911, "lr": 1.0784881279576635e-05, "epoch": 3.4304, "percentage": 68.72, "elapsed_time": "11:13:42", "remaining_time": "5:06:41"}
269
+ {"current_steps": 269, "total_steps": 390, "loss": 0.3907, "lr": 1.0626377982705929e-05, "epoch": 3.4432, "percentage": 68.97, "elapsed_time": "11:16:13", "remaining_time": "5:04:10"}
270
+ {"current_steps": 270, "total_steps": 390, "loss": 0.401, "lr": 1.0468625599573842e-05, "epoch": 3.456, "percentage": 69.23, "elapsed_time": "11:18:45", "remaining_time": "5:01:40"}
271
+ {"current_steps": 271, "total_steps": 390, "loss": 0.4049, "lr": 1.0311636767603952e-05, "epoch": 3.4688, "percentage": 69.49, "elapsed_time": "11:21:28", "remaining_time": "4:59:14"}
272
+ {"current_steps": 272, "total_steps": 390, "loss": 0.4105, "lr": 1.0155424063052306e-05, "epoch": 3.4816, "percentage": 69.74, "elapsed_time": "11:24:00", "remaining_time": "4:56:44"}
273
+ {"current_steps": 273, "total_steps": 390, "loss": 0.3962, "lr": 1.0000000000000006e-05, "epoch": 3.4944, "percentage": 70.0, "elapsed_time": "11:26:43", "remaining_time": "4:54:18"}
274
+ {"current_steps": 274, "total_steps": 390, "loss": 0.383, "lr": 9.84537702935065e-06, "epoch": 3.5072, "percentage": 70.26, "elapsed_time": "11:29:06", "remaining_time": "4:51:44"}
275
+ {"current_steps": 275, "total_steps": 390, "loss": 0.4112, "lr": 9.691567537832964e-06, "epoch": 3.52, "percentage": 70.51, "elapsed_time": "11:31:36", "remaining_time": "4:49:12"}
276
+ {"current_steps": 276, "total_steps": 390, "loss": 0.4025, "lr": 9.538583847008452e-06, "epoch": 3.5328, "percentage": 70.77, "elapsed_time": "11:34:15", "remaining_time": "4:46:45"}
277
+ {"current_steps": 277, "total_steps": 390, "loss": 0.3987, "lr": 9.386438212284372e-06, "epoch": 3.5456, "percentage": 71.03, "elapsed_time": "11:36:42", "remaining_time": "4:44:12"}
278
+ {"current_steps": 278, "total_steps": 390, "loss": 0.365, "lr": 9.235142821931928e-06, "epoch": 3.5584, "percentage": 71.28, "elapsed_time": "11:38:58", "remaining_time": "4:41:36"}
279
+ {"current_steps": 279, "total_steps": 390, "loss": 0.4075, "lr": 9.084709796109907e-06, "epoch": 3.5712, "percentage": 71.54, "elapsed_time": "11:41:33", "remaining_time": "4:39:06"}
280
+ {"current_steps": 280, "total_steps": 390, "loss": 0.4021, "lr": 8.93515118589373e-06, "epoch": 3.584, "percentage": 71.79, "elapsed_time": "11:43:59", "remaining_time": "4:36:34"}
281
+ {"current_steps": 281, "total_steps": 390, "loss": 0.3759, "lr": 8.786478972310023e-06, "epoch": 3.5968, "percentage": 72.05, "elapsed_time": "11:46:32", "remaining_time": "4:34:03"}
282
+ {"current_steps": 282, "total_steps": 390, "loss": 0.3948, "lr": 8.638705065376887e-06, "epoch": 3.6096, "percentage": 72.31, "elapsed_time": "11:49:05", "remaining_time": "4:31:34"}
283
+ {"current_steps": 283, "total_steps": 390, "loss": 0.4125, "lr": 8.491841303149728e-06, "epoch": 3.6224, "percentage": 72.56, "elapsed_time": "11:51:28", "remaining_time": "4:29:00"}
284
+ {"current_steps": 284, "total_steps": 390, "loss": 0.3972, "lr": 8.345899450772975e-06, "epoch": 3.6352, "percentage": 72.82, "elapsed_time": "11:53:54", "remaining_time": "4:26:27"}
285
+ {"current_steps": 285, "total_steps": 390, "loss": 0.4098, "lr": 8.200891199537549e-06, "epoch": 3.648, "percentage": 73.08, "elapsed_time": "11:56:28", "remaining_time": "4:23:57"}
286
+ {"current_steps": 286, "total_steps": 390, "loss": 0.4026, "lr": 8.056828165944282e-06, "epoch": 3.6608, "percentage": 73.33, "elapsed_time": "11:58:59", "remaining_time": "4:21:27"}
287
+ {"current_steps": 287, "total_steps": 390, "loss": 0.4057, "lr": 7.913721890773354e-06, "epoch": 3.6736, "percentage": 73.59, "elapsed_time": "12:01:21", "remaining_time": "4:18:53"}
288
+ {"current_steps": 288, "total_steps": 390, "loss": 0.4026, "lr": 7.771583838159756e-06, "epoch": 3.6864, "percentage": 73.85, "elapsed_time": "12:04:01", "remaining_time": "4:16:25"}
289
+ {"current_steps": 289, "total_steps": 390, "loss": 0.4013, "lr": 7.630425394674903e-06, "epoch": 3.6992000000000003, "percentage": 74.1, "elapsed_time": "12:06:24", "remaining_time": "4:13:51"}
290
+ {"current_steps": 290, "total_steps": 390, "loss": 0.42, "lr": 7.49025786841445e-06, "epoch": 3.7119999999999997, "percentage": 74.36, "elapsed_time": "12:08:53", "remaining_time": "4:11:20"}
291
+ {"current_steps": 291, "total_steps": 390, "loss": 0.3919, "lr": 7.3510924880924575e-06, "epoch": 3.7248, "percentage": 74.62, "elapsed_time": "12:11:12", "remaining_time": "4:08:45"}
292
+ {"current_steps": 292, "total_steps": 390, "loss": 0.4025, "lr": 7.212940402141808e-06, "epoch": 3.7376, "percentage": 74.87, "elapsed_time": "12:13:34", "remaining_time": "4:06:11"}
293
+ {"current_steps": 293, "total_steps": 390, "loss": 0.3929, "lr": 7.075812677821145e-06, "epoch": 3.7504, "percentage": 75.13, "elapsed_time": "12:16:05", "remaining_time": "4:03:41"}
294
+ {"current_steps": 294, "total_steps": 390, "loss": 0.4051, "lr": 6.939720300328303e-06, "epoch": 3.7632, "percentage": 75.38, "elapsed_time": "12:18:44", "remaining_time": "4:01:13"}
295
+ {"current_steps": 295, "total_steps": 390, "loss": 0.3734, "lr": 6.8046741719202385e-06, "epoch": 3.776, "percentage": 75.64, "elapsed_time": "12:21:02", "remaining_time": "3:58:38"}
296
+ {"current_steps": 296, "total_steps": 390, "loss": 0.4141, "lr": 6.67068511103971e-06, "epoch": 3.7888, "percentage": 75.9, "elapsed_time": "12:23:34", "remaining_time": "3:56:08"}
297
+ {"current_steps": 297, "total_steps": 390, "loss": 0.3807, "lr": 6.537763851448593e-06, "epoch": 3.8016, "percentage": 76.15, "elapsed_time": "12:25:45", "remaining_time": "3:53:31"}
298
+ {"current_steps": 298, "total_steps": 390, "loss": 0.3947, "lr": 6.4059210413680175e-06, "epoch": 3.8144, "percentage": 76.41, "elapsed_time": "12:28:15", "remaining_time": "3:51:00"}
299
+ {"current_steps": 299, "total_steps": 390, "loss": 0.4128, "lr": 6.275167242625331e-06, "epoch": 3.8272, "percentage": 76.67, "elapsed_time": "12:30:58", "remaining_time": "3:48:33"}
300
+ {"current_steps": 300, "total_steps": 390, "loss": 0.3999, "lr": 6.145512929808013e-06, "epoch": 3.84, "percentage": 76.92, "elapsed_time": "12:33:41", "remaining_time": "3:46:06"}
301
+ {"current_steps": 301, "total_steps": 390, "loss": 0.4129, "lr": 6.016968489424572e-06, "epoch": 3.8528000000000002, "percentage": 77.18, "elapsed_time": "12:36:02", "remaining_time": "3:43:32"}
302
+ {"current_steps": 302, "total_steps": 390, "loss": 0.4036, "lr": 5.889544219072465e-06, "epoch": 3.8656, "percentage": 77.44, "elapsed_time": "12:38:24", "remaining_time": "3:40:59"}
303
+ {"current_steps": 303, "total_steps": 390, "loss": 0.3717, "lr": 5.7632503266131925e-06, "epoch": 3.8784, "percentage": 77.69, "elapsed_time": "12:40:42", "remaining_time": "3:38:25"}
304
+ {"current_steps": 304, "total_steps": 390, "loss": 0.4159, "lr": 5.638096929354522e-06, "epoch": 3.8912, "percentage": 77.95, "elapsed_time": "12:43:18", "remaining_time": "3:35:56"}
305
+ {"current_steps": 305, "total_steps": 390, "loss": 0.3957, "lr": 5.514094053240035e-06, "epoch": 3.904, "percentage": 78.21, "elapsed_time": "12:45:49", "remaining_time": "3:33:25"}
306
+ {"current_steps": 306, "total_steps": 390, "loss": 0.42, "lr": 5.39125163204594e-06, "epoch": 3.9168, "percentage": 78.46, "elapsed_time": "12:48:23", "remaining_time": "3:30:55"}
307
+ {"current_steps": 307, "total_steps": 390, "loss": 0.3915, "lr": 5.269579506585259e-06, "epoch": 3.9295999999999998, "percentage": 78.72, "elapsed_time": "12:50:52", "remaining_time": "3:28:24"}
308
+ {"current_steps": 308, "total_steps": 390, "loss": 0.4033, "lr": 5.149087423919541e-06, "epoch": 3.9424, "percentage": 78.97, "elapsed_time": "12:53:20", "remaining_time": "3:25:53"}
309
+ {"current_steps": 309, "total_steps": 390, "loss": 0.3919, "lr": 5.029785036577976e-06, "epoch": 3.9552, "percentage": 79.23, "elapsed_time": "12:55:57", "remaining_time": "3:23:24"}
310
+ {"current_steps": 310, "total_steps": 390, "loss": 0.4279, "lr": 4.911681901784198e-06, "epoch": 3.968, "percentage": 79.49, "elapsed_time": "12:58:30", "remaining_time": "3:20:54"}
311
+ {"current_steps": 311, "total_steps": 390, "loss": 0.3985, "lr": 4.794787480690597e-06, "epoch": 3.9808, "percentage": 79.74, "elapsed_time": "13:01:06", "remaining_time": "3:18:25"}
312
+ {"current_steps": 312, "total_steps": 390, "loss": 0.4059, "lr": 4.679111137620442e-06, "epoch": 3.9936, "percentage": 80.0, "elapsed_time": "13:03:36", "remaining_time": "3:15:54"}
313
+ {"current_steps": 313, "total_steps": 390, "loss": 0.3754, "lr": 4.5646621393177e-06, "epoch": 4.0064, "percentage": 80.26, "elapsed_time": "13:07:09", "remaining_time": "3:13:38"}
314
+ {"current_steps": 314, "total_steps": 390, "loss": 0.3787, "lr": 4.451449654204685e-06, "epoch": 4.0192, "percentage": 80.51, "elapsed_time": "13:09:44", "remaining_time": "3:11:08"}