Training in progress, epoch 3
Browse files- model-00001-of-00004.safetensors +1 -1
- model-00002-of-00004.safetensors +1 -1
- model-00003-of-00004.safetensors +1 -1
- model-00004-of-00004.safetensors +1 -1
- trainer_log.jsonl +100 -0
- training_args.bin +2 -2
model-00001-of-00004.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 4877660776
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:b6b3959cdecf6ef60c2b484206b5f3eab3add555134ac9cdec8ee93a95a778b0
|
3 |
size 4877660776
|
model-00002-of-00004.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 4932751008
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:6dbf87f404298501311732d1bbd04ac5114ac75dd2b23b06ff2ae6abc90fe7d3
|
3 |
size 4932751008
|
model-00003-of-00004.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 4330865200
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:81950c13053d29f0c3099f034d23d0da53df58ca5154e3915319cabf26661332
|
3 |
size 4330865200
|
model-00004-of-00004.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 1089994880
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:7cb6a7886b5eb86cc0dbf900e68f539b297f739c9687953cfea61e522434dcc5
|
3 |
size 1089994880
|
trainer_log.jsonl
CHANGED
@@ -240,3 +240,103 @@
|
|
240 |
{"current_steps": 240, "total_steps": 390, "loss": 0.4066, "lr": 1.547481995140556e-05, "epoch": 3.0670926517571884, "percentage": 61.54, "elapsed_time": "1:39:31", "remaining_time": "1:02:12"}
|
241 |
{"current_steps": 241, "total_steps": 390, "loss": 0.3714, "lr": 1.5300637591670357e-05, "epoch": 3.0798722044728435, "percentage": 61.79, "elapsed_time": "1:39:55", "remaining_time": "1:01:46"}
|
242 |
{"current_steps": 242, "total_steps": 390, "loss": 0.3806, "lr": 1.5126831694287564e-05, "epoch": 3.0926517571884986, "percentage": 62.05, "elapsed_time": "1:40:15", "remaining_time": "1:01:18"}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
240 |
{"current_steps": 240, "total_steps": 390, "loss": 0.4066, "lr": 1.547481995140556e-05, "epoch": 3.0670926517571884, "percentage": 61.54, "elapsed_time": "1:39:31", "remaining_time": "1:02:12"}
|
241 |
{"current_steps": 241, "total_steps": 390, "loss": 0.3714, "lr": 1.5300637591670357e-05, "epoch": 3.0798722044728435, "percentage": 61.79, "elapsed_time": "1:39:55", "remaining_time": "1:01:46"}
|
242 |
{"current_steps": 242, "total_steps": 390, "loss": 0.3806, "lr": 1.5126831694287564e-05, "epoch": 3.0926517571884986, "percentage": 62.05, "elapsed_time": "1:40:15", "remaining_time": "1:01:18"}
|
243 |
+
{"current_steps": 243, "total_steps": 390, "loss": 0.3687, "lr": 1.4953416182715566e-05, "epoch": 3.1054313099041533, "percentage": 62.31, "elapsed_time": "1:40:38", "remaining_time": "1:00:53"}
|
244 |
+
{"current_steps": 244, "total_steps": 390, "loss": 0.3827, "lr": 1.478040494913926e-05, "epoch": 3.1182108626198084, "percentage": 62.56, "elapsed_time": "1:41:02", "remaining_time": "1:00:27"}
|
245 |
+
{"current_steps": 245, "total_steps": 390, "loss": 0.3605, "lr": 1.460781185335713e-05, "epoch": 3.130990415335463, "percentage": 62.82, "elapsed_time": "1:41:23", "remaining_time": "1:00:00"}
|
246 |
+
{"current_steps": 246, "total_steps": 390, "loss": 0.4036, "lr": 1.443565072167095e-05, "epoch": 3.143769968051118, "percentage": 63.08, "elapsed_time": "1:41:46", "remaining_time": "0:59:34"}
|
247 |
+
{"current_steps": 247, "total_steps": 390, "loss": 0.4156, "lr": 1.4263935345778202e-05, "epoch": 3.1565495207667733, "percentage": 63.33, "elapsed_time": "1:42:10", "remaining_time": "0:59:08"}
|
248 |
+
{"current_steps": 248, "total_steps": 390, "loss": 0.369, "lr": 1.409267948166718e-05, "epoch": 3.169329073482428, "percentage": 63.59, "elapsed_time": "1:42:31", "remaining_time": "0:58:42"}
|
249 |
+
{"current_steps": 249, "total_steps": 390, "loss": 0.3747, "lr": 1.3921896848515064e-05, "epoch": 3.182108626198083, "percentage": 63.85, "elapsed_time": "1:42:55", "remaining_time": "0:58:16"}
|
250 |
+
{"current_steps": 250, "total_steps": 390, "loss": 0.3787, "lr": 1.3751601127588849e-05, "epoch": 3.194888178913738, "percentage": 64.1, "elapsed_time": "1:43:18", "remaining_time": "0:57:51"}
|
251 |
+
{"current_steps": 251, "total_steps": 390, "loss": 0.3906, "lr": 1.3581805961149371e-05, "epoch": 3.207667731629393, "percentage": 64.36, "elapsed_time": "1:43:42", "remaining_time": "0:57:25"}
|
252 |
+
{"current_steps": 252, "total_steps": 390, "loss": 0.375, "lr": 1.341252495135841e-05, "epoch": 3.220447284345048, "percentage": 64.62, "elapsed_time": "1:44:05", "remaining_time": "0:57:00"}
|
253 |
+
{"current_steps": 253, "total_steps": 390, "loss": 0.3721, "lr": 1.324377165918906e-05, "epoch": 3.2332268370607027, "percentage": 64.87, "elapsed_time": "1:44:27", "remaining_time": "0:56:33"}
|
254 |
+
{"current_steps": 254, "total_steps": 390, "loss": 0.3867, "lr": 1.3075559603339354e-05, "epoch": 3.246006389776358, "percentage": 65.13, "elapsed_time": "1:44:51", "remaining_time": "0:56:08"}
|
255 |
+
{"current_steps": 255, "total_steps": 390, "loss": 0.3764, "lr": 1.2907902259149287e-05, "epoch": 3.258785942492013, "percentage": 65.38, "elapsed_time": "1:45:14", "remaining_time": "0:55:42"}
|
256 |
+
{"current_steps": 256, "total_steps": 390, "loss": 0.3945, "lr": 1.274081305752135e-05, "epoch": 3.2715654952076676, "percentage": 65.64, "elapsed_time": "1:45:37", "remaining_time": "0:55:17"}
|
257 |
+
{"current_steps": 235, "total_steps": 390, "loss": 0.3811, "lr": 1.635088931591775e-05, "epoch": 3.012779552715655, "percentage": 60.26, "elapsed_time": "0:00:30", "remaining_time": "0:00:19"}
|
258 |
+
{"current_steps": 236, "total_steps": 390, "loss": 0.3924, "lr": 1.6175034512658753e-05, "epoch": 3.02555910543131, "percentage": 60.51, "elapsed_time": "0:00:54", "remaining_time": "0:00:35"}
|
259 |
+
{"current_steps": 237, "total_steps": 390, "loss": 0.393, "lr": 1.5999486124479115e-05, "epoch": 3.038338658146965, "percentage": 60.77, "elapsed_time": "0:01:17", "remaining_time": "0:00:50"}
|
260 |
+
{"current_steps": 238, "total_steps": 390, "loss": 0.3805, "lr": 1.5824258214426833e-05, "epoch": 3.0511182108626196, "percentage": 61.03, "elapsed_time": "0:01:40", "remaining_time": "0:01:04"}
|
261 |
+
{"current_steps": 239, "total_steps": 390, "loss": 0.4057, "lr": 1.5649364819876655e-05, "epoch": 3.0638977635782747, "percentage": 61.28, "elapsed_time": "0:02:04", "remaining_time": "0:01:18"}
|
262 |
+
{"current_steps": 240, "total_steps": 390, "loss": 0.3879, "lr": 1.547481995140556e-05, "epoch": 3.07667731629393, "percentage": 61.54, "elapsed_time": "0:02:28", "remaining_time": "0:01:32"}
|
263 |
+
{"current_steps": 241, "total_steps": 390, "loss": 0.3733, "lr": 1.5300637591670357e-05, "epoch": 3.0894568690095845, "percentage": 61.79, "elapsed_time": "0:02:50", "remaining_time": "0:01:45"}
|
264 |
+
{"current_steps": 242, "total_steps": 390, "loss": 0.3696, "lr": 1.5126831694287564e-05, "epoch": 3.1022364217252396, "percentage": 62.05, "elapsed_time": "0:03:13", "remaining_time": "0:01:58"}
|
265 |
+
{"current_steps": 243, "total_steps": 390, "loss": 0.3736, "lr": 1.4953416182715566e-05, "epoch": 3.1150159744408947, "percentage": 62.31, "elapsed_time": "0:03:36", "remaining_time": "0:02:10"}
|
266 |
+
{"current_steps": 244, "total_steps": 390, "loss": 0.3827, "lr": 1.478040494913926e-05, "epoch": 3.1277955271565494, "percentage": 62.56, "elapsed_time": "0:03:59", "remaining_time": "0:02:23"}
|
267 |
+
{"current_steps": 245, "total_steps": 390, "loss": 0.3923, "lr": 1.460781185335713e-05, "epoch": 3.1405750798722045, "percentage": 62.82, "elapsed_time": "0:04:21", "remaining_time": "0:02:34"}
|
268 |
+
{"current_steps": 246, "total_steps": 390, "loss": 0.4088, "lr": 1.443565072167095e-05, "epoch": 3.1533546325878596, "percentage": 63.08, "elapsed_time": "0:04:45", "remaining_time": "0:02:46"}
|
269 |
+
{"current_steps": 247, "total_steps": 390, "loss": 0.3801, "lr": 1.4263935345778202e-05, "epoch": 3.1661341853035143, "percentage": 63.33, "elapsed_time": "0:05:07", "remaining_time": "0:02:57"}
|
270 |
+
{"current_steps": 248, "total_steps": 390, "loss": 0.376, "lr": 1.409267948166718e-05, "epoch": 3.1789137380191694, "percentage": 63.59, "elapsed_time": "0:05:31", "remaining_time": "0:03:09"}
|
271 |
+
{"current_steps": 249, "total_steps": 390, "loss": 0.3815, "lr": 1.3921896848515064e-05, "epoch": 3.191693290734824, "percentage": 63.85, "elapsed_time": "0:05:55", "remaining_time": "0:03:21"}
|
272 |
+
{"current_steps": 250, "total_steps": 390, "loss": 0.3826, "lr": 1.3751601127588849e-05, "epoch": 3.2044728434504792, "percentage": 64.1, "elapsed_time": "0:06:18", "remaining_time": "0:03:32"}
|
273 |
+
{"current_steps": 251, "total_steps": 390, "loss": 0.3779, "lr": 1.3581805961149371e-05, "epoch": 3.2172523961661343, "percentage": 64.36, "elapsed_time": "0:06:41", "remaining_time": "0:03:42"}
|
274 |
+
{"current_steps": 252, "total_steps": 390, "loss": 0.3734, "lr": 1.341252495135841e-05, "epoch": 3.230031948881789, "percentage": 64.62, "elapsed_time": "0:07:03", "remaining_time": "0:03:51"}
|
275 |
+
{"current_steps": 253, "total_steps": 390, "loss": 0.3828, "lr": 1.324377165918906e-05, "epoch": 3.242811501597444, "percentage": 64.87, "elapsed_time": "0:07:26", "remaining_time": "0:04:01"}
|
276 |
+
{"current_steps": 254, "total_steps": 390, "loss": 0.3811, "lr": 1.3075559603339354e-05, "epoch": 3.255591054313099, "percentage": 65.13, "elapsed_time": "0:07:49", "remaining_time": "0:04:11"}
|
277 |
+
{"current_steps": 255, "total_steps": 390, "loss": 0.3861, "lr": 1.2907902259149287e-05, "epoch": 3.268370607028754, "percentage": 65.38, "elapsed_time": "0:08:12", "remaining_time": "0:04:20"}
|
278 |
+
{"current_steps": 256, "total_steps": 390, "loss": 0.3934, "lr": 1.274081305752135e-05, "epoch": 3.281150159744409, "percentage": 65.64, "elapsed_time": "0:08:35", "remaining_time": "0:04:29"}
|
279 |
+
{"current_steps": 257, "total_steps": 390, "loss": 0.3877, "lr": 1.2574305383844528e-05, "epoch": 3.2939297124600637, "percentage": 65.9, "elapsed_time": "0:08:57", "remaining_time": "0:04:37"}
|
280 |
+
{"current_steps": 258, "total_steps": 390, "loss": 0.3849, "lr": 1.2408392576922075e-05, "epoch": 3.306709265175719, "percentage": 66.15, "elapsed_time": "0:09:19", "remaining_time": "0:04:46"}
|
281 |
+
{"current_steps": 259, "total_steps": 390, "loss": 0.3937, "lr": 1.2243087927902905e-05, "epoch": 3.319488817891374, "percentage": 66.41, "elapsed_time": "0:09:42", "remaining_time": "0:04:54"}
|
282 |
+
{"current_steps": 260, "total_steps": 390, "loss": 0.3826, "lr": 1.2078404679216864e-05, "epoch": 3.3322683706070286, "percentage": 66.67, "elapsed_time": "0:10:06", "remaining_time": "0:05:03"}
|
283 |
+
{"current_steps": 261, "total_steps": 390, "loss": 0.3834, "lr": 1.1914356023513904e-05, "epoch": 3.3450479233226837, "percentage": 66.92, "elapsed_time": "0:10:29", "remaining_time": "0:05:11"}
|
284 |
+
{"current_steps": 262, "total_steps": 390, "loss": 0.3699, "lr": 1.1750955102607193e-05, "epoch": 3.357827476038339, "percentage": 67.18, "elapsed_time": "0:10:52", "remaining_time": "0:05:18"}
|
285 |
+
{"current_steps": 263, "total_steps": 390, "loss": 0.406, "lr": 1.1588215006420374e-05, "epoch": 3.3706070287539935, "percentage": 67.44, "elapsed_time": "0:11:16", "remaining_time": "0:05:26"}
|
286 |
+
{"current_steps": 264, "total_steps": 390, "loss": 0.4007, "lr": 1.1426148771938915e-05, "epoch": 3.3833865814696487, "percentage": 67.69, "elapsed_time": "0:11:39", "remaining_time": "0:05:33"}
|
287 |
+
{"current_steps": 265, "total_steps": 390, "loss": 0.3912, "lr": 1.1264769382165748e-05, "epoch": 3.3961661341853033, "percentage": 67.95, "elapsed_time": "0:12:02", "remaining_time": "0:05:40"}
|
288 |
+
{"current_steps": 266, "total_steps": 390, "loss": 0.3981, "lr": 1.110408976508118e-05, "epoch": 3.4089456869009584, "percentage": 68.21, "elapsed_time": "0:12:25", "remaining_time": "0:05:47"}
|
289 |
+
{"current_steps": 267, "total_steps": 390, "loss": 0.3846, "lr": 1.094412279260726e-05, "epoch": 3.4217252396166136, "percentage": 68.46, "elapsed_time": "0:12:47", "remaining_time": "0:05:53"}
|
290 |
+
{"current_steps": 268, "total_steps": 390, "loss": 0.3877, "lr": 1.0784881279576635e-05, "epoch": 3.4345047923322682, "percentage": 68.72, "elapsed_time": "0:13:10", "remaining_time": "0:06:00"}
|
291 |
+
{"current_steps": 269, "total_steps": 390, "loss": 0.3635, "lr": 1.0626377982705929e-05, "epoch": 3.4472843450479234, "percentage": 68.97, "elapsed_time": "0:13:34", "remaining_time": "0:06:06"}
|
292 |
+
{"current_steps": 270, "total_steps": 390, "loss": 0.3946, "lr": 1.0468625599573842e-05, "epoch": 3.460063897763578, "percentage": 69.23, "elapsed_time": "0:13:57", "remaining_time": "0:06:12"}
|
293 |
+
{"current_steps": 271, "total_steps": 390, "loss": 0.3731, "lr": 1.0311636767603952e-05, "epoch": 3.472843450479233, "percentage": 69.49, "elapsed_time": "0:14:19", "remaining_time": "0:06:17"}
|
294 |
+
{"current_steps": 272, "total_steps": 390, "loss": 0.3673, "lr": 1.0155424063052306e-05, "epoch": 3.4856230031948883, "percentage": 69.74, "elapsed_time": "0:14:41", "remaining_time": "0:06:22"}
|
295 |
+
{"current_steps": 273, "total_steps": 390, "loss": 0.3796, "lr": 1.0000000000000006e-05, "epoch": 3.498402555910543, "percentage": 70.0, "elapsed_time": "0:15:04", "remaining_time": "0:06:27"}
|
296 |
+
{"current_steps": 274, "total_steps": 390, "loss": 0.386, "lr": 9.84537702935065e-06, "epoch": 3.511182108626198, "percentage": 70.26, "elapsed_time": "0:15:28", "remaining_time": "0:06:33"}
|
297 |
+
{"current_steps": 275, "total_steps": 390, "loss": 0.4085, "lr": 9.691567537832964e-06, "epoch": 3.523961661341853, "percentage": 70.51, "elapsed_time": "0:15:52", "remaining_time": "0:06:38"}
|
298 |
+
{"current_steps": 276, "total_steps": 390, "loss": 0.3766, "lr": 9.538583847008452e-06, "epoch": 3.536741214057508, "percentage": 70.77, "elapsed_time": "0:16:15", "remaining_time": "0:06:42"}
|
299 |
+
{"current_steps": 277, "total_steps": 390, "loss": 0.3713, "lr": 9.386438212284372e-06, "epoch": 3.549520766773163, "percentage": 71.03, "elapsed_time": "0:16:38", "remaining_time": "0:06:47"}
|
300 |
+
{"current_steps": 278, "total_steps": 390, "loss": 0.3621, "lr": 9.235142821931928e-06, "epoch": 3.562300319488818, "percentage": 71.28, "elapsed_time": "0:17:01", "remaining_time": "0:06:51"}
|
301 |
+
{"current_steps": 279, "total_steps": 390, "loss": 0.4121, "lr": 9.084709796109907e-06, "epoch": 3.5750798722044728, "percentage": 71.54, "elapsed_time": "0:17:24", "remaining_time": "0:06:55"}
|
302 |
+
{"current_steps": 280, "total_steps": 390, "loss": 0.3948, "lr": 8.93515118589373e-06, "epoch": 3.587859424920128, "percentage": 71.79, "elapsed_time": "0:17:48", "remaining_time": "0:06:59"}
|
303 |
+
{"current_steps": 281, "total_steps": 390, "loss": 0.401, "lr": 8.786478972310023e-06, "epoch": 3.600638977635783, "percentage": 72.05, "elapsed_time": "0:18:11", "remaining_time": "0:07:03"}
|
304 |
+
{"current_steps": 282, "total_steps": 390, "loss": 0.3952, "lr": 8.638705065376887e-06, "epoch": 3.6134185303514377, "percentage": 72.31, "elapsed_time": "0:18:35", "remaining_time": "0:07:07"}
|
305 |
+
{"current_steps": 283, "total_steps": 390, "loss": 0.3943, "lr": 8.491841303149728e-06, "epoch": 3.626198083067093, "percentage": 72.56, "elapsed_time": "0:18:58", "remaining_time": "0:07:10"}
|
306 |
+
{"current_steps": 284, "total_steps": 390, "loss": 0.4198, "lr": 8.345899450772975e-06, "epoch": 3.6389776357827475, "percentage": 72.82, "elapsed_time": "0:19:22", "remaining_time": "0:07:13"}
|
307 |
+
{"current_steps": 285, "total_steps": 390, "loss": 0.3775, "lr": 8.200891199537549e-06, "epoch": 3.6517571884984026, "percentage": 73.08, "elapsed_time": "0:19:45", "remaining_time": "0:07:16"}
|
308 |
+
{"current_steps": 286, "total_steps": 390, "loss": 0.3701, "lr": 8.056828165944282e-06, "epoch": 3.6645367412140573, "percentage": 73.33, "elapsed_time": "0:20:09", "remaining_time": "0:07:19"}
|
309 |
+
{"current_steps": 287, "total_steps": 390, "loss": 0.3736, "lr": 7.913721890773354e-06, "epoch": 3.6773162939297124, "percentage": 73.59, "elapsed_time": "0:20:33", "remaining_time": "0:07:22"}
|
310 |
+
{"current_steps": 288, "total_steps": 390, "loss": 0.3831, "lr": 7.771583838159756e-06, "epoch": 3.6900958466453675, "percentage": 73.85, "elapsed_time": "0:20:56", "remaining_time": "0:07:25"}
|
311 |
+
{"current_steps": 289, "total_steps": 390, "loss": 0.366, "lr": 7.630425394674903e-06, "epoch": 3.702875399361022, "percentage": 74.1, "elapsed_time": "0:21:20", "remaining_time": "0:07:27"}
|
312 |
+
{"current_steps": 290, "total_steps": 390, "loss": 0.378, "lr": 7.49025786841445e-06, "epoch": 3.7156549520766773, "percentage": 74.36, "elapsed_time": "0:21:42", "remaining_time": "0:07:29"}
|
313 |
+
{"current_steps": 291, "total_steps": 390, "loss": 0.3774, "lr": 7.3510924880924575e-06, "epoch": 3.7284345047923324, "percentage": 74.62, "elapsed_time": "0:22:04", "remaining_time": "0:07:30"}
|
314 |
+
{"current_steps": 292, "total_steps": 390, "loss": 0.3846, "lr": 7.212940402141808e-06, "epoch": 3.741214057507987, "percentage": 74.87, "elapsed_time": "0:22:26", "remaining_time": "0:07:31"}
|
315 |
+
{"current_steps": 293, "total_steps": 390, "loss": 0.3727, "lr": 7.075812677821145e-06, "epoch": 3.753993610223642, "percentage": 75.13, "elapsed_time": "0:22:49", "remaining_time": "0:07:33"}
|
316 |
+
{"current_steps": 294, "total_steps": 390, "loss": 0.391, "lr": 6.939720300328303e-06, "epoch": 3.7667731629392973, "percentage": 75.38, "elapsed_time": "0:23:12", "remaining_time": "0:07:34"}
|
317 |
+
{"current_steps": 295, "total_steps": 390, "loss": 0.3945, "lr": 6.8046741719202385e-06, "epoch": 3.779552715654952, "percentage": 75.64, "elapsed_time": "0:23:35", "remaining_time": "0:07:35"}
|
318 |
+
{"current_steps": 296, "total_steps": 390, "loss": 0.3764, "lr": 6.67068511103971e-06, "epoch": 3.792332268370607, "percentage": 75.9, "elapsed_time": "0:23:59", "remaining_time": "0:07:37"}
|
319 |
+
{"current_steps": 297, "total_steps": 390, "loss": 0.3857, "lr": 6.537763851448593e-06, "epoch": 3.8051118210862622, "percentage": 76.15, "elapsed_time": "0:24:22", "remaining_time": "0:07:38"}
|
320 |
+
{"current_steps": 298, "total_steps": 390, "loss": 0.3873, "lr": 6.4059210413680175e-06, "epoch": 3.817891373801917, "percentage": 76.41, "elapsed_time": "0:24:45", "remaining_time": "0:07:38"}
|
321 |
+
{"current_steps": 299, "total_steps": 390, "loss": 0.3766, "lr": 6.275167242625331e-06, "epoch": 3.830670926517572, "percentage": 76.67, "elapsed_time": "0:25:08", "remaining_time": "0:07:39"}
|
322 |
+
{"current_steps": 300, "total_steps": 390, "loss": 0.3975, "lr": 6.145512929808013e-06, "epoch": 3.8434504792332267, "percentage": 76.92, "elapsed_time": "0:25:31", "remaining_time": "0:07:39"}
|
323 |
+
{"current_steps": 301, "total_steps": 390, "loss": 0.3816, "lr": 6.016968489424572e-06, "epoch": 3.856230031948882, "percentage": 77.18, "elapsed_time": "0:25:53", "remaining_time": "0:07:39"}
|
324 |
+
{"current_steps": 302, "total_steps": 390, "loss": 0.3916, "lr": 5.889544219072465e-06, "epoch": 3.8690095846645365, "percentage": 77.44, "elapsed_time": "0:26:16", "remaining_time": "0:07:39"}
|
325 |
+
{"current_steps": 303, "total_steps": 390, "loss": 0.3929, "lr": 5.7632503266131925e-06, "epoch": 3.8817891373801916, "percentage": 77.69, "elapsed_time": "0:26:38", "remaining_time": "0:07:38"}
|
326 |
+
{"current_steps": 304, "total_steps": 390, "loss": 0.3934, "lr": 5.638096929354522e-06, "epoch": 3.8945686900958467, "percentage": 77.95, "elapsed_time": "0:27:02", "remaining_time": "0:07:38"}
|
327 |
+
{"current_steps": 305, "total_steps": 390, "loss": 0.3971, "lr": 5.514094053240035e-06, "epoch": 3.9073482428115014, "percentage": 78.21, "elapsed_time": "0:27:25", "remaining_time": "0:07:38"}
|
328 |
+
{"current_steps": 306, "total_steps": 390, "loss": 0.3852, "lr": 5.39125163204594e-06, "epoch": 3.9201277955271565, "percentage": 78.46, "elapsed_time": "0:27:48", "remaining_time": "0:07:37"}
|
329 |
+
{"current_steps": 307, "total_steps": 390, "loss": 0.3792, "lr": 5.269579506585259e-06, "epoch": 3.9329073482428116, "percentage": 78.72, "elapsed_time": "0:28:10", "remaining_time": "0:07:37"}
|
330 |
+
{"current_steps": 308, "total_steps": 390, "loss": 0.3809, "lr": 5.149087423919541e-06, "epoch": 3.9456869009584663, "percentage": 78.97, "elapsed_time": "0:28:33", "remaining_time": "0:07:36"}
|
331 |
+
{"current_steps": 309, "total_steps": 390, "loss": 0.38, "lr": 5.029785036577976e-06, "epoch": 3.9584664536741214, "percentage": 79.23, "elapsed_time": "0:28:57", "remaining_time": "0:07:35"}
|
332 |
+
{"current_steps": 310, "total_steps": 390, "loss": 0.3823, "lr": 4.911681901784198e-06, "epoch": 3.9712460063897765, "percentage": 79.49, "elapsed_time": "0:29:20", "remaining_time": "0:07:34"}
|
333 |
+
{"current_steps": 311, "total_steps": 390, "loss": 0.4095, "lr": 4.794787480690597e-06, "epoch": 3.984025559105431, "percentage": 79.74, "elapsed_time": "0:29:43", "remaining_time": "0:07:32"}
|
334 |
+
{"current_steps": 312, "total_steps": 390, "loss": 0.3764, "lr": 4.679111137620442e-06, "epoch": 3.9968051118210863, "percentage": 80.0, "elapsed_time": "0:30:06", "remaining_time": "0:07:31"}
|
335 |
+
{"current_steps": 313, "total_steps": 390, "loss": 0.3437, "lr": 4.5646621393177e-06, "epoch": 4.0095846645367414, "percentage": 80.26, "elapsed_time": "0:32:14", "remaining_time": "0:07:56"}
|
336 |
+
{"current_steps": 314, "total_steps": 390, "loss": 0.3662, "lr": 4.451449654204685e-06, "epoch": 4.022364217252396, "percentage": 80.51, "elapsed_time": "0:32:38", "remaining_time": "0:07:53"}
|
337 |
+
{"current_steps": 315, "total_steps": 390, "loss": 0.3355, "lr": 4.339482751647557e-06, "epoch": 4.035143769968051, "percentage": 80.77, "elapsed_time": "0:33:00", "remaining_time": "0:07:51"}
|
338 |
+
{"current_steps": 316, "total_steps": 390, "loss": 0.3394, "lr": 4.228770401229824e-06, "epoch": 4.047923322683706, "percentage": 81.03, "elapsed_time": "0:33:23", "remaining_time": "0:07:49"}
|
339 |
+
{"current_steps": 317, "total_steps": 390, "loss": 0.3384, "lr": 4.119321472033779e-06, "epoch": 4.060702875399361, "percentage": 81.28, "elapsed_time": "0:33:46", "remaining_time": "0:07:46"}
|
340 |
+
{"current_steps": 318, "total_steps": 390, "loss": 0.3434, "lr": 4.011144731929981e-06, "epoch": 4.073482428115016, "percentage": 81.54, "elapsed_time": "0:34:09", "remaining_time": "0:07:44"}
|
341 |
+
{"current_steps": 319, "total_steps": 390, "loss": 0.32, "lr": 3.904248846874894e-06, "epoch": 4.086261980830671, "percentage": 81.79, "elapsed_time": "0:34:30", "remaining_time": "0:07:40"}
|
342 |
+
{"current_steps": 320, "total_steps": 390, "loss": 0.3235, "lr": 3.7986423802166705e-06, "epoch": 4.099041533546326, "percentage": 82.05, "elapsed_time": "0:34:53", "remaining_time": "0:07:37"}
|
training_args.bin
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
-
size
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:c565d9255f0ecbe1fd17fead1aff35903ddc5a987a947834d14ccc23511ff324
|
3 |
+
size 7480
|