Training in progress, step 15000, checkpoint
Browse files
last-checkpoint/model.safetensors
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 891558696
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:e05637d9fe00567351aebe30b8907548391539066a69466b08d62fb0de2c8b6a
|
| 3 |
size 891558696
|
last-checkpoint/optimizer.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 1783272762
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:fd2dde3b7d6cb9a958c80e4da86c9ac7e84d7b0aad33d337c26e27372676e0e8
|
| 3 |
size 1783272762
|
last-checkpoint/rng_state.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14244
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:5ac6d446adeddd129c374743386b9fda911e1104accc0a9ad12d81db0a9913ff
|
| 3 |
size 14244
|
last-checkpoint/scheduler.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 1064
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:6e468fb4a523a6bf7dace3eac71fcc8bc1ed6b95078548573228e864e9505bcd
|
| 3 |
size 1064
|
last-checkpoint/trainer_state.json
CHANGED
|
@@ -1,9 +1,9 @@
|
|
| 1 |
{
|
| 2 |
"best_metric": 0.08158940076828003,
|
| 3 |
"best_model_checkpoint": "./fine-tuned/checkpoint-12500",
|
| 4 |
-
"epoch": 2.32,
|
| 5 |
"eval_steps": 500,
|
| 6 |
-
"global_step": 14500,
|
| 7 |
"is_hyper_param_search": false,
|
| 8 |
"is_local_process_zero": true,
|
| 9 |
"is_world_process_zero": true,
|
|
@@ -2269,6 +2269,84 @@
|
|
| 2269 |
"eval_samples_per_second": 17.144,
|
| 2270 |
"eval_steps_per_second": 2.143,
|
| 2271 |
"step": 14500
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 2272 |
}
|
| 2273 |
],
|
| 2274 |
"logging_steps": 50,
|
|
@@ -2288,7 +2366,7 @@
|
|
| 2288 |
"attributes": {}
|
| 2289 |
}
|
| 2290 |
},
|
| 2291 |
-
"total_flos": 7.063911530496e+16,
|
| 2292 |
"train_batch_size": 8,
|
| 2293 |
"trial_name": null,
|
| 2294 |
"trial_params": null
|
|
|
|
| 1 |
{
|
| 2 |
"best_metric": 0.08158940076828003,
|
| 3 |
"best_model_checkpoint": "./fine-tuned/checkpoint-12500",
|
| 4 |
+
"epoch": 2.4,
|
| 5 |
"eval_steps": 500,
|
| 6 |
+
"global_step": 15000,
|
| 7 |
"is_hyper_param_search": false,
|
| 8 |
"is_local_process_zero": true,
|
| 9 |
"is_world_process_zero": true,
|
|
|
|
| 2269 |
"eval_samples_per_second": 17.144,
|
| 2270 |
"eval_steps_per_second": 2.143,
|
| 2271 |
"step": 14500
|
| 2272 |
+
},
|
| 2273 |
+
{
|
| 2274 |
+
"epoch": 2.328,
|
| 2275 |
+
"grad_norm": 7419.63623046875,
|
| 2276 |
+
"learning_rate": 6.72e-06,
|
| 2277 |
+
"loss": 0.0527,
|
| 2278 |
+
"step": 14550
|
| 2279 |
+
},
|
| 2280 |
+
{
|
| 2281 |
+
"epoch": 2.336,
|
| 2282 |
+
"grad_norm": 6152.6513671875,
|
| 2283 |
+
"learning_rate": 6.64e-06,
|
| 2284 |
+
"loss": 0.048,
|
| 2285 |
+
"step": 14600
|
| 2286 |
+
},
|
| 2287 |
+
{
|
| 2288 |
+
"epoch": 2.344,
|
| 2289 |
+
"grad_norm": 6703.68994140625,
|
| 2290 |
+
"learning_rate": 6.560000000000001e-06,
|
| 2291 |
+
"loss": 0.0537,
|
| 2292 |
+
"step": 14650
|
| 2293 |
+
},
|
| 2294 |
+
{
|
| 2295 |
+
"epoch": 2.352,
|
| 2296 |
+
"grad_norm": 8612.31640625,
|
| 2297 |
+
"learning_rate": 6.48e-06,
|
| 2298 |
+
"loss": 0.0512,
|
| 2299 |
+
"step": 14700
|
| 2300 |
+
},
|
| 2301 |
+
{
|
| 2302 |
+
"epoch": 2.36,
|
| 2303 |
+
"grad_norm": 6183.3798828125,
|
| 2304 |
+
"learning_rate": 6.4000000000000006e-06,
|
| 2305 |
+
"loss": 0.0499,
|
| 2306 |
+
"step": 14750
|
| 2307 |
+
},
|
| 2308 |
+
{
|
| 2309 |
+
"epoch": 2.368,
|
| 2310 |
+
"grad_norm": 7795.396484375,
|
| 2311 |
+
"learning_rate": 6.3200000000000005e-06,
|
| 2312 |
+
"loss": 0.0525,
|
| 2313 |
+
"step": 14800
|
| 2314 |
+
},
|
| 2315 |
+
{
|
| 2316 |
+
"epoch": 2.376,
|
| 2317 |
+
"grad_norm": 6911.2099609375,
|
| 2318 |
+
"learning_rate": 6.2399999999999995e-06,
|
| 2319 |
+
"loss": 0.0503,
|
| 2320 |
+
"step": 14850
|
| 2321 |
+
},
|
| 2322 |
+
{
|
| 2323 |
+
"epoch": 2.384,
|
| 2324 |
+
"grad_norm": 9744.9267578125,
|
| 2325 |
+
"learning_rate": 6.16e-06,
|
| 2326 |
+
"loss": 0.0509,
|
| 2327 |
+
"step": 14900
|
| 2328 |
+
},
|
| 2329 |
+
{
|
| 2330 |
+
"epoch": 2.392,
|
| 2331 |
+
"grad_norm": 4487.8115234375,
|
| 2332 |
+
"learning_rate": 6.08e-06,
|
| 2333 |
+
"loss": 0.0504,
|
| 2334 |
+
"step": 14950
|
| 2335 |
+
},
|
| 2336 |
+
{
|
| 2337 |
+
"epoch": 2.4,
|
| 2338 |
+
"grad_norm": 6276.47607421875,
|
| 2339 |
+
"learning_rate": 6e-06,
|
| 2340 |
+
"loss": 0.0505,
|
| 2341 |
+
"step": 15000
|
| 2342 |
+
},
|
| 2343 |
+
{
|
| 2344 |
+
"epoch": 2.4,
|
| 2345 |
+
"eval_loss": 0.08178989589214325,
|
| 2346 |
+
"eval_runtime": 116.6529,
|
| 2347 |
+
"eval_samples_per_second": 17.145,
|
| 2348 |
+
"eval_steps_per_second": 2.143,
|
| 2349 |
+
"step": 15000
|
| 2350 |
}
|
| 2351 |
],
|
| 2352 |
"logging_steps": 50,
|
|
|
|
| 2366 |
"attributes": {}
|
| 2367 |
}
|
| 2368 |
},
|
| 2369 |
+
"total_flos": 7.30749468672e+16,
|
| 2370 |
"train_batch_size": 8,
|
| 2371 |
"trial_name": null,
|
| 2372 |
"trial_params": null
|