|
{ |
|
"best_global_step": null, |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 9.0, |
|
"eval_steps": 200, |
|
"global_step": 2925, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.03076923076923077, |
|
"grad_norm": 24.726886749267578, |
|
"learning_rate": 1.8e-06, |
|
"loss": 11.4221, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.06153846153846154, |
|
"grad_norm": 17.795185089111328, |
|
"learning_rate": 3.8e-06, |
|
"loss": 10.4941, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.09230769230769231, |
|
"grad_norm": 14.8720703125, |
|
"learning_rate": 5.8e-06, |
|
"loss": 10.943, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.12307692307692308, |
|
"grad_norm": 26.370025634765625, |
|
"learning_rate": 7.8e-06, |
|
"loss": 9.7676, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.15384615384615385, |
|
"grad_norm": 25.46526527404785, |
|
"learning_rate": 9.800000000000001e-06, |
|
"loss": 8.0724, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.18461538461538463, |
|
"grad_norm": 31.001745223999023, |
|
"learning_rate": 1.18e-05, |
|
"loss": 6.9668, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.2153846153846154, |
|
"grad_norm": 9.090025901794434, |
|
"learning_rate": 1.3800000000000002e-05, |
|
"loss": 5.0298, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.24615384615384617, |
|
"grad_norm": 4.529256820678711, |
|
"learning_rate": 1.58e-05, |
|
"loss": 3.5286, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.27692307692307694, |
|
"grad_norm": 2.912689685821533, |
|
"learning_rate": 1.78e-05, |
|
"loss": 2.9783, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.3076923076923077, |
|
"grad_norm": 2.04130220413208, |
|
"learning_rate": 1.9800000000000004e-05, |
|
"loss": 2.5693, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.3384615384615385, |
|
"grad_norm": 1.7645025253295898, |
|
"learning_rate": 2.18e-05, |
|
"loss": 2.3032, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.36923076923076925, |
|
"grad_norm": 1.4153923988342285, |
|
"learning_rate": 2.38e-05, |
|
"loss": 2.1182, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"grad_norm": 2.1566500663757324, |
|
"learning_rate": 2.58e-05, |
|
"loss": 2.2848, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.4307692307692308, |
|
"grad_norm": 1.7678470611572266, |
|
"learning_rate": 2.7800000000000005e-05, |
|
"loss": 2.2928, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.46153846153846156, |
|
"grad_norm": 1.717806100845337, |
|
"learning_rate": 2.98e-05, |
|
"loss": 2.0866, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.49230769230769234, |
|
"grad_norm": 1.8488136529922485, |
|
"learning_rate": 3.18e-05, |
|
"loss": 2.2323, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.5230769230769231, |
|
"grad_norm": 1.8968263864517212, |
|
"learning_rate": 3.38e-05, |
|
"loss": 2.2183, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 0.5538461538461539, |
|
"grad_norm": 1.398100733757019, |
|
"learning_rate": 3.58e-05, |
|
"loss": 1.8395, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 0.5846153846153846, |
|
"grad_norm": 1.5084631443023682, |
|
"learning_rate": 3.7800000000000004e-05, |
|
"loss": 1.9694, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 0.6153846153846154, |
|
"grad_norm": 1.1019172668457031, |
|
"learning_rate": 3.9800000000000005e-05, |
|
"loss": 1.9702, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.6153846153846154, |
|
"eval_gen_len": 186.9088, |
|
"eval_loss": 1.8701356649398804, |
|
"eval_rouge1": 0.5735, |
|
"eval_rouge2": 0.2231, |
|
"eval_rougeL": 0.4346, |
|
"eval_runtime": 33.3119, |
|
"eval_samples_per_second": 8.225, |
|
"eval_steps_per_second": 2.071, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.6461538461538462, |
|
"grad_norm": 1.5155857801437378, |
|
"learning_rate": 4.18e-05, |
|
"loss": 2.0705, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 0.676923076923077, |
|
"grad_norm": 1.3338611125946045, |
|
"learning_rate": 4.38e-05, |
|
"loss": 2.051, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 0.7076923076923077, |
|
"grad_norm": 1.2012193202972412, |
|
"learning_rate": 4.58e-05, |
|
"loss": 1.9834, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 0.7384615384615385, |
|
"grad_norm": 1.527007818222046, |
|
"learning_rate": 4.78e-05, |
|
"loss": 2.0451, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 0.7692307692307693, |
|
"grad_norm": 1.2146987915039062, |
|
"learning_rate": 4.9800000000000004e-05, |
|
"loss": 1.974, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"grad_norm": 1.7576699256896973, |
|
"learning_rate": 5.1800000000000005e-05, |
|
"loss": 2.0347, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 0.8307692307692308, |
|
"grad_norm": 1.17750084400177, |
|
"learning_rate": 5.380000000000001e-05, |
|
"loss": 1.9763, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 0.8615384615384616, |
|
"grad_norm": 1.3267815113067627, |
|
"learning_rate": 5.580000000000001e-05, |
|
"loss": 1.842, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 0.8923076923076924, |
|
"grad_norm": 1.0520875453948975, |
|
"learning_rate": 5.7799999999999995e-05, |
|
"loss": 2.0525, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 0.9230769230769231, |
|
"grad_norm": 1.4701600074768066, |
|
"learning_rate": 5.9800000000000003e-05, |
|
"loss": 1.7418, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.9538461538461539, |
|
"grad_norm": 1.062267780303955, |
|
"learning_rate": 6.18e-05, |
|
"loss": 1.9685, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 0.9846153846153847, |
|
"grad_norm": 1.2724727392196655, |
|
"learning_rate": 6.38e-05, |
|
"loss": 1.7972, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 1.0153846153846153, |
|
"grad_norm": 1.2834393978118896, |
|
"learning_rate": 6.58e-05, |
|
"loss": 1.8395, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 1.0461538461538462, |
|
"grad_norm": 0.9657095074653625, |
|
"learning_rate": 6.780000000000001e-05, |
|
"loss": 1.836, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 1.0769230769230769, |
|
"grad_norm": 1.0390011072158813, |
|
"learning_rate": 6.98e-05, |
|
"loss": 1.9328, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 1.1076923076923078, |
|
"grad_norm": 1.2896322011947632, |
|
"learning_rate": 7.18e-05, |
|
"loss": 1.8227, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 1.1384615384615384, |
|
"grad_norm": 1.53290593624115, |
|
"learning_rate": 7.38e-05, |
|
"loss": 1.9214, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 1.1692307692307693, |
|
"grad_norm": 1.0145893096923828, |
|
"learning_rate": 7.58e-05, |
|
"loss": 1.8295, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 1.2, |
|
"grad_norm": 1.2127511501312256, |
|
"learning_rate": 7.780000000000001e-05, |
|
"loss": 1.7774, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 1.2307692307692308, |
|
"grad_norm": 1.1971853971481323, |
|
"learning_rate": 7.98e-05, |
|
"loss": 1.9926, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 1.2307692307692308, |
|
"eval_gen_len": 187.0146, |
|
"eval_loss": 1.7412512302398682, |
|
"eval_rouge1": 0.5961, |
|
"eval_rouge2": 0.2459, |
|
"eval_rougeL": 0.4577, |
|
"eval_runtime": 33.1213, |
|
"eval_samples_per_second": 8.273, |
|
"eval_steps_per_second": 2.083, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 1.2615384615384615, |
|
"grad_norm": 1.107351303100586, |
|
"learning_rate": 8.18e-05, |
|
"loss": 1.8515, |
|
"step": 410 |
|
}, |
|
{ |
|
"epoch": 1.2923076923076924, |
|
"grad_norm": 1.1407504081726074, |
|
"learning_rate": 8.38e-05, |
|
"loss": 1.7011, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 1.323076923076923, |
|
"grad_norm": 1.418338656425476, |
|
"learning_rate": 8.58e-05, |
|
"loss": 1.6384, |
|
"step": 430 |
|
}, |
|
{ |
|
"epoch": 1.353846153846154, |
|
"grad_norm": 1.3715286254882812, |
|
"learning_rate": 8.78e-05, |
|
"loss": 1.8502, |
|
"step": 440 |
|
}, |
|
{ |
|
"epoch": 1.3846153846153846, |
|
"grad_norm": 0.9774390459060669, |
|
"learning_rate": 8.98e-05, |
|
"loss": 1.8264, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 1.4153846153846155, |
|
"grad_norm": 1.4778176546096802, |
|
"learning_rate": 9.180000000000001e-05, |
|
"loss": 1.694, |
|
"step": 460 |
|
}, |
|
{ |
|
"epoch": 1.4461538461538461, |
|
"grad_norm": 1.2721563577651978, |
|
"learning_rate": 9.38e-05, |
|
"loss": 1.8213, |
|
"step": 470 |
|
}, |
|
{ |
|
"epoch": 1.476923076923077, |
|
"grad_norm": 0.94813472032547, |
|
"learning_rate": 9.58e-05, |
|
"loss": 1.6636, |
|
"step": 480 |
|
}, |
|
{ |
|
"epoch": 1.5076923076923077, |
|
"grad_norm": 1.0905983448028564, |
|
"learning_rate": 9.78e-05, |
|
"loss": 1.7712, |
|
"step": 490 |
|
}, |
|
{ |
|
"epoch": 1.5384615384615383, |
|
"grad_norm": 1.1593286991119385, |
|
"learning_rate": 9.98e-05, |
|
"loss": 1.808, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 1.5692307692307692, |
|
"grad_norm": 1.0593713521957397, |
|
"learning_rate": 9.967272727272727e-05, |
|
"loss": 1.82, |
|
"step": 510 |
|
}, |
|
{ |
|
"epoch": 1.6, |
|
"grad_norm": 0.941973865032196, |
|
"learning_rate": 9.930909090909092e-05, |
|
"loss": 1.7341, |
|
"step": 520 |
|
}, |
|
{ |
|
"epoch": 1.6307692307692307, |
|
"grad_norm": 0.80891352891922, |
|
"learning_rate": 9.894545454545455e-05, |
|
"loss": 1.6166, |
|
"step": 530 |
|
}, |
|
{ |
|
"epoch": 1.6615384615384614, |
|
"grad_norm": 1.0325396060943604, |
|
"learning_rate": 9.858181818181819e-05, |
|
"loss": 1.8333, |
|
"step": 540 |
|
}, |
|
{ |
|
"epoch": 1.6923076923076923, |
|
"grad_norm": 1.3042590618133545, |
|
"learning_rate": 9.821818181818182e-05, |
|
"loss": 1.6287, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 1.7230769230769232, |
|
"grad_norm": 1.475900650024414, |
|
"learning_rate": 9.785454545454545e-05, |
|
"loss": 1.6019, |
|
"step": 560 |
|
}, |
|
{ |
|
"epoch": 1.7538461538461538, |
|
"grad_norm": 1.1589939594268799, |
|
"learning_rate": 9.74909090909091e-05, |
|
"loss": 1.6904, |
|
"step": 570 |
|
}, |
|
{ |
|
"epoch": 1.7846153846153845, |
|
"grad_norm": 1.2714788913726807, |
|
"learning_rate": 9.712727272727274e-05, |
|
"loss": 1.7928, |
|
"step": 580 |
|
}, |
|
{ |
|
"epoch": 1.8153846153846154, |
|
"grad_norm": 1.2037074565887451, |
|
"learning_rate": 9.676363636363637e-05, |
|
"loss": 1.8325, |
|
"step": 590 |
|
}, |
|
{ |
|
"epoch": 1.8461538461538463, |
|
"grad_norm": 1.1115801334381104, |
|
"learning_rate": 9.64e-05, |
|
"loss": 1.7673, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 1.8461538461538463, |
|
"eval_gen_len": 187.0146, |
|
"eval_loss": 1.69492506980896, |
|
"eval_rouge1": 0.6004, |
|
"eval_rouge2": 0.2505, |
|
"eval_rougeL": 0.4658, |
|
"eval_runtime": 33.2205, |
|
"eval_samples_per_second": 8.248, |
|
"eval_steps_per_second": 2.077, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 1.876923076923077, |
|
"grad_norm": 1.2008461952209473, |
|
"learning_rate": 9.603636363636364e-05, |
|
"loss": 1.7674, |
|
"step": 610 |
|
}, |
|
{ |
|
"epoch": 1.9076923076923076, |
|
"grad_norm": 1.1482900381088257, |
|
"learning_rate": 9.567272727272729e-05, |
|
"loss": 1.7932, |
|
"step": 620 |
|
}, |
|
{ |
|
"epoch": 1.9384615384615385, |
|
"grad_norm": 1.0144352912902832, |
|
"learning_rate": 9.530909090909092e-05, |
|
"loss": 1.6315, |
|
"step": 630 |
|
}, |
|
{ |
|
"epoch": 1.9692307692307693, |
|
"grad_norm": 0.9276631474494934, |
|
"learning_rate": 9.494545454545455e-05, |
|
"loss": 1.8373, |
|
"step": 640 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"grad_norm": 1.0593888759613037, |
|
"learning_rate": 9.458181818181819e-05, |
|
"loss": 1.7967, |
|
"step": 650 |
|
}, |
|
{ |
|
"epoch": 2.0307692307692307, |
|
"grad_norm": 1.1406164169311523, |
|
"learning_rate": 9.421818181818183e-05, |
|
"loss": 1.7668, |
|
"step": 660 |
|
}, |
|
{ |
|
"epoch": 2.0615384615384613, |
|
"grad_norm": 0.9809508919715881, |
|
"learning_rate": 9.385454545454546e-05, |
|
"loss": 1.6602, |
|
"step": 670 |
|
}, |
|
{ |
|
"epoch": 2.0923076923076924, |
|
"grad_norm": 1.1698426008224487, |
|
"learning_rate": 9.349090909090909e-05, |
|
"loss": 1.6775, |
|
"step": 680 |
|
}, |
|
{ |
|
"epoch": 2.123076923076923, |
|
"grad_norm": 1.1539372205734253, |
|
"learning_rate": 9.312727272727274e-05, |
|
"loss": 1.533, |
|
"step": 690 |
|
}, |
|
{ |
|
"epoch": 2.1538461538461537, |
|
"grad_norm": 1.0783981084823608, |
|
"learning_rate": 9.276363636363637e-05, |
|
"loss": 1.5243, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 2.184615384615385, |
|
"grad_norm": 1.262705683708191, |
|
"learning_rate": 9.240000000000001e-05, |
|
"loss": 1.6625, |
|
"step": 710 |
|
}, |
|
{ |
|
"epoch": 2.2153846153846155, |
|
"grad_norm": 1.1545718908309937, |
|
"learning_rate": 9.203636363636364e-05, |
|
"loss": 1.9172, |
|
"step": 720 |
|
}, |
|
{ |
|
"epoch": 2.246153846153846, |
|
"grad_norm": 0.9896947741508484, |
|
"learning_rate": 9.167272727272728e-05, |
|
"loss": 1.5449, |
|
"step": 730 |
|
}, |
|
{ |
|
"epoch": 2.276923076923077, |
|
"grad_norm": 1.063262701034546, |
|
"learning_rate": 9.130909090909091e-05, |
|
"loss": 1.5318, |
|
"step": 740 |
|
}, |
|
{ |
|
"epoch": 2.3076923076923075, |
|
"grad_norm": 1.0846728086471558, |
|
"learning_rate": 9.094545454545454e-05, |
|
"loss": 1.5875, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 2.3384615384615386, |
|
"grad_norm": 1.0437549352645874, |
|
"learning_rate": 9.058181818181819e-05, |
|
"loss": 1.5724, |
|
"step": 760 |
|
}, |
|
{ |
|
"epoch": 2.3692307692307693, |
|
"grad_norm": 1.050115942955017, |
|
"learning_rate": 9.021818181818183e-05, |
|
"loss": 1.662, |
|
"step": 770 |
|
}, |
|
{ |
|
"epoch": 2.4, |
|
"grad_norm": 0.9663347601890564, |
|
"learning_rate": 8.985454545454546e-05, |
|
"loss": 1.6284, |
|
"step": 780 |
|
}, |
|
{ |
|
"epoch": 2.430769230769231, |
|
"grad_norm": 1.1656932830810547, |
|
"learning_rate": 8.949090909090909e-05, |
|
"loss": 1.5995, |
|
"step": 790 |
|
}, |
|
{ |
|
"epoch": 2.4615384615384617, |
|
"grad_norm": 1.073716402053833, |
|
"learning_rate": 8.912727272727273e-05, |
|
"loss": 1.4811, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 2.4615384615384617, |
|
"eval_gen_len": 187.0146, |
|
"eval_loss": 1.6769312620162964, |
|
"eval_rouge1": 0.6042, |
|
"eval_rouge2": 0.2561, |
|
"eval_rougeL": 0.4686, |
|
"eval_runtime": 33.5273, |
|
"eval_samples_per_second": 8.172, |
|
"eval_steps_per_second": 2.058, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 2.4923076923076923, |
|
"grad_norm": 0.9764583110809326, |
|
"learning_rate": 8.876363636363638e-05, |
|
"loss": 1.5478, |
|
"step": 810 |
|
}, |
|
{ |
|
"epoch": 2.523076923076923, |
|
"grad_norm": 0.9336417317390442, |
|
"learning_rate": 8.840000000000001e-05, |
|
"loss": 1.5138, |
|
"step": 820 |
|
}, |
|
{ |
|
"epoch": 2.5538461538461537, |
|
"grad_norm": 0.9714758992195129, |
|
"learning_rate": 8.803636363636364e-05, |
|
"loss": 1.5506, |
|
"step": 830 |
|
}, |
|
{ |
|
"epoch": 2.5846153846153848, |
|
"grad_norm": 0.9208464622497559, |
|
"learning_rate": 8.767272727272727e-05, |
|
"loss": 1.4944, |
|
"step": 840 |
|
}, |
|
{ |
|
"epoch": 2.6153846153846154, |
|
"grad_norm": 1.0252026319503784, |
|
"learning_rate": 8.730909090909092e-05, |
|
"loss": 1.6991, |
|
"step": 850 |
|
}, |
|
{ |
|
"epoch": 2.646153846153846, |
|
"grad_norm": 1.0464015007019043, |
|
"learning_rate": 8.694545454545455e-05, |
|
"loss": 1.679, |
|
"step": 860 |
|
}, |
|
{ |
|
"epoch": 2.676923076923077, |
|
"grad_norm": 1.3673149347305298, |
|
"learning_rate": 8.658181818181818e-05, |
|
"loss": 1.5021, |
|
"step": 870 |
|
}, |
|
{ |
|
"epoch": 2.707692307692308, |
|
"grad_norm": 1.1350778341293335, |
|
"learning_rate": 8.621818181818181e-05, |
|
"loss": 1.5898, |
|
"step": 880 |
|
}, |
|
{ |
|
"epoch": 2.7384615384615385, |
|
"grad_norm": 0.9916401505470276, |
|
"learning_rate": 8.585454545454546e-05, |
|
"loss": 1.6542, |
|
"step": 890 |
|
}, |
|
{ |
|
"epoch": 2.769230769230769, |
|
"grad_norm": 0.9967766404151917, |
|
"learning_rate": 8.54909090909091e-05, |
|
"loss": 1.7056, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 2.8, |
|
"grad_norm": 1.2031991481781006, |
|
"learning_rate": 8.512727272727273e-05, |
|
"loss": 1.6856, |
|
"step": 910 |
|
}, |
|
{ |
|
"epoch": 2.830769230769231, |
|
"grad_norm": 1.0159794092178345, |
|
"learning_rate": 8.476363636363636e-05, |
|
"loss": 1.5293, |
|
"step": 920 |
|
}, |
|
{ |
|
"epoch": 2.8615384615384616, |
|
"grad_norm": 1.3572866916656494, |
|
"learning_rate": 8.44e-05, |
|
"loss": 1.6191, |
|
"step": 930 |
|
}, |
|
{ |
|
"epoch": 2.8923076923076922, |
|
"grad_norm": 1.2567291259765625, |
|
"learning_rate": 8.403636363636364e-05, |
|
"loss": 1.7504, |
|
"step": 940 |
|
}, |
|
{ |
|
"epoch": 2.9230769230769234, |
|
"grad_norm": 1.2280553579330444, |
|
"learning_rate": 8.367272727272728e-05, |
|
"loss": 1.6523, |
|
"step": 950 |
|
}, |
|
{ |
|
"epoch": 2.953846153846154, |
|
"grad_norm": 1.0409953594207764, |
|
"learning_rate": 8.330909090909091e-05, |
|
"loss": 1.5903, |
|
"step": 960 |
|
}, |
|
{ |
|
"epoch": 2.9846153846153847, |
|
"grad_norm": 1.10386061668396, |
|
"learning_rate": 8.294545454545455e-05, |
|
"loss": 1.5235, |
|
"step": 970 |
|
}, |
|
{ |
|
"epoch": 3.0153846153846153, |
|
"grad_norm": 1.0341882705688477, |
|
"learning_rate": 8.258181818181818e-05, |
|
"loss": 1.6025, |
|
"step": 980 |
|
}, |
|
{ |
|
"epoch": 3.046153846153846, |
|
"grad_norm": 1.3020343780517578, |
|
"learning_rate": 8.221818181818183e-05, |
|
"loss": 1.4696, |
|
"step": 990 |
|
}, |
|
{ |
|
"epoch": 3.076923076923077, |
|
"grad_norm": 1.104643702507019, |
|
"learning_rate": 8.185454545454546e-05, |
|
"loss": 1.4009, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 3.076923076923077, |
|
"eval_gen_len": 187.0146, |
|
"eval_loss": 1.6721168756484985, |
|
"eval_rouge1": 0.6044, |
|
"eval_rouge2": 0.2558, |
|
"eval_rougeL": 0.4692, |
|
"eval_runtime": 34.4765, |
|
"eval_samples_per_second": 7.947, |
|
"eval_steps_per_second": 2.001, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 3.1076923076923078, |
|
"grad_norm": 1.1327263116836548, |
|
"learning_rate": 8.14909090909091e-05, |
|
"loss": 1.5893, |
|
"step": 1010 |
|
}, |
|
{ |
|
"epoch": 3.1384615384615384, |
|
"grad_norm": 1.168095350265503, |
|
"learning_rate": 8.112727272727273e-05, |
|
"loss": 1.4248, |
|
"step": 1020 |
|
}, |
|
{ |
|
"epoch": 3.169230769230769, |
|
"grad_norm": 0.9978489279747009, |
|
"learning_rate": 8.076363636363636e-05, |
|
"loss": 1.5407, |
|
"step": 1030 |
|
}, |
|
{ |
|
"epoch": 3.2, |
|
"grad_norm": 1.0370062589645386, |
|
"learning_rate": 8.04e-05, |
|
"loss": 1.4867, |
|
"step": 1040 |
|
}, |
|
{ |
|
"epoch": 3.230769230769231, |
|
"grad_norm": 0.9647369384765625, |
|
"learning_rate": 8.003636363636365e-05, |
|
"loss": 1.4806, |
|
"step": 1050 |
|
}, |
|
{ |
|
"epoch": 3.2615384615384615, |
|
"grad_norm": 1.3316948413848877, |
|
"learning_rate": 7.967272727272728e-05, |
|
"loss": 1.4612, |
|
"step": 1060 |
|
}, |
|
{ |
|
"epoch": 3.292307692307692, |
|
"grad_norm": 1.37971830368042, |
|
"learning_rate": 7.93090909090909e-05, |
|
"loss": 1.5745, |
|
"step": 1070 |
|
}, |
|
{ |
|
"epoch": 3.3230769230769233, |
|
"grad_norm": 1.1220242977142334, |
|
"learning_rate": 7.894545454545455e-05, |
|
"loss": 1.3228, |
|
"step": 1080 |
|
}, |
|
{ |
|
"epoch": 3.353846153846154, |
|
"grad_norm": 1.0595531463623047, |
|
"learning_rate": 7.85818181818182e-05, |
|
"loss": 1.4618, |
|
"step": 1090 |
|
}, |
|
{ |
|
"epoch": 3.3846153846153846, |
|
"grad_norm": 1.3739666938781738, |
|
"learning_rate": 7.821818181818182e-05, |
|
"loss": 1.4973, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 3.4153846153846152, |
|
"grad_norm": 1.2643866539001465, |
|
"learning_rate": 7.785454545454545e-05, |
|
"loss": 1.533, |
|
"step": 1110 |
|
}, |
|
{ |
|
"epoch": 3.4461538461538463, |
|
"grad_norm": 1.232230544090271, |
|
"learning_rate": 7.74909090909091e-05, |
|
"loss": 1.4867, |
|
"step": 1120 |
|
}, |
|
{ |
|
"epoch": 3.476923076923077, |
|
"grad_norm": 0.9712868332862854, |
|
"learning_rate": 7.712727272727273e-05, |
|
"loss": 1.4916, |
|
"step": 1130 |
|
}, |
|
{ |
|
"epoch": 3.5076923076923077, |
|
"grad_norm": 1.3414063453674316, |
|
"learning_rate": 7.676363636363637e-05, |
|
"loss": 1.5992, |
|
"step": 1140 |
|
}, |
|
{ |
|
"epoch": 3.5384615384615383, |
|
"grad_norm": 1.0222588777542114, |
|
"learning_rate": 7.64e-05, |
|
"loss": 1.5378, |
|
"step": 1150 |
|
}, |
|
{ |
|
"epoch": 3.569230769230769, |
|
"grad_norm": 1.1905276775360107, |
|
"learning_rate": 7.603636363636364e-05, |
|
"loss": 1.4324, |
|
"step": 1160 |
|
}, |
|
{ |
|
"epoch": 3.6, |
|
"grad_norm": 1.2846956253051758, |
|
"learning_rate": 7.567272727272727e-05, |
|
"loss": 1.4927, |
|
"step": 1170 |
|
}, |
|
{ |
|
"epoch": 3.6307692307692307, |
|
"grad_norm": 1.165310025215149, |
|
"learning_rate": 7.530909090909092e-05, |
|
"loss": 1.4277, |
|
"step": 1180 |
|
}, |
|
{ |
|
"epoch": 3.6615384615384614, |
|
"grad_norm": 1.063883900642395, |
|
"learning_rate": 7.494545454545455e-05, |
|
"loss": 1.5226, |
|
"step": 1190 |
|
}, |
|
{ |
|
"epoch": 3.6923076923076925, |
|
"grad_norm": 1.3430577516555786, |
|
"learning_rate": 7.458181818181819e-05, |
|
"loss": 1.5315, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 3.6923076923076925, |
|
"eval_gen_len": 187.0146, |
|
"eval_loss": 1.6592342853546143, |
|
"eval_rouge1": 0.6057, |
|
"eval_rouge2": 0.2572, |
|
"eval_rougeL": 0.4709, |
|
"eval_runtime": 33.3732, |
|
"eval_samples_per_second": 8.21, |
|
"eval_steps_per_second": 2.068, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 3.723076923076923, |
|
"grad_norm": 1.2028673887252808, |
|
"learning_rate": 7.421818181818182e-05, |
|
"loss": 1.5135, |
|
"step": 1210 |
|
}, |
|
{ |
|
"epoch": 3.753846153846154, |
|
"grad_norm": 0.9091282486915588, |
|
"learning_rate": 7.385454545454545e-05, |
|
"loss": 1.3876, |
|
"step": 1220 |
|
}, |
|
{ |
|
"epoch": 3.7846153846153845, |
|
"grad_norm": 0.9549902677536011, |
|
"learning_rate": 7.34909090909091e-05, |
|
"loss": 1.7408, |
|
"step": 1230 |
|
}, |
|
{ |
|
"epoch": 3.815384615384615, |
|
"grad_norm": 1.109423279762268, |
|
"learning_rate": 7.312727272727274e-05, |
|
"loss": 1.3897, |
|
"step": 1240 |
|
}, |
|
{ |
|
"epoch": 3.8461538461538463, |
|
"grad_norm": 1.1412984132766724, |
|
"learning_rate": 7.276363636363637e-05, |
|
"loss": 1.5034, |
|
"step": 1250 |
|
}, |
|
{ |
|
"epoch": 3.876923076923077, |
|
"grad_norm": 1.176283359527588, |
|
"learning_rate": 7.24e-05, |
|
"loss": 1.5426, |
|
"step": 1260 |
|
}, |
|
{ |
|
"epoch": 3.9076923076923076, |
|
"grad_norm": 1.3558485507965088, |
|
"learning_rate": 7.203636363636364e-05, |
|
"loss": 1.5287, |
|
"step": 1270 |
|
}, |
|
{ |
|
"epoch": 3.9384615384615387, |
|
"grad_norm": 1.23688805103302, |
|
"learning_rate": 7.167272727272729e-05, |
|
"loss": 1.5794, |
|
"step": 1280 |
|
}, |
|
{ |
|
"epoch": 3.9692307692307693, |
|
"grad_norm": 1.248528242111206, |
|
"learning_rate": 7.130909090909092e-05, |
|
"loss": 1.5166, |
|
"step": 1290 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"grad_norm": 0.8396208882331848, |
|
"learning_rate": 7.094545454545455e-05, |
|
"loss": 1.4648, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 4.030769230769231, |
|
"grad_norm": 1.0756162405014038, |
|
"learning_rate": 7.058181818181819e-05, |
|
"loss": 1.4147, |
|
"step": 1310 |
|
}, |
|
{ |
|
"epoch": 4.061538461538461, |
|
"grad_norm": 1.1731723546981812, |
|
"learning_rate": 7.021818181818182e-05, |
|
"loss": 1.3732, |
|
"step": 1320 |
|
}, |
|
{ |
|
"epoch": 4.092307692307692, |
|
"grad_norm": 1.2993149757385254, |
|
"learning_rate": 6.985454545454546e-05, |
|
"loss": 1.4118, |
|
"step": 1330 |
|
}, |
|
{ |
|
"epoch": 4.123076923076923, |
|
"grad_norm": 1.1648204326629639, |
|
"learning_rate": 6.949090909090909e-05, |
|
"loss": 1.4258, |
|
"step": 1340 |
|
}, |
|
{ |
|
"epoch": 4.153846153846154, |
|
"grad_norm": 1.1242045164108276, |
|
"learning_rate": 6.912727272727274e-05, |
|
"loss": 1.3598, |
|
"step": 1350 |
|
}, |
|
{ |
|
"epoch": 4.184615384615385, |
|
"grad_norm": 1.53397536277771, |
|
"learning_rate": 6.876363636363637e-05, |
|
"loss": 1.3431, |
|
"step": 1360 |
|
}, |
|
{ |
|
"epoch": 4.2153846153846155, |
|
"grad_norm": 1.3859331607818604, |
|
"learning_rate": 6.840000000000001e-05, |
|
"loss": 1.5014, |
|
"step": 1370 |
|
}, |
|
{ |
|
"epoch": 4.246153846153846, |
|
"grad_norm": 1.0821737051010132, |
|
"learning_rate": 6.803636363636364e-05, |
|
"loss": 1.2638, |
|
"step": 1380 |
|
}, |
|
{ |
|
"epoch": 4.276923076923077, |
|
"grad_norm": 1.2648742198944092, |
|
"learning_rate": 6.767272727272728e-05, |
|
"loss": 1.4567, |
|
"step": 1390 |
|
}, |
|
{ |
|
"epoch": 4.3076923076923075, |
|
"grad_norm": 1.1357372999191284, |
|
"learning_rate": 6.730909090909091e-05, |
|
"loss": 1.4706, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 4.3076923076923075, |
|
"eval_gen_len": 187.0146, |
|
"eval_loss": 1.6691502332687378, |
|
"eval_rouge1": 0.6061, |
|
"eval_rouge2": 0.2593, |
|
"eval_rougeL": 0.4719, |
|
"eval_runtime": 33.439, |
|
"eval_samples_per_second": 8.194, |
|
"eval_steps_per_second": 2.063, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 4.338461538461538, |
|
"grad_norm": 1.4324264526367188, |
|
"learning_rate": 6.694545454545454e-05, |
|
"loss": 1.4032, |
|
"step": 1410 |
|
}, |
|
{ |
|
"epoch": 4.36923076923077, |
|
"grad_norm": 1.3646095991134644, |
|
"learning_rate": 6.658181818181819e-05, |
|
"loss": 1.3943, |
|
"step": 1420 |
|
}, |
|
{ |
|
"epoch": 4.4, |
|
"grad_norm": 0.9991398453712463, |
|
"learning_rate": 6.621818181818183e-05, |
|
"loss": 1.5292, |
|
"step": 1430 |
|
}, |
|
{ |
|
"epoch": 4.430769230769231, |
|
"grad_norm": 1.1873986721038818, |
|
"learning_rate": 6.585454545454546e-05, |
|
"loss": 1.4813, |
|
"step": 1440 |
|
}, |
|
{ |
|
"epoch": 4.461538461538462, |
|
"grad_norm": 1.0080267190933228, |
|
"learning_rate": 6.549090909090909e-05, |
|
"loss": 1.4951, |
|
"step": 1450 |
|
}, |
|
{ |
|
"epoch": 4.492307692307692, |
|
"grad_norm": 1.5542734861373901, |
|
"learning_rate": 6.512727272727272e-05, |
|
"loss": 1.5603, |
|
"step": 1460 |
|
}, |
|
{ |
|
"epoch": 4.523076923076923, |
|
"grad_norm": 1.2610498666763306, |
|
"learning_rate": 6.476363636363638e-05, |
|
"loss": 1.3286, |
|
"step": 1470 |
|
}, |
|
{ |
|
"epoch": 4.553846153846154, |
|
"grad_norm": 1.0882760286331177, |
|
"learning_rate": 6.440000000000001e-05, |
|
"loss": 1.309, |
|
"step": 1480 |
|
}, |
|
{ |
|
"epoch": 4.584615384615384, |
|
"grad_norm": 1.1589834690093994, |
|
"learning_rate": 6.403636363636364e-05, |
|
"loss": 1.414, |
|
"step": 1490 |
|
}, |
|
{ |
|
"epoch": 4.615384615384615, |
|
"grad_norm": 1.3731642961502075, |
|
"learning_rate": 6.367272727272727e-05, |
|
"loss": 1.4529, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 4.6461538461538465, |
|
"grad_norm": 1.3053221702575684, |
|
"learning_rate": 6.330909090909091e-05, |
|
"loss": 1.4623, |
|
"step": 1510 |
|
}, |
|
{ |
|
"epoch": 4.676923076923077, |
|
"grad_norm": 1.2154396772384644, |
|
"learning_rate": 6.294545454545455e-05, |
|
"loss": 1.4766, |
|
"step": 1520 |
|
}, |
|
{ |
|
"epoch": 4.707692307692308, |
|
"grad_norm": 1.0947812795639038, |
|
"learning_rate": 6.258181818181818e-05, |
|
"loss": 1.3212, |
|
"step": 1530 |
|
}, |
|
{ |
|
"epoch": 4.7384615384615385, |
|
"grad_norm": 1.005462646484375, |
|
"learning_rate": 6.221818181818181e-05, |
|
"loss": 1.3956, |
|
"step": 1540 |
|
}, |
|
{ |
|
"epoch": 4.769230769230769, |
|
"grad_norm": 1.196108341217041, |
|
"learning_rate": 6.185454545454546e-05, |
|
"loss": 1.4048, |
|
"step": 1550 |
|
}, |
|
{ |
|
"epoch": 4.8, |
|
"grad_norm": 1.355747103691101, |
|
"learning_rate": 6.14909090909091e-05, |
|
"loss": 1.3474, |
|
"step": 1560 |
|
}, |
|
{ |
|
"epoch": 4.8307692307692305, |
|
"grad_norm": 1.177310585975647, |
|
"learning_rate": 6.112727272727273e-05, |
|
"loss": 1.3038, |
|
"step": 1570 |
|
}, |
|
{ |
|
"epoch": 4.861538461538462, |
|
"grad_norm": 1.273474097251892, |
|
"learning_rate": 6.076363636363637e-05, |
|
"loss": 1.3858, |
|
"step": 1580 |
|
}, |
|
{ |
|
"epoch": 4.892307692307693, |
|
"grad_norm": 1.2601613998413086, |
|
"learning_rate": 6.04e-05, |
|
"loss": 1.2742, |
|
"step": 1590 |
|
}, |
|
{ |
|
"epoch": 4.923076923076923, |
|
"grad_norm": 1.052040696144104, |
|
"learning_rate": 6.0036363636363634e-05, |
|
"loss": 1.5551, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 4.923076923076923, |
|
"eval_gen_len": 187.0146, |
|
"eval_loss": 1.6678508520126343, |
|
"eval_rouge1": 0.6061, |
|
"eval_rouge2": 0.2582, |
|
"eval_rougeL": 0.4724, |
|
"eval_runtime": 33.608, |
|
"eval_samples_per_second": 8.153, |
|
"eval_steps_per_second": 2.053, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 4.953846153846154, |
|
"grad_norm": 1.4437050819396973, |
|
"learning_rate": 5.967272727272728e-05, |
|
"loss": 1.3613, |
|
"step": 1610 |
|
}, |
|
{ |
|
"epoch": 4.984615384615385, |
|
"grad_norm": 1.398398518562317, |
|
"learning_rate": 5.9309090909090915e-05, |
|
"loss": 1.3861, |
|
"step": 1620 |
|
}, |
|
{ |
|
"epoch": 5.015384615384615, |
|
"grad_norm": 1.1901689767837524, |
|
"learning_rate": 5.894545454545455e-05, |
|
"loss": 1.4525, |
|
"step": 1630 |
|
}, |
|
{ |
|
"epoch": 5.046153846153846, |
|
"grad_norm": 1.2631349563598633, |
|
"learning_rate": 5.858181818181818e-05, |
|
"loss": 1.3698, |
|
"step": 1640 |
|
}, |
|
{ |
|
"epoch": 5.076923076923077, |
|
"grad_norm": 1.3628337383270264, |
|
"learning_rate": 5.821818181818182e-05, |
|
"loss": 1.1917, |
|
"step": 1650 |
|
}, |
|
{ |
|
"epoch": 5.107692307692307, |
|
"grad_norm": 1.2330440282821655, |
|
"learning_rate": 5.785454545454546e-05, |
|
"loss": 1.3692, |
|
"step": 1660 |
|
}, |
|
{ |
|
"epoch": 5.138461538461539, |
|
"grad_norm": 1.069877028465271, |
|
"learning_rate": 5.74909090909091e-05, |
|
"loss": 1.2439, |
|
"step": 1670 |
|
}, |
|
{ |
|
"epoch": 5.1692307692307695, |
|
"grad_norm": 1.1694751977920532, |
|
"learning_rate": 5.712727272727273e-05, |
|
"loss": 1.1194, |
|
"step": 1680 |
|
}, |
|
{ |
|
"epoch": 5.2, |
|
"grad_norm": 1.424668312072754, |
|
"learning_rate": 5.6763636363636365e-05, |
|
"loss": 1.456, |
|
"step": 1690 |
|
}, |
|
{ |
|
"epoch": 5.230769230769231, |
|
"grad_norm": 1.1166226863861084, |
|
"learning_rate": 5.6399999999999995e-05, |
|
"loss": 1.3011, |
|
"step": 1700 |
|
}, |
|
{ |
|
"epoch": 5.2615384615384615, |
|
"grad_norm": 1.196712851524353, |
|
"learning_rate": 5.6036363636363646e-05, |
|
"loss": 1.3272, |
|
"step": 1710 |
|
}, |
|
{ |
|
"epoch": 5.292307692307692, |
|
"grad_norm": 1.105592131614685, |
|
"learning_rate": 5.5672727272727276e-05, |
|
"loss": 1.3831, |
|
"step": 1720 |
|
}, |
|
{ |
|
"epoch": 5.323076923076923, |
|
"grad_norm": 1.3789408206939697, |
|
"learning_rate": 5.530909090909091e-05, |
|
"loss": 1.3924, |
|
"step": 1730 |
|
}, |
|
{ |
|
"epoch": 5.3538461538461535, |
|
"grad_norm": 1.1058343648910522, |
|
"learning_rate": 5.494545454545454e-05, |
|
"loss": 1.1278, |
|
"step": 1740 |
|
}, |
|
{ |
|
"epoch": 5.384615384615385, |
|
"grad_norm": 1.0470004081726074, |
|
"learning_rate": 5.458181818181819e-05, |
|
"loss": 1.2675, |
|
"step": 1750 |
|
}, |
|
{ |
|
"epoch": 5.415384615384616, |
|
"grad_norm": 1.5735996961593628, |
|
"learning_rate": 5.421818181818182e-05, |
|
"loss": 1.368, |
|
"step": 1760 |
|
}, |
|
{ |
|
"epoch": 5.446153846153846, |
|
"grad_norm": 1.053110122680664, |
|
"learning_rate": 5.385454545454546e-05, |
|
"loss": 1.4409, |
|
"step": 1770 |
|
}, |
|
{ |
|
"epoch": 5.476923076923077, |
|
"grad_norm": 1.2032136917114258, |
|
"learning_rate": 5.349090909090909e-05, |
|
"loss": 1.3919, |
|
"step": 1780 |
|
}, |
|
{ |
|
"epoch": 5.507692307692308, |
|
"grad_norm": 1.3398489952087402, |
|
"learning_rate": 5.3127272727272726e-05, |
|
"loss": 1.3891, |
|
"step": 1790 |
|
}, |
|
{ |
|
"epoch": 5.538461538461538, |
|
"grad_norm": 1.1674134731292725, |
|
"learning_rate": 5.276363636363637e-05, |
|
"loss": 1.4167, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 5.538461538461538, |
|
"eval_gen_len": 187.0146, |
|
"eval_loss": 1.6820106506347656, |
|
"eval_rouge1": 0.6051, |
|
"eval_rouge2": 0.256, |
|
"eval_rougeL": 0.4705, |
|
"eval_runtime": 34.3835, |
|
"eval_samples_per_second": 7.969, |
|
"eval_steps_per_second": 2.007, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 5.569230769230769, |
|
"grad_norm": 1.3142492771148682, |
|
"learning_rate": 5.2400000000000007e-05, |
|
"loss": 1.2869, |
|
"step": 1810 |
|
}, |
|
{ |
|
"epoch": 5.6, |
|
"grad_norm": 1.3149932622909546, |
|
"learning_rate": 5.2036363636363637e-05, |
|
"loss": 1.3653, |
|
"step": 1820 |
|
}, |
|
{ |
|
"epoch": 5.63076923076923, |
|
"grad_norm": 1.1139847040176392, |
|
"learning_rate": 5.167272727272727e-05, |
|
"loss": 1.3807, |
|
"step": 1830 |
|
}, |
|
{ |
|
"epoch": 5.661538461538462, |
|
"grad_norm": 1.302495002746582, |
|
"learning_rate": 5.130909090909091e-05, |
|
"loss": 1.3826, |
|
"step": 1840 |
|
}, |
|
{ |
|
"epoch": 5.6923076923076925, |
|
"grad_norm": 1.2617419958114624, |
|
"learning_rate": 5.0945454545454554e-05, |
|
"loss": 1.283, |
|
"step": 1850 |
|
}, |
|
{ |
|
"epoch": 5.723076923076923, |
|
"grad_norm": 1.252189040184021, |
|
"learning_rate": 5.0581818181818184e-05, |
|
"loss": 1.3425, |
|
"step": 1860 |
|
}, |
|
{ |
|
"epoch": 5.753846153846154, |
|
"grad_norm": 1.0823620557785034, |
|
"learning_rate": 5.021818181818182e-05, |
|
"loss": 1.4646, |
|
"step": 1870 |
|
}, |
|
{ |
|
"epoch": 5.7846153846153845, |
|
"grad_norm": 1.39573335647583, |
|
"learning_rate": 4.985454545454546e-05, |
|
"loss": 1.2957, |
|
"step": 1880 |
|
}, |
|
{ |
|
"epoch": 5.815384615384615, |
|
"grad_norm": 1.217499852180481, |
|
"learning_rate": 4.9490909090909094e-05, |
|
"loss": 1.3232, |
|
"step": 1890 |
|
}, |
|
{ |
|
"epoch": 5.846153846153846, |
|
"grad_norm": 1.3049825429916382, |
|
"learning_rate": 4.912727272727273e-05, |
|
"loss": 1.333, |
|
"step": 1900 |
|
}, |
|
{ |
|
"epoch": 5.876923076923077, |
|
"grad_norm": 1.265807032585144, |
|
"learning_rate": 4.876363636363637e-05, |
|
"loss": 1.2635, |
|
"step": 1910 |
|
}, |
|
{ |
|
"epoch": 5.907692307692308, |
|
"grad_norm": 1.4045813083648682, |
|
"learning_rate": 4.8400000000000004e-05, |
|
"loss": 1.3453, |
|
"step": 1920 |
|
}, |
|
{ |
|
"epoch": 5.938461538461539, |
|
"grad_norm": 1.4151256084442139, |
|
"learning_rate": 4.803636363636364e-05, |
|
"loss": 1.2274, |
|
"step": 1930 |
|
}, |
|
{ |
|
"epoch": 5.969230769230769, |
|
"grad_norm": 1.30918288230896, |
|
"learning_rate": 4.767272727272728e-05, |
|
"loss": 1.3835, |
|
"step": 1940 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"grad_norm": 1.1394106149673462, |
|
"learning_rate": 4.7309090909090914e-05, |
|
"loss": 1.4773, |
|
"step": 1950 |
|
}, |
|
{ |
|
"epoch": 6.030769230769231, |
|
"grad_norm": 1.0991594791412354, |
|
"learning_rate": 4.694545454545455e-05, |
|
"loss": 1.1885, |
|
"step": 1960 |
|
}, |
|
{ |
|
"epoch": 6.061538461538461, |
|
"grad_norm": 1.2676807641983032, |
|
"learning_rate": 4.658181818181818e-05, |
|
"loss": 1.2931, |
|
"step": 1970 |
|
}, |
|
{ |
|
"epoch": 6.092307692307692, |
|
"grad_norm": 1.2844555377960205, |
|
"learning_rate": 4.6218181818181825e-05, |
|
"loss": 1.2758, |
|
"step": 1980 |
|
}, |
|
{ |
|
"epoch": 6.123076923076923, |
|
"grad_norm": 1.2364481687545776, |
|
"learning_rate": 4.5854545454545455e-05, |
|
"loss": 1.1871, |
|
"step": 1990 |
|
}, |
|
{ |
|
"epoch": 6.153846153846154, |
|
"grad_norm": 1.4142909049987793, |
|
"learning_rate": 4.54909090909091e-05, |
|
"loss": 1.2976, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 6.153846153846154, |
|
"eval_gen_len": 187.0146, |
|
"eval_loss": 1.6960315704345703, |
|
"eval_rouge1": 0.6074, |
|
"eval_rouge2": 0.2564, |
|
"eval_rougeL": 0.4698, |
|
"eval_runtime": 33.4283, |
|
"eval_samples_per_second": 8.197, |
|
"eval_steps_per_second": 2.064, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 6.184615384615385, |
|
"grad_norm": 1.234192132949829, |
|
"learning_rate": 4.512727272727273e-05, |
|
"loss": 1.3096, |
|
"step": 2010 |
|
}, |
|
{ |
|
"epoch": 6.2153846153846155, |
|
"grad_norm": 1.1222543716430664, |
|
"learning_rate": 4.4763636363636365e-05, |
|
"loss": 1.2937, |
|
"step": 2020 |
|
}, |
|
{ |
|
"epoch": 6.246153846153846, |
|
"grad_norm": 1.0864425897598267, |
|
"learning_rate": 4.44e-05, |
|
"loss": 1.278, |
|
"step": 2030 |
|
}, |
|
{ |
|
"epoch": 6.276923076923077, |
|
"grad_norm": 1.3431516885757446, |
|
"learning_rate": 4.403636363636364e-05, |
|
"loss": 1.2601, |
|
"step": 2040 |
|
}, |
|
{ |
|
"epoch": 6.3076923076923075, |
|
"grad_norm": 1.2604031562805176, |
|
"learning_rate": 4.3672727272727275e-05, |
|
"loss": 1.3587, |
|
"step": 2050 |
|
}, |
|
{ |
|
"epoch": 6.338461538461538, |
|
"grad_norm": 1.4237326383590698, |
|
"learning_rate": 4.330909090909091e-05, |
|
"loss": 1.3936, |
|
"step": 2060 |
|
}, |
|
{ |
|
"epoch": 6.36923076923077, |
|
"grad_norm": 1.2190274000167847, |
|
"learning_rate": 4.294545454545455e-05, |
|
"loss": 1.2805, |
|
"step": 2070 |
|
}, |
|
{ |
|
"epoch": 6.4, |
|
"grad_norm": 1.3023786544799805, |
|
"learning_rate": 4.2581818181818186e-05, |
|
"loss": 1.2676, |
|
"step": 2080 |
|
}, |
|
{ |
|
"epoch": 6.430769230769231, |
|
"grad_norm": 1.2170274257659912, |
|
"learning_rate": 4.2218181818181816e-05, |
|
"loss": 1.3696, |
|
"step": 2090 |
|
}, |
|
{ |
|
"epoch": 6.461538461538462, |
|
"grad_norm": 1.613784670829773, |
|
"learning_rate": 4.185454545454546e-05, |
|
"loss": 1.28, |
|
"step": 2100 |
|
}, |
|
{ |
|
"epoch": 6.492307692307692, |
|
"grad_norm": 1.2165530920028687, |
|
"learning_rate": 4.149090909090909e-05, |
|
"loss": 1.2933, |
|
"step": 2110 |
|
}, |
|
{ |
|
"epoch": 6.523076923076923, |
|
"grad_norm": 1.2213079929351807, |
|
"learning_rate": 4.112727272727273e-05, |
|
"loss": 1.2062, |
|
"step": 2120 |
|
}, |
|
{ |
|
"epoch": 6.553846153846154, |
|
"grad_norm": 1.5889174938201904, |
|
"learning_rate": 4.076363636363636e-05, |
|
"loss": 1.2481, |
|
"step": 2130 |
|
}, |
|
{ |
|
"epoch": 6.584615384615384, |
|
"grad_norm": 1.2638423442840576, |
|
"learning_rate": 4.0400000000000006e-05, |
|
"loss": 1.29, |
|
"step": 2140 |
|
}, |
|
{ |
|
"epoch": 6.615384615384615, |
|
"grad_norm": 1.0796576738357544, |
|
"learning_rate": 4.0036363636363636e-05, |
|
"loss": 1.1189, |
|
"step": 2150 |
|
}, |
|
{ |
|
"epoch": 6.6461538461538465, |
|
"grad_norm": 1.5910948514938354, |
|
"learning_rate": 3.967272727272727e-05, |
|
"loss": 1.2767, |
|
"step": 2160 |
|
}, |
|
{ |
|
"epoch": 6.676923076923077, |
|
"grad_norm": 1.3346668481826782, |
|
"learning_rate": 3.930909090909091e-05, |
|
"loss": 1.2212, |
|
"step": 2170 |
|
}, |
|
{ |
|
"epoch": 6.707692307692308, |
|
"grad_norm": 1.1277836561203003, |
|
"learning_rate": 3.8945454545454547e-05, |
|
"loss": 1.2392, |
|
"step": 2180 |
|
}, |
|
{ |
|
"epoch": 6.7384615384615385, |
|
"grad_norm": 1.1212108135223389, |
|
"learning_rate": 3.858181818181818e-05, |
|
"loss": 1.3223, |
|
"step": 2190 |
|
}, |
|
{ |
|
"epoch": 6.769230769230769, |
|
"grad_norm": 1.2175902128219604, |
|
"learning_rate": 3.821818181818182e-05, |
|
"loss": 1.3124, |
|
"step": 2200 |
|
}, |
|
{ |
|
"epoch": 6.769230769230769, |
|
"eval_gen_len": 187.0146, |
|
"eval_loss": 1.6920864582061768, |
|
"eval_rouge1": 0.606, |
|
"eval_rouge2": 0.2554, |
|
"eval_rougeL": 0.4694, |
|
"eval_runtime": 34.3853, |
|
"eval_samples_per_second": 7.969, |
|
"eval_steps_per_second": 2.007, |
|
"step": 2200 |
|
}, |
|
{ |
|
"epoch": 6.8, |
|
"grad_norm": 1.163404107093811, |
|
"learning_rate": 3.785454545454546e-05, |
|
"loss": 1.4557, |
|
"step": 2210 |
|
}, |
|
{ |
|
"epoch": 6.8307692307692305, |
|
"grad_norm": 1.5031542778015137, |
|
"learning_rate": 3.7490909090909094e-05, |
|
"loss": 1.1773, |
|
"step": 2220 |
|
}, |
|
{ |
|
"epoch": 6.861538461538462, |
|
"grad_norm": 1.0126748085021973, |
|
"learning_rate": 3.712727272727273e-05, |
|
"loss": 1.3922, |
|
"step": 2230 |
|
}, |
|
{ |
|
"epoch": 6.892307692307693, |
|
"grad_norm": 1.0135952234268188, |
|
"learning_rate": 3.676363636363637e-05, |
|
"loss": 1.2028, |
|
"step": 2240 |
|
}, |
|
{ |
|
"epoch": 6.923076923076923, |
|
"grad_norm": 1.16098153591156, |
|
"learning_rate": 3.6400000000000004e-05, |
|
"loss": 1.4111, |
|
"step": 2250 |
|
}, |
|
{ |
|
"epoch": 6.953846153846154, |
|
"grad_norm": 1.488234519958496, |
|
"learning_rate": 3.603636363636364e-05, |
|
"loss": 1.3131, |
|
"step": 2260 |
|
}, |
|
{ |
|
"epoch": 6.984615384615385, |
|
"grad_norm": 1.129989743232727, |
|
"learning_rate": 3.567272727272728e-05, |
|
"loss": 1.1701, |
|
"step": 2270 |
|
}, |
|
{ |
|
"epoch": 7.015384615384615, |
|
"grad_norm": 1.218468189239502, |
|
"learning_rate": 3.530909090909091e-05, |
|
"loss": 1.2604, |
|
"step": 2280 |
|
}, |
|
{ |
|
"epoch": 7.046153846153846, |
|
"grad_norm": 1.2339926958084106, |
|
"learning_rate": 3.494545454545455e-05, |
|
"loss": 1.0932, |
|
"step": 2290 |
|
}, |
|
{ |
|
"epoch": 7.076923076923077, |
|
"grad_norm": 1.4972765445709229, |
|
"learning_rate": 3.458181818181818e-05, |
|
"loss": 1.1137, |
|
"step": 2300 |
|
}, |
|
{ |
|
"epoch": 7.107692307692307, |
|
"grad_norm": 1.1884584426879883, |
|
"learning_rate": 3.4218181818181824e-05, |
|
"loss": 1.1522, |
|
"step": 2310 |
|
}, |
|
{ |
|
"epoch": 7.138461538461539, |
|
"grad_norm": 1.4934840202331543, |
|
"learning_rate": 3.3854545454545454e-05, |
|
"loss": 1.3121, |
|
"step": 2320 |
|
}, |
|
{ |
|
"epoch": 7.1692307692307695, |
|
"grad_norm": 1.1432678699493408, |
|
"learning_rate": 3.34909090909091e-05, |
|
"loss": 1.1549, |
|
"step": 2330 |
|
}, |
|
{ |
|
"epoch": 7.2, |
|
"grad_norm": 1.1708807945251465, |
|
"learning_rate": 3.312727272727273e-05, |
|
"loss": 1.1692, |
|
"step": 2340 |
|
}, |
|
{ |
|
"epoch": 7.230769230769231, |
|
"grad_norm": 1.3824517726898193, |
|
"learning_rate": 3.2763636363636365e-05, |
|
"loss": 1.208, |
|
"step": 2350 |
|
}, |
|
{ |
|
"epoch": 7.2615384615384615, |
|
"grad_norm": 1.1225407123565674, |
|
"learning_rate": 3.24e-05, |
|
"loss": 1.1542, |
|
"step": 2360 |
|
}, |
|
{ |
|
"epoch": 7.292307692307692, |
|
"grad_norm": 1.2445507049560547, |
|
"learning_rate": 3.203636363636364e-05, |
|
"loss": 1.2265, |
|
"step": 2370 |
|
}, |
|
{ |
|
"epoch": 7.323076923076923, |
|
"grad_norm": 1.256062626838684, |
|
"learning_rate": 3.1672727272727275e-05, |
|
"loss": 1.1822, |
|
"step": 2380 |
|
}, |
|
{ |
|
"epoch": 7.3538461538461535, |
|
"grad_norm": 1.3986501693725586, |
|
"learning_rate": 3.130909090909091e-05, |
|
"loss": 1.262, |
|
"step": 2390 |
|
}, |
|
{ |
|
"epoch": 7.384615384615385, |
|
"grad_norm": 1.1086236238479614, |
|
"learning_rate": 3.094545454545455e-05, |
|
"loss": 1.2275, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 7.384615384615385, |
|
"eval_gen_len": 187.0146, |
|
"eval_loss": 1.6998823881149292, |
|
"eval_rouge1": 0.6055, |
|
"eval_rouge2": 0.2541, |
|
"eval_rougeL": 0.4684, |
|
"eval_runtime": 33.5145, |
|
"eval_samples_per_second": 8.176, |
|
"eval_steps_per_second": 2.059, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 7.415384615384616, |
|
"grad_norm": 1.5682780742645264, |
|
"learning_rate": 3.0581818181818185e-05, |
|
"loss": 1.3442, |
|
"step": 2410 |
|
}, |
|
{ |
|
"epoch": 7.446153846153846, |
|
"grad_norm": 1.034818410873413, |
|
"learning_rate": 3.021818181818182e-05, |
|
"loss": 1.2158, |
|
"step": 2420 |
|
}, |
|
{ |
|
"epoch": 7.476923076923077, |
|
"grad_norm": 1.2816352844238281, |
|
"learning_rate": 2.985454545454546e-05, |
|
"loss": 1.1646, |
|
"step": 2430 |
|
}, |
|
{ |
|
"epoch": 7.507692307692308, |
|
"grad_norm": 1.252765417098999, |
|
"learning_rate": 2.9490909090909092e-05, |
|
"loss": 1.1985, |
|
"step": 2440 |
|
}, |
|
{ |
|
"epoch": 7.538461538461538, |
|
"grad_norm": 1.4074809551239014, |
|
"learning_rate": 2.9127272727272732e-05, |
|
"loss": 1.3245, |
|
"step": 2450 |
|
}, |
|
{ |
|
"epoch": 7.569230769230769, |
|
"grad_norm": 1.3757801055908203, |
|
"learning_rate": 2.8763636363636366e-05, |
|
"loss": 1.2856, |
|
"step": 2460 |
|
}, |
|
{ |
|
"epoch": 7.6, |
|
"grad_norm": 1.355635643005371, |
|
"learning_rate": 2.84e-05, |
|
"loss": 1.229, |
|
"step": 2470 |
|
}, |
|
{ |
|
"epoch": 7.63076923076923, |
|
"grad_norm": 1.185659646987915, |
|
"learning_rate": 2.803636363636364e-05, |
|
"loss": 1.2444, |
|
"step": 2480 |
|
}, |
|
{ |
|
"epoch": 7.661538461538462, |
|
"grad_norm": 1.4726060628890991, |
|
"learning_rate": 2.7672727272727273e-05, |
|
"loss": 1.2877, |
|
"step": 2490 |
|
}, |
|
{ |
|
"epoch": 7.6923076923076925, |
|
"grad_norm": 1.525718092918396, |
|
"learning_rate": 2.7309090909090913e-05, |
|
"loss": 1.2993, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 7.723076923076923, |
|
"grad_norm": 1.2432451248168945, |
|
"learning_rate": 2.6945454545454546e-05, |
|
"loss": 1.2803, |
|
"step": 2510 |
|
}, |
|
{ |
|
"epoch": 7.753846153846154, |
|
"grad_norm": 1.2237839698791504, |
|
"learning_rate": 2.6581818181818186e-05, |
|
"loss": 1.2785, |
|
"step": 2520 |
|
}, |
|
{ |
|
"epoch": 7.7846153846153845, |
|
"grad_norm": 1.5360924005508423, |
|
"learning_rate": 2.621818181818182e-05, |
|
"loss": 1.3164, |
|
"step": 2530 |
|
}, |
|
{ |
|
"epoch": 7.815384615384615, |
|
"grad_norm": 1.4242442846298218, |
|
"learning_rate": 2.5854545454545453e-05, |
|
"loss": 1.1864, |
|
"step": 2540 |
|
}, |
|
{ |
|
"epoch": 7.846153846153846, |
|
"grad_norm": 1.0664770603179932, |
|
"learning_rate": 2.5490909090909093e-05, |
|
"loss": 1.2769, |
|
"step": 2550 |
|
}, |
|
{ |
|
"epoch": 7.876923076923077, |
|
"grad_norm": 1.3427962064743042, |
|
"learning_rate": 2.5127272727272727e-05, |
|
"loss": 1.183, |
|
"step": 2560 |
|
}, |
|
{ |
|
"epoch": 7.907692307692308, |
|
"grad_norm": 1.2692787647247314, |
|
"learning_rate": 2.4763636363636363e-05, |
|
"loss": 1.3775, |
|
"step": 2570 |
|
}, |
|
{ |
|
"epoch": 7.938461538461539, |
|
"grad_norm": 1.3220490217208862, |
|
"learning_rate": 2.44e-05, |
|
"loss": 1.1557, |
|
"step": 2580 |
|
}, |
|
{ |
|
"epoch": 7.969230769230769, |
|
"grad_norm": 1.31517493724823, |
|
"learning_rate": 2.4036363636363637e-05, |
|
"loss": 1.3144, |
|
"step": 2590 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"grad_norm": 1.145848274230957, |
|
"learning_rate": 2.3672727272727274e-05, |
|
"loss": 1.4194, |
|
"step": 2600 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"eval_gen_len": 187.0146, |
|
"eval_loss": 1.6980103254318237, |
|
"eval_rouge1": 0.6066, |
|
"eval_rouge2": 0.2565, |
|
"eval_rougeL": 0.4699, |
|
"eval_runtime": 33.5275, |
|
"eval_samples_per_second": 8.172, |
|
"eval_steps_per_second": 2.058, |
|
"step": 2600 |
|
}, |
|
{ |
|
"epoch": 8.03076923076923, |
|
"grad_norm": 1.3528770208358765, |
|
"learning_rate": 2.330909090909091e-05, |
|
"loss": 1.2504, |
|
"step": 2610 |
|
}, |
|
{ |
|
"epoch": 8.061538461538461, |
|
"grad_norm": 1.1651642322540283, |
|
"learning_rate": 2.2945454545454547e-05, |
|
"loss": 1.0993, |
|
"step": 2620 |
|
}, |
|
{ |
|
"epoch": 8.092307692307692, |
|
"grad_norm": 1.1845202445983887, |
|
"learning_rate": 2.258181818181818e-05, |
|
"loss": 1.1356, |
|
"step": 2630 |
|
}, |
|
{ |
|
"epoch": 8.123076923076923, |
|
"grad_norm": 1.2000699043273926, |
|
"learning_rate": 2.2218181818181817e-05, |
|
"loss": 1.206, |
|
"step": 2640 |
|
}, |
|
{ |
|
"epoch": 8.153846153846153, |
|
"grad_norm": 1.449044108390808, |
|
"learning_rate": 2.1854545454545454e-05, |
|
"loss": 1.2059, |
|
"step": 2650 |
|
}, |
|
{ |
|
"epoch": 8.184615384615384, |
|
"grad_norm": 1.2176152467727661, |
|
"learning_rate": 2.149090909090909e-05, |
|
"loss": 1.1849, |
|
"step": 2660 |
|
}, |
|
{ |
|
"epoch": 8.215384615384615, |
|
"grad_norm": 1.4765113592147827, |
|
"learning_rate": 2.1127272727272728e-05, |
|
"loss": 1.3351, |
|
"step": 2670 |
|
}, |
|
{ |
|
"epoch": 8.246153846153845, |
|
"grad_norm": 1.5038341283798218, |
|
"learning_rate": 2.0763636363636364e-05, |
|
"loss": 1.2766, |
|
"step": 2680 |
|
}, |
|
{ |
|
"epoch": 8.276923076923078, |
|
"grad_norm": 1.3483731746673584, |
|
"learning_rate": 2.04e-05, |
|
"loss": 1.1067, |
|
"step": 2690 |
|
}, |
|
{ |
|
"epoch": 8.307692307692308, |
|
"grad_norm": 1.025032639503479, |
|
"learning_rate": 2.0036363636363638e-05, |
|
"loss": 1.2155, |
|
"step": 2700 |
|
}, |
|
{ |
|
"epoch": 8.338461538461539, |
|
"grad_norm": 1.3824971914291382, |
|
"learning_rate": 1.9672727272727275e-05, |
|
"loss": 1.1767, |
|
"step": 2710 |
|
}, |
|
{ |
|
"epoch": 8.36923076923077, |
|
"grad_norm": 1.2280523777008057, |
|
"learning_rate": 1.930909090909091e-05, |
|
"loss": 1.2881, |
|
"step": 2720 |
|
}, |
|
{ |
|
"epoch": 8.4, |
|
"grad_norm": 1.2223644256591797, |
|
"learning_rate": 1.8945454545454548e-05, |
|
"loss": 1.1898, |
|
"step": 2730 |
|
}, |
|
{ |
|
"epoch": 8.430769230769231, |
|
"grad_norm": 1.349334955215454, |
|
"learning_rate": 1.8581818181818185e-05, |
|
"loss": 1.2984, |
|
"step": 2740 |
|
}, |
|
{ |
|
"epoch": 8.461538461538462, |
|
"grad_norm": 1.2894556522369385, |
|
"learning_rate": 1.8218181818181822e-05, |
|
"loss": 1.3458, |
|
"step": 2750 |
|
}, |
|
{ |
|
"epoch": 8.492307692307692, |
|
"grad_norm": 1.6086795330047607, |
|
"learning_rate": 1.7854545454545455e-05, |
|
"loss": 1.2394, |
|
"step": 2760 |
|
}, |
|
{ |
|
"epoch": 8.523076923076923, |
|
"grad_norm": 1.4955778121948242, |
|
"learning_rate": 1.7490909090909092e-05, |
|
"loss": 1.1506, |
|
"step": 2770 |
|
}, |
|
{ |
|
"epoch": 8.553846153846154, |
|
"grad_norm": 1.4156781435012817, |
|
"learning_rate": 1.712727272727273e-05, |
|
"loss": 1.1746, |
|
"step": 2780 |
|
}, |
|
{ |
|
"epoch": 8.584615384615384, |
|
"grad_norm": 1.2073826789855957, |
|
"learning_rate": 1.6763636363636365e-05, |
|
"loss": 1.1301, |
|
"step": 2790 |
|
}, |
|
{ |
|
"epoch": 8.615384615384615, |
|
"grad_norm": 1.574342966079712, |
|
"learning_rate": 1.6400000000000002e-05, |
|
"loss": 1.3976, |
|
"step": 2800 |
|
}, |
|
{ |
|
"epoch": 8.615384615384615, |
|
"eval_gen_len": 187.0146, |
|
"eval_loss": 1.7040081024169922, |
|
"eval_rouge1": 0.6049, |
|
"eval_rouge2": 0.2551, |
|
"eval_rougeL": 0.469, |
|
"eval_runtime": 34.1499, |
|
"eval_samples_per_second": 8.023, |
|
"eval_steps_per_second": 2.021, |
|
"step": 2800 |
|
}, |
|
{ |
|
"epoch": 8.646153846153846, |
|
"grad_norm": 1.207323670387268, |
|
"learning_rate": 1.603636363636364e-05, |
|
"loss": 1.2123, |
|
"step": 2810 |
|
}, |
|
{ |
|
"epoch": 8.676923076923076, |
|
"grad_norm": 1.346170425415039, |
|
"learning_rate": 1.5672727272727272e-05, |
|
"loss": 1.1284, |
|
"step": 2820 |
|
}, |
|
{ |
|
"epoch": 8.707692307692307, |
|
"grad_norm": 1.3920952081680298, |
|
"learning_rate": 1.530909090909091e-05, |
|
"loss": 1.1961, |
|
"step": 2830 |
|
}, |
|
{ |
|
"epoch": 8.73846153846154, |
|
"grad_norm": 1.4912207126617432, |
|
"learning_rate": 1.4945454545454546e-05, |
|
"loss": 1.2558, |
|
"step": 2840 |
|
}, |
|
{ |
|
"epoch": 8.76923076923077, |
|
"grad_norm": 0.9997207522392273, |
|
"learning_rate": 1.4581818181818183e-05, |
|
"loss": 1.1842, |
|
"step": 2850 |
|
}, |
|
{ |
|
"epoch": 8.8, |
|
"grad_norm": 1.207138180732727, |
|
"learning_rate": 1.421818181818182e-05, |
|
"loss": 1.2588, |
|
"step": 2860 |
|
}, |
|
{ |
|
"epoch": 8.830769230769231, |
|
"grad_norm": 1.398917555809021, |
|
"learning_rate": 1.3854545454545456e-05, |
|
"loss": 1.2445, |
|
"step": 2870 |
|
}, |
|
{ |
|
"epoch": 8.861538461538462, |
|
"grad_norm": 1.3793071508407593, |
|
"learning_rate": 1.3490909090909093e-05, |
|
"loss": 1.164, |
|
"step": 2880 |
|
}, |
|
{ |
|
"epoch": 8.892307692307693, |
|
"grad_norm": 1.2650920152664185, |
|
"learning_rate": 1.3127272727272726e-05, |
|
"loss": 1.2235, |
|
"step": 2890 |
|
}, |
|
{ |
|
"epoch": 8.923076923076923, |
|
"grad_norm": 1.3319740295410156, |
|
"learning_rate": 1.2763636363636363e-05, |
|
"loss": 1.1818, |
|
"step": 2900 |
|
}, |
|
{ |
|
"epoch": 8.953846153846154, |
|
"grad_norm": 1.395668387413025, |
|
"learning_rate": 1.24e-05, |
|
"loss": 1.2237, |
|
"step": 2910 |
|
}, |
|
{ |
|
"epoch": 8.984615384615385, |
|
"grad_norm": 1.1730422973632812, |
|
"learning_rate": 1.2036363636363637e-05, |
|
"loss": 1.0141, |
|
"step": 2920 |
|
} |
|
], |
|
"logging_steps": 10, |
|
"max_steps": 3250, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 10, |
|
"save_steps": 500, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": false |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 7124807319552000.0, |
|
"train_batch_size": 4, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|