t5-youtube-summarizer / trainer_state.json
{
"best_global_step": null,
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 9.0,
"eval_steps": 200,
"global_step": 2925,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.03076923076923077,
"grad_norm": 24.726886749267578,
"learning_rate": 1.8e-06,
"loss": 11.4221,
"step": 10
},
{
"epoch": 0.06153846153846154,
"grad_norm": 17.795185089111328,
"learning_rate": 3.8e-06,
"loss": 10.4941,
"step": 20
},
{
"epoch": 0.09230769230769231,
"grad_norm": 14.8720703125,
"learning_rate": 5.8e-06,
"loss": 10.943,
"step": 30
},
{
"epoch": 0.12307692307692308,
"grad_norm": 26.370025634765625,
"learning_rate": 7.8e-06,
"loss": 9.7676,
"step": 40
},
{
"epoch": 0.15384615384615385,
"grad_norm": 25.46526527404785,
"learning_rate": 9.800000000000001e-06,
"loss": 8.0724,
"step": 50
},
{
"epoch": 0.18461538461538463,
"grad_norm": 31.001745223999023,
"learning_rate": 1.18e-05,
"loss": 6.9668,
"step": 60
},
{
"epoch": 0.2153846153846154,
"grad_norm": 9.090025901794434,
"learning_rate": 1.3800000000000002e-05,
"loss": 5.0298,
"step": 70
},
{
"epoch": 0.24615384615384617,
"grad_norm": 4.529256820678711,
"learning_rate": 1.58e-05,
"loss": 3.5286,
"step": 80
},
{
"epoch": 0.27692307692307694,
"grad_norm": 2.912689685821533,
"learning_rate": 1.78e-05,
"loss": 2.9783,
"step": 90
},
{
"epoch": 0.3076923076923077,
"grad_norm": 2.04130220413208,
"learning_rate": 1.9800000000000004e-05,
"loss": 2.5693,
"step": 100
},
{
"epoch": 0.3384615384615385,
"grad_norm": 1.7645025253295898,
"learning_rate": 2.18e-05,
"loss": 2.3032,
"step": 110
},
{
"epoch": 0.36923076923076925,
"grad_norm": 1.4153923988342285,
"learning_rate": 2.38e-05,
"loss": 2.1182,
"step": 120
},
{
"epoch": 0.4,
"grad_norm": 2.1566500663757324,
"learning_rate": 2.58e-05,
"loss": 2.2848,
"step": 130
},
{
"epoch": 0.4307692307692308,
"grad_norm": 1.7678470611572266,
"learning_rate": 2.7800000000000005e-05,
"loss": 2.2928,
"step": 140
},
{
"epoch": 0.46153846153846156,
"grad_norm": 1.717806100845337,
"learning_rate": 2.98e-05,
"loss": 2.0866,
"step": 150
},
{
"epoch": 0.49230769230769234,
"grad_norm": 1.8488136529922485,
"learning_rate": 3.18e-05,
"loss": 2.2323,
"step": 160
},
{
"epoch": 0.5230769230769231,
"grad_norm": 1.8968263864517212,
"learning_rate": 3.38e-05,
"loss": 2.2183,
"step": 170
},
{
"epoch": 0.5538461538461539,
"grad_norm": 1.398100733757019,
"learning_rate": 3.58e-05,
"loss": 1.8395,
"step": 180
},
{
"epoch": 0.5846153846153846,
"grad_norm": 1.5084631443023682,
"learning_rate": 3.7800000000000004e-05,
"loss": 1.9694,
"step": 190
},
{
"epoch": 0.6153846153846154,
"grad_norm": 1.1019172668457031,
"learning_rate": 3.9800000000000005e-05,
"loss": 1.9702,
"step": 200
},
{
"epoch": 0.6153846153846154,
"eval_gen_len": 186.9088,
"eval_loss": 1.8701356649398804,
"eval_rouge1": 0.5735,
"eval_rouge2": 0.2231,
"eval_rougeL": 0.4346,
"eval_runtime": 33.3119,
"eval_samples_per_second": 8.225,
"eval_steps_per_second": 2.071,
"step": 200
},
{
"epoch": 0.6461538461538462,
"grad_norm": 1.5155857801437378,
"learning_rate": 4.18e-05,
"loss": 2.0705,
"step": 210
},
{
"epoch": 0.676923076923077,
"grad_norm": 1.3338611125946045,
"learning_rate": 4.38e-05,
"loss": 2.051,
"step": 220
},
{
"epoch": 0.7076923076923077,
"grad_norm": 1.2012193202972412,
"learning_rate": 4.58e-05,
"loss": 1.9834,
"step": 230
},
{
"epoch": 0.7384615384615385,
"grad_norm": 1.527007818222046,
"learning_rate": 4.78e-05,
"loss": 2.0451,
"step": 240
},
{
"epoch": 0.7692307692307693,
"grad_norm": 1.2146987915039062,
"learning_rate": 4.9800000000000004e-05,
"loss": 1.974,
"step": 250
},
{
"epoch": 0.8,
"grad_norm": 1.7576699256896973,
"learning_rate": 5.1800000000000005e-05,
"loss": 2.0347,
"step": 260
},
{
"epoch": 0.8307692307692308,
"grad_norm": 1.17750084400177,
"learning_rate": 5.380000000000001e-05,
"loss": 1.9763,
"step": 270
},
{
"epoch": 0.8615384615384616,
"grad_norm": 1.3267815113067627,
"learning_rate": 5.580000000000001e-05,
"loss": 1.842,
"step": 280
},
{
"epoch": 0.8923076923076924,
"grad_norm": 1.0520875453948975,
"learning_rate": 5.7799999999999995e-05,
"loss": 2.0525,
"step": 290
},
{
"epoch": 0.9230769230769231,
"grad_norm": 1.4701600074768066,
"learning_rate": 5.9800000000000003e-05,
"loss": 1.7418,
"step": 300
},
{
"epoch": 0.9538461538461539,
"grad_norm": 1.062267780303955,
"learning_rate": 6.18e-05,
"loss": 1.9685,
"step": 310
},
{
"epoch": 0.9846153846153847,
"grad_norm": 1.2724727392196655,
"learning_rate": 6.38e-05,
"loss": 1.7972,
"step": 320
},
{
"epoch": 1.0153846153846153,
"grad_norm": 1.2834393978118896,
"learning_rate": 6.58e-05,
"loss": 1.8395,
"step": 330
},
{
"epoch": 1.0461538461538462,
"grad_norm": 0.9657095074653625,
"learning_rate": 6.780000000000001e-05,
"loss": 1.836,
"step": 340
},
{
"epoch": 1.0769230769230769,
"grad_norm": 1.0390011072158813,
"learning_rate": 6.98e-05,
"loss": 1.9328,
"step": 350
},
{
"epoch": 1.1076923076923078,
"grad_norm": 1.2896322011947632,
"learning_rate": 7.18e-05,
"loss": 1.8227,
"step": 360
},
{
"epoch": 1.1384615384615384,
"grad_norm": 1.53290593624115,
"learning_rate": 7.38e-05,
"loss": 1.9214,
"step": 370
},
{
"epoch": 1.1692307692307693,
"grad_norm": 1.0145893096923828,
"learning_rate": 7.58e-05,
"loss": 1.8295,
"step": 380
},
{
"epoch": 1.2,
"grad_norm": 1.2127511501312256,
"learning_rate": 7.780000000000001e-05,
"loss": 1.7774,
"step": 390
},
{
"epoch": 1.2307692307692308,
"grad_norm": 1.1971853971481323,
"learning_rate": 7.98e-05,
"loss": 1.9926,
"step": 400
},
{
"epoch": 1.2307692307692308,
"eval_gen_len": 187.0146,
"eval_loss": 1.7412512302398682,
"eval_rouge1": 0.5961,
"eval_rouge2": 0.2459,
"eval_rougeL": 0.4577,
"eval_runtime": 33.1213,
"eval_samples_per_second": 8.273,
"eval_steps_per_second": 2.083,
"step": 400
},
{
"epoch": 1.2615384615384615,
"grad_norm": 1.107351303100586,
"learning_rate": 8.18e-05,
"loss": 1.8515,
"step": 410
},
{
"epoch": 1.2923076923076924,
"grad_norm": 1.1407504081726074,
"learning_rate": 8.38e-05,
"loss": 1.7011,
"step": 420
},
{
"epoch": 1.323076923076923,
"grad_norm": 1.418338656425476,
"learning_rate": 8.58e-05,
"loss": 1.6384,
"step": 430
},
{
"epoch": 1.353846153846154,
"grad_norm": 1.3715286254882812,
"learning_rate": 8.78e-05,
"loss": 1.8502,
"step": 440
},
{
"epoch": 1.3846153846153846,
"grad_norm": 0.9774390459060669,
"learning_rate": 8.98e-05,
"loss": 1.8264,
"step": 450
},
{
"epoch": 1.4153846153846155,
"grad_norm": 1.4778176546096802,
"learning_rate": 9.180000000000001e-05,
"loss": 1.694,
"step": 460
},
{
"epoch": 1.4461538461538461,
"grad_norm": 1.2721563577651978,
"learning_rate": 9.38e-05,
"loss": 1.8213,
"step": 470
},
{
"epoch": 1.476923076923077,
"grad_norm": 0.94813472032547,
"learning_rate": 9.58e-05,
"loss": 1.6636,
"step": 480
},
{
"epoch": 1.5076923076923077,
"grad_norm": 1.0905983448028564,
"learning_rate": 9.78e-05,
"loss": 1.7712,
"step": 490
},
{
"epoch": 1.5384615384615383,
"grad_norm": 1.1593286991119385,
"learning_rate": 9.98e-05,
"loss": 1.808,
"step": 500
},
{
"epoch": 1.5692307692307692,
"grad_norm": 1.0593713521957397,
"learning_rate": 9.967272727272727e-05,
"loss": 1.82,
"step": 510
},
{
"epoch": 1.6,
"grad_norm": 0.941973865032196,
"learning_rate": 9.930909090909092e-05,
"loss": 1.7341,
"step": 520
},
{
"epoch": 1.6307692307692307,
"grad_norm": 0.80891352891922,
"learning_rate": 9.894545454545455e-05,
"loss": 1.6166,
"step": 530
},
{
"epoch": 1.6615384615384614,
"grad_norm": 1.0325396060943604,
"learning_rate": 9.858181818181819e-05,
"loss": 1.8333,
"step": 540
},
{
"epoch": 1.6923076923076923,
"grad_norm": 1.3042590618133545,
"learning_rate": 9.821818181818182e-05,
"loss": 1.6287,
"step": 550
},
{
"epoch": 1.7230769230769232,
"grad_norm": 1.475900650024414,
"learning_rate": 9.785454545454545e-05,
"loss": 1.6019,
"step": 560
},
{
"epoch": 1.7538461538461538,
"grad_norm": 1.1589939594268799,
"learning_rate": 9.74909090909091e-05,
"loss": 1.6904,
"step": 570
},
{
"epoch": 1.7846153846153845,
"grad_norm": 1.2714788913726807,
"learning_rate": 9.712727272727274e-05,
"loss": 1.7928,
"step": 580
},
{
"epoch": 1.8153846153846154,
"grad_norm": 1.2037074565887451,
"learning_rate": 9.676363636363637e-05,
"loss": 1.8325,
"step": 590
},
{
"epoch": 1.8461538461538463,
"grad_norm": 1.1115801334381104,
"learning_rate": 9.64e-05,
"loss": 1.7673,
"step": 600
},
{
"epoch": 1.8461538461538463,
"eval_gen_len": 187.0146,
"eval_loss": 1.69492506980896,
"eval_rouge1": 0.6004,
"eval_rouge2": 0.2505,
"eval_rougeL": 0.4658,
"eval_runtime": 33.2205,
"eval_samples_per_second": 8.248,
"eval_steps_per_second": 2.077,
"step": 600
},
{
"epoch": 1.876923076923077,
"grad_norm": 1.2008461952209473,
"learning_rate": 9.603636363636364e-05,
"loss": 1.7674,
"step": 610
},
{
"epoch": 1.9076923076923076,
"grad_norm": 1.1482900381088257,
"learning_rate": 9.567272727272729e-05,
"loss": 1.7932,
"step": 620
},
{
"epoch": 1.9384615384615385,
"grad_norm": 1.0144352912902832,
"learning_rate": 9.530909090909092e-05,
"loss": 1.6315,
"step": 630
},
{
"epoch": 1.9692307692307693,
"grad_norm": 0.9276631474494934,
"learning_rate": 9.494545454545455e-05,
"loss": 1.8373,
"step": 640
},
{
"epoch": 2.0,
"grad_norm": 1.0593888759613037,
"learning_rate": 9.458181818181819e-05,
"loss": 1.7967,
"step": 650
},
{
"epoch": 2.0307692307692307,
"grad_norm": 1.1406164169311523,
"learning_rate": 9.421818181818183e-05,
"loss": 1.7668,
"step": 660
},
{
"epoch": 2.0615384615384613,
"grad_norm": 0.9809508919715881,
"learning_rate": 9.385454545454546e-05,
"loss": 1.6602,
"step": 670
},
{
"epoch": 2.0923076923076924,
"grad_norm": 1.1698426008224487,
"learning_rate": 9.349090909090909e-05,
"loss": 1.6775,
"step": 680
},
{
"epoch": 2.123076923076923,
"grad_norm": 1.1539372205734253,
"learning_rate": 9.312727272727274e-05,
"loss": 1.533,
"step": 690
},
{
"epoch": 2.1538461538461537,
"grad_norm": 1.0783981084823608,
"learning_rate": 9.276363636363637e-05,
"loss": 1.5243,
"step": 700
},
{
"epoch": 2.184615384615385,
"grad_norm": 1.262705683708191,
"learning_rate": 9.240000000000001e-05,
"loss": 1.6625,
"step": 710
},
{
"epoch": 2.2153846153846155,
"grad_norm": 1.1545718908309937,
"learning_rate": 9.203636363636364e-05,
"loss": 1.9172,
"step": 720
},
{
"epoch": 2.246153846153846,
"grad_norm": 0.9896947741508484,
"learning_rate": 9.167272727272728e-05,
"loss": 1.5449,
"step": 730
},
{
"epoch": 2.276923076923077,
"grad_norm": 1.063262701034546,
"learning_rate": 9.130909090909091e-05,
"loss": 1.5318,
"step": 740
},
{
"epoch": 2.3076923076923075,
"grad_norm": 1.0846728086471558,
"learning_rate": 9.094545454545454e-05,
"loss": 1.5875,
"step": 750
},
{
"epoch": 2.3384615384615386,
"grad_norm": 1.0437549352645874,
"learning_rate": 9.058181818181819e-05,
"loss": 1.5724,
"step": 760
},
{
"epoch": 2.3692307692307693,
"grad_norm": 1.050115942955017,
"learning_rate": 9.021818181818183e-05,
"loss": 1.662,
"step": 770
},
{
"epoch": 2.4,
"grad_norm": 0.9663347601890564,
"learning_rate": 8.985454545454546e-05,
"loss": 1.6284,
"step": 780
},
{
"epoch": 2.430769230769231,
"grad_norm": 1.1656932830810547,
"learning_rate": 8.949090909090909e-05,
"loss": 1.5995,
"step": 790
},
{
"epoch": 2.4615384615384617,
"grad_norm": 1.073716402053833,
"learning_rate": 8.912727272727273e-05,
"loss": 1.4811,
"step": 800
},
{
"epoch": 2.4615384615384617,
"eval_gen_len": 187.0146,
"eval_loss": 1.6769312620162964,
"eval_rouge1": 0.6042,
"eval_rouge2": 0.2561,
"eval_rougeL": 0.4686,
"eval_runtime": 33.5273,
"eval_samples_per_second": 8.172,
"eval_steps_per_second": 2.058,
"step": 800
},
{
"epoch": 2.4923076923076923,
"grad_norm": 0.9764583110809326,
"learning_rate": 8.876363636363638e-05,
"loss": 1.5478,
"step": 810
},
{
"epoch": 2.523076923076923,
"grad_norm": 0.9336417317390442,
"learning_rate": 8.840000000000001e-05,
"loss": 1.5138,
"step": 820
},
{
"epoch": 2.5538461538461537,
"grad_norm": 0.9714758992195129,
"learning_rate": 8.803636363636364e-05,
"loss": 1.5506,
"step": 830
},
{
"epoch": 2.5846153846153848,
"grad_norm": 0.9208464622497559,
"learning_rate": 8.767272727272727e-05,
"loss": 1.4944,
"step": 840
},
{
"epoch": 2.6153846153846154,
"grad_norm": 1.0252026319503784,
"learning_rate": 8.730909090909092e-05,
"loss": 1.6991,
"step": 850
},
{
"epoch": 2.646153846153846,
"grad_norm": 1.0464015007019043,
"learning_rate": 8.694545454545455e-05,
"loss": 1.679,
"step": 860
},
{
"epoch": 2.676923076923077,
"grad_norm": 1.3673149347305298,
"learning_rate": 8.658181818181818e-05,
"loss": 1.5021,
"step": 870
},
{
"epoch": 2.707692307692308,
"grad_norm": 1.1350778341293335,
"learning_rate": 8.621818181818181e-05,
"loss": 1.5898,
"step": 880
},
{
"epoch": 2.7384615384615385,
"grad_norm": 0.9916401505470276,
"learning_rate": 8.585454545454546e-05,
"loss": 1.6542,
"step": 890
},
{
"epoch": 2.769230769230769,
"grad_norm": 0.9967766404151917,
"learning_rate": 8.54909090909091e-05,
"loss": 1.7056,
"step": 900
},
{
"epoch": 2.8,
"grad_norm": 1.2031991481781006,
"learning_rate": 8.512727272727273e-05,
"loss": 1.6856,
"step": 910
},
{
"epoch": 2.830769230769231,
"grad_norm": 1.0159794092178345,
"learning_rate": 8.476363636363636e-05,
"loss": 1.5293,
"step": 920
},
{
"epoch": 2.8615384615384616,
"grad_norm": 1.3572866916656494,
"learning_rate": 8.44e-05,
"loss": 1.6191,
"step": 930
},
{
"epoch": 2.8923076923076922,
"grad_norm": 1.2567291259765625,
"learning_rate": 8.403636363636364e-05,
"loss": 1.7504,
"step": 940
},
{
"epoch": 2.9230769230769234,
"grad_norm": 1.2280553579330444,
"learning_rate": 8.367272727272728e-05,
"loss": 1.6523,
"step": 950
},
{
"epoch": 2.953846153846154,
"grad_norm": 1.0409953594207764,
"learning_rate": 8.330909090909091e-05,
"loss": 1.5903,
"step": 960
},
{
"epoch": 2.9846153846153847,
"grad_norm": 1.10386061668396,
"learning_rate": 8.294545454545455e-05,
"loss": 1.5235,
"step": 970
},
{
"epoch": 3.0153846153846153,
"grad_norm": 1.0341882705688477,
"learning_rate": 8.258181818181818e-05,
"loss": 1.6025,
"step": 980
},
{
"epoch": 3.046153846153846,
"grad_norm": 1.3020343780517578,
"learning_rate": 8.221818181818183e-05,
"loss": 1.4696,
"step": 990
},
{
"epoch": 3.076923076923077,
"grad_norm": 1.104643702507019,
"learning_rate": 8.185454545454546e-05,
"loss": 1.4009,
"step": 1000
},
{
"epoch": 3.076923076923077,
"eval_gen_len": 187.0146,
"eval_loss": 1.6721168756484985,
"eval_rouge1": 0.6044,
"eval_rouge2": 0.2558,
"eval_rougeL": 0.4692,
"eval_runtime": 34.4765,
"eval_samples_per_second": 7.947,
"eval_steps_per_second": 2.001,
"step": 1000
},
{
"epoch": 3.1076923076923078,
"grad_norm": 1.1327263116836548,
"learning_rate": 8.14909090909091e-05,
"loss": 1.5893,
"step": 1010
},
{
"epoch": 3.1384615384615384,
"grad_norm": 1.168095350265503,
"learning_rate": 8.112727272727273e-05,
"loss": 1.4248,
"step": 1020
},
{
"epoch": 3.169230769230769,
"grad_norm": 0.9978489279747009,
"learning_rate": 8.076363636363636e-05,
"loss": 1.5407,
"step": 1030
},
{
"epoch": 3.2,
"grad_norm": 1.0370062589645386,
"learning_rate": 8.04e-05,
"loss": 1.4867,
"step": 1040
},
{
"epoch": 3.230769230769231,
"grad_norm": 0.9647369384765625,
"learning_rate": 8.003636363636365e-05,
"loss": 1.4806,
"step": 1050
},
{
"epoch": 3.2615384615384615,
"grad_norm": 1.3316948413848877,
"learning_rate": 7.967272727272728e-05,
"loss": 1.4612,
"step": 1060
},
{
"epoch": 3.292307692307692,
"grad_norm": 1.37971830368042,
"learning_rate": 7.93090909090909e-05,
"loss": 1.5745,
"step": 1070
},
{
"epoch": 3.3230769230769233,
"grad_norm": 1.1220242977142334,
"learning_rate": 7.894545454545455e-05,
"loss": 1.3228,
"step": 1080
},
{
"epoch": 3.353846153846154,
"grad_norm": 1.0595531463623047,
"learning_rate": 7.85818181818182e-05,
"loss": 1.4618,
"step": 1090
},
{
"epoch": 3.3846153846153846,
"grad_norm": 1.3739666938781738,
"learning_rate": 7.821818181818182e-05,
"loss": 1.4973,
"step": 1100
},
{
"epoch": 3.4153846153846152,
"grad_norm": 1.2643866539001465,
"learning_rate": 7.785454545454545e-05,
"loss": 1.533,
"step": 1110
},
{
"epoch": 3.4461538461538463,
"grad_norm": 1.232230544090271,
"learning_rate": 7.74909090909091e-05,
"loss": 1.4867,
"step": 1120
},
{
"epoch": 3.476923076923077,
"grad_norm": 0.9712868332862854,
"learning_rate": 7.712727272727273e-05,
"loss": 1.4916,
"step": 1130
},
{
"epoch": 3.5076923076923077,
"grad_norm": 1.3414063453674316,
"learning_rate": 7.676363636363637e-05,
"loss": 1.5992,
"step": 1140
},
{
"epoch": 3.5384615384615383,
"grad_norm": 1.0222588777542114,
"learning_rate": 7.64e-05,
"loss": 1.5378,
"step": 1150
},
{
"epoch": 3.569230769230769,
"grad_norm": 1.1905276775360107,
"learning_rate": 7.603636363636364e-05,
"loss": 1.4324,
"step": 1160
},
{
"epoch": 3.6,
"grad_norm": 1.2846956253051758,
"learning_rate": 7.567272727272727e-05,
"loss": 1.4927,
"step": 1170
},
{
"epoch": 3.6307692307692307,
"grad_norm": 1.165310025215149,
"learning_rate": 7.530909090909092e-05,
"loss": 1.4277,
"step": 1180
},
{
"epoch": 3.6615384615384614,
"grad_norm": 1.063883900642395,
"learning_rate": 7.494545454545455e-05,
"loss": 1.5226,
"step": 1190
},
{
"epoch": 3.6923076923076925,
"grad_norm": 1.3430577516555786,
"learning_rate": 7.458181818181819e-05,
"loss": 1.5315,
"step": 1200
},
{
"epoch": 3.6923076923076925,
"eval_gen_len": 187.0146,
"eval_loss": 1.6592342853546143,
"eval_rouge1": 0.6057,
"eval_rouge2": 0.2572,
"eval_rougeL": 0.4709,
"eval_runtime": 33.3732,
"eval_samples_per_second": 8.21,
"eval_steps_per_second": 2.068,
"step": 1200
},
{
"epoch": 3.723076923076923,
"grad_norm": 1.2028673887252808,
"learning_rate": 7.421818181818182e-05,
"loss": 1.5135,
"step": 1210
},
{
"epoch": 3.753846153846154,
"grad_norm": 0.9091282486915588,
"learning_rate": 7.385454545454545e-05,
"loss": 1.3876,
"step": 1220
},
{
"epoch": 3.7846153846153845,
"grad_norm": 0.9549902677536011,
"learning_rate": 7.34909090909091e-05,
"loss": 1.7408,
"step": 1230
},
{
"epoch": 3.815384615384615,
"grad_norm": 1.109423279762268,
"learning_rate": 7.312727272727274e-05,
"loss": 1.3897,
"step": 1240
},
{
"epoch": 3.8461538461538463,
"grad_norm": 1.1412984132766724,
"learning_rate": 7.276363636363637e-05,
"loss": 1.5034,
"step": 1250
},
{
"epoch": 3.876923076923077,
"grad_norm": 1.176283359527588,
"learning_rate": 7.24e-05,
"loss": 1.5426,
"step": 1260
},
{
"epoch": 3.9076923076923076,
"grad_norm": 1.3558485507965088,
"learning_rate": 7.203636363636364e-05,
"loss": 1.5287,
"step": 1270
},
{
"epoch": 3.9384615384615387,
"grad_norm": 1.23688805103302,
"learning_rate": 7.167272727272729e-05,
"loss": 1.5794,
"step": 1280
},
{
"epoch": 3.9692307692307693,
"grad_norm": 1.248528242111206,
"learning_rate": 7.130909090909092e-05,
"loss": 1.5166,
"step": 1290
},
{
"epoch": 4.0,
"grad_norm": 0.8396208882331848,
"learning_rate": 7.094545454545455e-05,
"loss": 1.4648,
"step": 1300
},
{
"epoch": 4.030769230769231,
"grad_norm": 1.0756162405014038,
"learning_rate": 7.058181818181819e-05,
"loss": 1.4147,
"step": 1310
},
{
"epoch": 4.061538461538461,
"grad_norm": 1.1731723546981812,
"learning_rate": 7.021818181818182e-05,
"loss": 1.3732,
"step": 1320
},
{
"epoch": 4.092307692307692,
"grad_norm": 1.2993149757385254,
"learning_rate": 6.985454545454546e-05,
"loss": 1.4118,
"step": 1330
},
{
"epoch": 4.123076923076923,
"grad_norm": 1.1648204326629639,
"learning_rate": 6.949090909090909e-05,
"loss": 1.4258,
"step": 1340
},
{
"epoch": 4.153846153846154,
"grad_norm": 1.1242045164108276,
"learning_rate": 6.912727272727274e-05,
"loss": 1.3598,
"step": 1350
},
{
"epoch": 4.184615384615385,
"grad_norm": 1.53397536277771,
"learning_rate": 6.876363636363637e-05,
"loss": 1.3431,
"step": 1360
},
{
"epoch": 4.2153846153846155,
"grad_norm": 1.3859331607818604,
"learning_rate": 6.840000000000001e-05,
"loss": 1.5014,
"step": 1370
},
{
"epoch": 4.246153846153846,
"grad_norm": 1.0821737051010132,
"learning_rate": 6.803636363636364e-05,
"loss": 1.2638,
"step": 1380
},
{
"epoch": 4.276923076923077,
"grad_norm": 1.2648742198944092,
"learning_rate": 6.767272727272728e-05,
"loss": 1.4567,
"step": 1390
},
{
"epoch": 4.3076923076923075,
"grad_norm": 1.1357372999191284,
"learning_rate": 6.730909090909091e-05,
"loss": 1.4706,
"step": 1400
},
{
"epoch": 4.3076923076923075,
"eval_gen_len": 187.0146,
"eval_loss": 1.6691502332687378,
"eval_rouge1": 0.6061,
"eval_rouge2": 0.2593,
"eval_rougeL": 0.4719,
"eval_runtime": 33.439,
"eval_samples_per_second": 8.194,
"eval_steps_per_second": 2.063,
"step": 1400
},
{
"epoch": 4.338461538461538,
"grad_norm": 1.4324264526367188,
"learning_rate": 6.694545454545454e-05,
"loss": 1.4032,
"step": 1410
},
{
"epoch": 4.36923076923077,
"grad_norm": 1.3646095991134644,
"learning_rate": 6.658181818181819e-05,
"loss": 1.3943,
"step": 1420
},
{
"epoch": 4.4,
"grad_norm": 0.9991398453712463,
"learning_rate": 6.621818181818183e-05,
"loss": 1.5292,
"step": 1430
},
{
"epoch": 4.430769230769231,
"grad_norm": 1.1873986721038818,
"learning_rate": 6.585454545454546e-05,
"loss": 1.4813,
"step": 1440
},
{
"epoch": 4.461538461538462,
"grad_norm": 1.0080267190933228,
"learning_rate": 6.549090909090909e-05,
"loss": 1.4951,
"step": 1450
},
{
"epoch": 4.492307692307692,
"grad_norm": 1.5542734861373901,
"learning_rate": 6.512727272727272e-05,
"loss": 1.5603,
"step": 1460
},
{
"epoch": 4.523076923076923,
"grad_norm": 1.2610498666763306,
"learning_rate": 6.476363636363638e-05,
"loss": 1.3286,
"step": 1470
},
{
"epoch": 4.553846153846154,
"grad_norm": 1.0882760286331177,
"learning_rate": 6.440000000000001e-05,
"loss": 1.309,
"step": 1480
},
{
"epoch": 4.584615384615384,
"grad_norm": 1.1589834690093994,
"learning_rate": 6.403636363636364e-05,
"loss": 1.414,
"step": 1490
},
{
"epoch": 4.615384615384615,
"grad_norm": 1.3731642961502075,
"learning_rate": 6.367272727272727e-05,
"loss": 1.4529,
"step": 1500
},
{
"epoch": 4.6461538461538465,
"grad_norm": 1.3053221702575684,
"learning_rate": 6.330909090909091e-05,
"loss": 1.4623,
"step": 1510
},
{
"epoch": 4.676923076923077,
"grad_norm": 1.2154396772384644,
"learning_rate": 6.294545454545455e-05,
"loss": 1.4766,
"step": 1520
},
{
"epoch": 4.707692307692308,
"grad_norm": 1.0947812795639038,
"learning_rate": 6.258181818181818e-05,
"loss": 1.3212,
"step": 1530
},
{
"epoch": 4.7384615384615385,
"grad_norm": 1.005462646484375,
"learning_rate": 6.221818181818181e-05,
"loss": 1.3956,
"step": 1540
},
{
"epoch": 4.769230769230769,
"grad_norm": 1.196108341217041,
"learning_rate": 6.185454545454546e-05,
"loss": 1.4048,
"step": 1550
},
{
"epoch": 4.8,
"grad_norm": 1.355747103691101,
"learning_rate": 6.14909090909091e-05,
"loss": 1.3474,
"step": 1560
},
{
"epoch": 4.8307692307692305,
"grad_norm": 1.177310585975647,
"learning_rate": 6.112727272727273e-05,
"loss": 1.3038,
"step": 1570
},
{
"epoch": 4.861538461538462,
"grad_norm": 1.273474097251892,
"learning_rate": 6.076363636363637e-05,
"loss": 1.3858,
"step": 1580
},
{
"epoch": 4.892307692307693,
"grad_norm": 1.2601613998413086,
"learning_rate": 6.04e-05,
"loss": 1.2742,
"step": 1590
},
{
"epoch": 4.923076923076923,
"grad_norm": 1.052040696144104,
"learning_rate": 6.0036363636363634e-05,
"loss": 1.5551,
"step": 1600
},
{
"epoch": 4.923076923076923,
"eval_gen_len": 187.0146,
"eval_loss": 1.6678508520126343,
"eval_rouge1": 0.6061,
"eval_rouge2": 0.2582,
"eval_rougeL": 0.4724,
"eval_runtime": 33.608,
"eval_samples_per_second": 8.153,
"eval_steps_per_second": 2.053,
"step": 1600
},
{
"epoch": 4.953846153846154,
"grad_norm": 1.4437050819396973,
"learning_rate": 5.967272727272728e-05,
"loss": 1.3613,
"step": 1610
},
{
"epoch": 4.984615384615385,
"grad_norm": 1.398398518562317,
"learning_rate": 5.9309090909090915e-05,
"loss": 1.3861,
"step": 1620
},
{
"epoch": 5.015384615384615,
"grad_norm": 1.1901689767837524,
"learning_rate": 5.894545454545455e-05,
"loss": 1.4525,
"step": 1630
},
{
"epoch": 5.046153846153846,
"grad_norm": 1.2631349563598633,
"learning_rate": 5.858181818181818e-05,
"loss": 1.3698,
"step": 1640
},
{
"epoch": 5.076923076923077,
"grad_norm": 1.3628337383270264,
"learning_rate": 5.821818181818182e-05,
"loss": 1.1917,
"step": 1650
},
{
"epoch": 5.107692307692307,
"grad_norm": 1.2330440282821655,
"learning_rate": 5.785454545454546e-05,
"loss": 1.3692,
"step": 1660
},
{
"epoch": 5.138461538461539,
"grad_norm": 1.069877028465271,
"learning_rate": 5.74909090909091e-05,
"loss": 1.2439,
"step": 1670
},
{
"epoch": 5.1692307692307695,
"grad_norm": 1.1694751977920532,
"learning_rate": 5.712727272727273e-05,
"loss": 1.1194,
"step": 1680
},
{
"epoch": 5.2,
"grad_norm": 1.424668312072754,
"learning_rate": 5.6763636363636365e-05,
"loss": 1.456,
"step": 1690
},
{
"epoch": 5.230769230769231,
"grad_norm": 1.1166226863861084,
"learning_rate": 5.6399999999999995e-05,
"loss": 1.3011,
"step": 1700
},
{
"epoch": 5.2615384615384615,
"grad_norm": 1.196712851524353,
"learning_rate": 5.6036363636363646e-05,
"loss": 1.3272,
"step": 1710
},
{
"epoch": 5.292307692307692,
"grad_norm": 1.105592131614685,
"learning_rate": 5.5672727272727276e-05,
"loss": 1.3831,
"step": 1720
},
{
"epoch": 5.323076923076923,
"grad_norm": 1.3789408206939697,
"learning_rate": 5.530909090909091e-05,
"loss": 1.3924,
"step": 1730
},
{
"epoch": 5.3538461538461535,
"grad_norm": 1.1058343648910522,
"learning_rate": 5.494545454545454e-05,
"loss": 1.1278,
"step": 1740
},
{
"epoch": 5.384615384615385,
"grad_norm": 1.0470004081726074,
"learning_rate": 5.458181818181819e-05,
"loss": 1.2675,
"step": 1750
},
{
"epoch": 5.415384615384616,
"grad_norm": 1.5735996961593628,
"learning_rate": 5.421818181818182e-05,
"loss": 1.368,
"step": 1760
},
{
"epoch": 5.446153846153846,
"grad_norm": 1.053110122680664,
"learning_rate": 5.385454545454546e-05,
"loss": 1.4409,
"step": 1770
},
{
"epoch": 5.476923076923077,
"grad_norm": 1.2032136917114258,
"learning_rate": 5.349090909090909e-05,
"loss": 1.3919,
"step": 1780
},
{
"epoch": 5.507692307692308,
"grad_norm": 1.3398489952087402,
"learning_rate": 5.3127272727272726e-05,
"loss": 1.3891,
"step": 1790
},
{
"epoch": 5.538461538461538,
"grad_norm": 1.1674134731292725,
"learning_rate": 5.276363636363637e-05,
"loss": 1.4167,
"step": 1800
},
{
"epoch": 5.538461538461538,
"eval_gen_len": 187.0146,
"eval_loss": 1.6820106506347656,
"eval_rouge1": 0.6051,
"eval_rouge2": 0.256,
"eval_rougeL": 0.4705,
"eval_runtime": 34.3835,
"eval_samples_per_second": 7.969,
"eval_steps_per_second": 2.007,
"step": 1800
},
{
"epoch": 5.569230769230769,
"grad_norm": 1.3142492771148682,
"learning_rate": 5.2400000000000007e-05,
"loss": 1.2869,
"step": 1810
},
{
"epoch": 5.6,
"grad_norm": 1.3149932622909546,
"learning_rate": 5.2036363636363637e-05,
"loss": 1.3653,
"step": 1820
},
{
"epoch": 5.63076923076923,
"grad_norm": 1.1139847040176392,
"learning_rate": 5.167272727272727e-05,
"loss": 1.3807,
"step": 1830
},
{
"epoch": 5.661538461538462,
"grad_norm": 1.302495002746582,
"learning_rate": 5.130909090909091e-05,
"loss": 1.3826,
"step": 1840
},
{
"epoch": 5.6923076923076925,
"grad_norm": 1.2617419958114624,
"learning_rate": 5.0945454545454554e-05,
"loss": 1.283,
"step": 1850
},
{
"epoch": 5.723076923076923,
"grad_norm": 1.252189040184021,
"learning_rate": 5.0581818181818184e-05,
"loss": 1.3425,
"step": 1860
},
{
"epoch": 5.753846153846154,
"grad_norm": 1.0823620557785034,
"learning_rate": 5.021818181818182e-05,
"loss": 1.4646,
"step": 1870
},
{
"epoch": 5.7846153846153845,
"grad_norm": 1.39573335647583,
"learning_rate": 4.985454545454546e-05,
"loss": 1.2957,
"step": 1880
},
{
"epoch": 5.815384615384615,
"grad_norm": 1.217499852180481,
"learning_rate": 4.9490909090909094e-05,
"loss": 1.3232,
"step": 1890
},
{
"epoch": 5.846153846153846,
"grad_norm": 1.3049825429916382,
"learning_rate": 4.912727272727273e-05,
"loss": 1.333,
"step": 1900
},
{
"epoch": 5.876923076923077,
"grad_norm": 1.265807032585144,
"learning_rate": 4.876363636363637e-05,
"loss": 1.2635,
"step": 1910
},
{
"epoch": 5.907692307692308,
"grad_norm": 1.4045813083648682,
"learning_rate": 4.8400000000000004e-05,
"loss": 1.3453,
"step": 1920
},
{
"epoch": 5.938461538461539,
"grad_norm": 1.4151256084442139,
"learning_rate": 4.803636363636364e-05,
"loss": 1.2274,
"step": 1930
},
{
"epoch": 5.969230769230769,
"grad_norm": 1.30918288230896,
"learning_rate": 4.767272727272728e-05,
"loss": 1.3835,
"step": 1940
},
{
"epoch": 6.0,
"grad_norm": 1.1394106149673462,
"learning_rate": 4.7309090909090914e-05,
"loss": 1.4773,
"step": 1950
},
{
"epoch": 6.030769230769231,
"grad_norm": 1.0991594791412354,
"learning_rate": 4.694545454545455e-05,
"loss": 1.1885,
"step": 1960
},
{
"epoch": 6.061538461538461,
"grad_norm": 1.2676807641983032,
"learning_rate": 4.658181818181818e-05,
"loss": 1.2931,
"step": 1970
},
{
"epoch": 6.092307692307692,
"grad_norm": 1.2844555377960205,
"learning_rate": 4.6218181818181825e-05,
"loss": 1.2758,
"step": 1980
},
{
"epoch": 6.123076923076923,
"grad_norm": 1.2364481687545776,
"learning_rate": 4.5854545454545455e-05,
"loss": 1.1871,
"step": 1990
},
{
"epoch": 6.153846153846154,
"grad_norm": 1.4142909049987793,
"learning_rate": 4.54909090909091e-05,
"loss": 1.2976,
"step": 2000
},
{
"epoch": 6.153846153846154,
"eval_gen_len": 187.0146,
"eval_loss": 1.6960315704345703,
"eval_rouge1": 0.6074,
"eval_rouge2": 0.2564,
"eval_rougeL": 0.4698,
"eval_runtime": 33.4283,
"eval_samples_per_second": 8.197,
"eval_steps_per_second": 2.064,
"step": 2000
},
{
"epoch": 6.184615384615385,
"grad_norm": 1.234192132949829,
"learning_rate": 4.512727272727273e-05,
"loss": 1.3096,
"step": 2010
},
{
"epoch": 6.2153846153846155,
"grad_norm": 1.1222543716430664,
"learning_rate": 4.4763636363636365e-05,
"loss": 1.2937,
"step": 2020
},
{
"epoch": 6.246153846153846,
"grad_norm": 1.0864425897598267,
"learning_rate": 4.44e-05,
"loss": 1.278,
"step": 2030
},
{
"epoch": 6.276923076923077,
"grad_norm": 1.3431516885757446,
"learning_rate": 4.403636363636364e-05,
"loss": 1.2601,
"step": 2040
},
{
"epoch": 6.3076923076923075,
"grad_norm": 1.2604031562805176,
"learning_rate": 4.3672727272727275e-05,
"loss": 1.3587,
"step": 2050
},
{
"epoch": 6.338461538461538,
"grad_norm": 1.4237326383590698,
"learning_rate": 4.330909090909091e-05,
"loss": 1.3936,
"step": 2060
},
{
"epoch": 6.36923076923077,
"grad_norm": 1.2190274000167847,
"learning_rate": 4.294545454545455e-05,
"loss": 1.2805,
"step": 2070
},
{
"epoch": 6.4,
"grad_norm": 1.3023786544799805,
"learning_rate": 4.2581818181818186e-05,
"loss": 1.2676,
"step": 2080
},
{
"epoch": 6.430769230769231,
"grad_norm": 1.2170274257659912,
"learning_rate": 4.2218181818181816e-05,
"loss": 1.3696,
"step": 2090
},
{
"epoch": 6.461538461538462,
"grad_norm": 1.613784670829773,
"learning_rate": 4.185454545454546e-05,
"loss": 1.28,
"step": 2100
},
{
"epoch": 6.492307692307692,
"grad_norm": 1.2165530920028687,
"learning_rate": 4.149090909090909e-05,
"loss": 1.2933,
"step": 2110
},
{
"epoch": 6.523076923076923,
"grad_norm": 1.2213079929351807,
"learning_rate": 4.112727272727273e-05,
"loss": 1.2062,
"step": 2120
},
{
"epoch": 6.553846153846154,
"grad_norm": 1.5889174938201904,
"learning_rate": 4.076363636363636e-05,
"loss": 1.2481,
"step": 2130
},
{
"epoch": 6.584615384615384,
"grad_norm": 1.2638423442840576,
"learning_rate": 4.0400000000000006e-05,
"loss": 1.29,
"step": 2140
},
{
"epoch": 6.615384615384615,
"grad_norm": 1.0796576738357544,
"learning_rate": 4.0036363636363636e-05,
"loss": 1.1189,
"step": 2150
},
{
"epoch": 6.6461538461538465,
"grad_norm": 1.5910948514938354,
"learning_rate": 3.967272727272727e-05,
"loss": 1.2767,
"step": 2160
},
{
"epoch": 6.676923076923077,
"grad_norm": 1.3346668481826782,
"learning_rate": 3.930909090909091e-05,
"loss": 1.2212,
"step": 2170
},
{
"epoch": 6.707692307692308,
"grad_norm": 1.1277836561203003,
"learning_rate": 3.8945454545454547e-05,
"loss": 1.2392,
"step": 2180
},
{
"epoch": 6.7384615384615385,
"grad_norm": 1.1212108135223389,
"learning_rate": 3.858181818181818e-05,
"loss": 1.3223,
"step": 2190
},
{
"epoch": 6.769230769230769,
"grad_norm": 1.2175902128219604,
"learning_rate": 3.821818181818182e-05,
"loss": 1.3124,
"step": 2200
},
{
"epoch": 6.769230769230769,
"eval_gen_len": 187.0146,
"eval_loss": 1.6920864582061768,
"eval_rouge1": 0.606,
"eval_rouge2": 0.2554,
"eval_rougeL": 0.4694,
"eval_runtime": 34.3853,
"eval_samples_per_second": 7.969,
"eval_steps_per_second": 2.007,
"step": 2200
},
{
"epoch": 6.8,
"grad_norm": 1.163404107093811,
"learning_rate": 3.785454545454546e-05,
"loss": 1.4557,
"step": 2210
},
{
"epoch": 6.8307692307692305,
"grad_norm": 1.5031542778015137,
"learning_rate": 3.7490909090909094e-05,
"loss": 1.1773,
"step": 2220
},
{
"epoch": 6.861538461538462,
"grad_norm": 1.0126748085021973,
"learning_rate": 3.712727272727273e-05,
"loss": 1.3922,
"step": 2230
},
{
"epoch": 6.892307692307693,
"grad_norm": 1.0135952234268188,
"learning_rate": 3.676363636363637e-05,
"loss": 1.2028,
"step": 2240
},
{
"epoch": 6.923076923076923,
"grad_norm": 1.16098153591156,
"learning_rate": 3.6400000000000004e-05,
"loss": 1.4111,
"step": 2250
},
{
"epoch": 6.953846153846154,
"grad_norm": 1.488234519958496,
"learning_rate": 3.603636363636364e-05,
"loss": 1.3131,
"step": 2260
},
{
"epoch": 6.984615384615385,
"grad_norm": 1.129989743232727,
"learning_rate": 3.567272727272728e-05,
"loss": 1.1701,
"step": 2270
},
{
"epoch": 7.015384615384615,
"grad_norm": 1.218468189239502,
"learning_rate": 3.530909090909091e-05,
"loss": 1.2604,
"step": 2280
},
{
"epoch": 7.046153846153846,
"grad_norm": 1.2339926958084106,
"learning_rate": 3.494545454545455e-05,
"loss": 1.0932,
"step": 2290
},
{
"epoch": 7.076923076923077,
"grad_norm": 1.4972765445709229,
"learning_rate": 3.458181818181818e-05,
"loss": 1.1137,
"step": 2300
},
{
"epoch": 7.107692307692307,
"grad_norm": 1.1884584426879883,
"learning_rate": 3.4218181818181824e-05,
"loss": 1.1522,
"step": 2310
},
{
"epoch": 7.138461538461539,
"grad_norm": 1.4934840202331543,
"learning_rate": 3.3854545454545454e-05,
"loss": 1.3121,
"step": 2320
},
{
"epoch": 7.1692307692307695,
"grad_norm": 1.1432678699493408,
"learning_rate": 3.34909090909091e-05,
"loss": 1.1549,
"step": 2330
},
{
"epoch": 7.2,
"grad_norm": 1.1708807945251465,
"learning_rate": 3.312727272727273e-05,
"loss": 1.1692,
"step": 2340
},
{
"epoch": 7.230769230769231,
"grad_norm": 1.3824517726898193,
"learning_rate": 3.2763636363636365e-05,
"loss": 1.208,
"step": 2350
},
{
"epoch": 7.2615384615384615,
"grad_norm": 1.1225407123565674,
"learning_rate": 3.24e-05,
"loss": 1.1542,
"step": 2360
},
{
"epoch": 7.292307692307692,
"grad_norm": 1.2445507049560547,
"learning_rate": 3.203636363636364e-05,
"loss": 1.2265,
"step": 2370
},
{
"epoch": 7.323076923076923,
"grad_norm": 1.256062626838684,
"learning_rate": 3.1672727272727275e-05,
"loss": 1.1822,
"step": 2380
},
{
"epoch": 7.3538461538461535,
"grad_norm": 1.3986501693725586,
"learning_rate": 3.130909090909091e-05,
"loss": 1.262,
"step": 2390
},
{
"epoch": 7.384615384615385,
"grad_norm": 1.1086236238479614,
"learning_rate": 3.094545454545455e-05,
"loss": 1.2275,
"step": 2400
},
{
"epoch": 7.384615384615385,
"eval_gen_len": 187.0146,
"eval_loss": 1.6998823881149292,
"eval_rouge1": 0.6055,
"eval_rouge2": 0.2541,
"eval_rougeL": 0.4684,
"eval_runtime": 33.5145,
"eval_samples_per_second": 8.176,
"eval_steps_per_second": 2.059,
"step": 2400
},
{
"epoch": 7.415384615384616,
"grad_norm": 1.5682780742645264,
"learning_rate": 3.0581818181818185e-05,
"loss": 1.3442,
"step": 2410
},
{
"epoch": 7.446153846153846,
"grad_norm": 1.034818410873413,
"learning_rate": 3.021818181818182e-05,
"loss": 1.2158,
"step": 2420
},
{
"epoch": 7.476923076923077,
"grad_norm": 1.2816352844238281,
"learning_rate": 2.985454545454546e-05,
"loss": 1.1646,
"step": 2430
},
{
"epoch": 7.507692307692308,
"grad_norm": 1.252765417098999,
"learning_rate": 2.9490909090909092e-05,
"loss": 1.1985,
"step": 2440
},
{
"epoch": 7.538461538461538,
"grad_norm": 1.4074809551239014,
"learning_rate": 2.9127272727272732e-05,
"loss": 1.3245,
"step": 2450
},
{
"epoch": 7.569230769230769,
"grad_norm": 1.3757801055908203,
"learning_rate": 2.8763636363636366e-05,
"loss": 1.2856,
"step": 2460
},
{
"epoch": 7.6,
"grad_norm": 1.355635643005371,
"learning_rate": 2.84e-05,
"loss": 1.229,
"step": 2470
},
{
"epoch": 7.63076923076923,
"grad_norm": 1.185659646987915,
"learning_rate": 2.803636363636364e-05,
"loss": 1.2444,
"step": 2480
},
{
"epoch": 7.661538461538462,
"grad_norm": 1.4726060628890991,
"learning_rate": 2.7672727272727273e-05,
"loss": 1.2877,
"step": 2490
},
{
"epoch": 7.6923076923076925,
"grad_norm": 1.525718092918396,
"learning_rate": 2.7309090909090913e-05,
"loss": 1.2993,
"step": 2500
},
{
"epoch": 7.723076923076923,
"grad_norm": 1.2432451248168945,
"learning_rate": 2.6945454545454546e-05,
"loss": 1.2803,
"step": 2510
},
{
"epoch": 7.753846153846154,
"grad_norm": 1.2237839698791504,
"learning_rate": 2.6581818181818186e-05,
"loss": 1.2785,
"step": 2520
},
{
"epoch": 7.7846153846153845,
"grad_norm": 1.5360924005508423,
"learning_rate": 2.621818181818182e-05,
"loss": 1.3164,
"step": 2530
},
{
"epoch": 7.815384615384615,
"grad_norm": 1.4242442846298218,
"learning_rate": 2.5854545454545453e-05,
"loss": 1.1864,
"step": 2540
},
{
"epoch": 7.846153846153846,
"grad_norm": 1.0664770603179932,
"learning_rate": 2.5490909090909093e-05,
"loss": 1.2769,
"step": 2550
},
{
"epoch": 7.876923076923077,
"grad_norm": 1.3427962064743042,
"learning_rate": 2.5127272727272727e-05,
"loss": 1.183,
"step": 2560
},
{
"epoch": 7.907692307692308,
"grad_norm": 1.2692787647247314,
"learning_rate": 2.4763636363636363e-05,
"loss": 1.3775,
"step": 2570
},
{
"epoch": 7.938461538461539,
"grad_norm": 1.3220490217208862,
"learning_rate": 2.44e-05,
"loss": 1.1557,
"step": 2580
},
{
"epoch": 7.969230769230769,
"grad_norm": 1.31517493724823,
"learning_rate": 2.4036363636363637e-05,
"loss": 1.3144,
"step": 2590
},
{
"epoch": 8.0,
"grad_norm": 1.145848274230957,
"learning_rate": 2.3672727272727274e-05,
"loss": 1.4194,
"step": 2600
},
{
"epoch": 8.0,
"eval_gen_len": 187.0146,
"eval_loss": 1.6980103254318237,
"eval_rouge1": 0.6066,
"eval_rouge2": 0.2565,
"eval_rougeL": 0.4699,
"eval_runtime": 33.5275,
"eval_samples_per_second": 8.172,
"eval_steps_per_second": 2.058,
"step": 2600
},
{
"epoch": 8.03076923076923,
"grad_norm": 1.3528770208358765,
"learning_rate": 2.330909090909091e-05,
"loss": 1.2504,
"step": 2610
},
{
"epoch": 8.061538461538461,
"grad_norm": 1.1651642322540283,
"learning_rate": 2.2945454545454547e-05,
"loss": 1.0993,
"step": 2620
},
{
"epoch": 8.092307692307692,
"grad_norm": 1.1845202445983887,
"learning_rate": 2.258181818181818e-05,
"loss": 1.1356,
"step": 2630
},
{
"epoch": 8.123076923076923,
"grad_norm": 1.2000699043273926,
"learning_rate": 2.2218181818181817e-05,
"loss": 1.206,
"step": 2640
},
{
"epoch": 8.153846153846153,
"grad_norm": 1.449044108390808,
"learning_rate": 2.1854545454545454e-05,
"loss": 1.2059,
"step": 2650
},
{
"epoch": 8.184615384615384,
"grad_norm": 1.2176152467727661,
"learning_rate": 2.149090909090909e-05,
"loss": 1.1849,
"step": 2660
},
{
"epoch": 8.215384615384615,
"grad_norm": 1.4765113592147827,
"learning_rate": 2.1127272727272728e-05,
"loss": 1.3351,
"step": 2670
},
{
"epoch": 8.246153846153845,
"grad_norm": 1.5038341283798218,
"learning_rate": 2.0763636363636364e-05,
"loss": 1.2766,
"step": 2680
},
{
"epoch": 8.276923076923078,
"grad_norm": 1.3483731746673584,
"learning_rate": 2.04e-05,
"loss": 1.1067,
"step": 2690
},
{
"epoch": 8.307692307692308,
"grad_norm": 1.025032639503479,
"learning_rate": 2.0036363636363638e-05,
"loss": 1.2155,
"step": 2700
},
{
"epoch": 8.338461538461539,
"grad_norm": 1.3824971914291382,
"learning_rate": 1.9672727272727275e-05,
"loss": 1.1767,
"step": 2710
},
{
"epoch": 8.36923076923077,
"grad_norm": 1.2280523777008057,
"learning_rate": 1.930909090909091e-05,
"loss": 1.2881,
"step": 2720
},
{
"epoch": 8.4,
"grad_norm": 1.2223644256591797,
"learning_rate": 1.8945454545454548e-05,
"loss": 1.1898,
"step": 2730
},
{
"epoch": 8.430769230769231,
"grad_norm": 1.349334955215454,
"learning_rate": 1.8581818181818185e-05,
"loss": 1.2984,
"step": 2740
},
{
"epoch": 8.461538461538462,
"grad_norm": 1.2894556522369385,
"learning_rate": 1.8218181818181822e-05,
"loss": 1.3458,
"step": 2750
},
{
"epoch": 8.492307692307692,
"grad_norm": 1.6086795330047607,
"learning_rate": 1.7854545454545455e-05,
"loss": 1.2394,
"step": 2760
},
{
"epoch": 8.523076923076923,
"grad_norm": 1.4955778121948242,
"learning_rate": 1.7490909090909092e-05,
"loss": 1.1506,
"step": 2770
},
{
"epoch": 8.553846153846154,
"grad_norm": 1.4156781435012817,
"learning_rate": 1.712727272727273e-05,
"loss": 1.1746,
"step": 2780
},
{
"epoch": 8.584615384615384,
"grad_norm": 1.2073826789855957,
"learning_rate": 1.6763636363636365e-05,
"loss": 1.1301,
"step": 2790
},
{
"epoch": 8.615384615384615,
"grad_norm": 1.574342966079712,
"learning_rate": 1.6400000000000002e-05,
"loss": 1.3976,
"step": 2800
},
{
"epoch": 8.615384615384615,
"eval_gen_len": 187.0146,
"eval_loss": 1.7040081024169922,
"eval_rouge1": 0.6049,
"eval_rouge2": 0.2551,
"eval_rougeL": 0.469,
"eval_runtime": 34.1499,
"eval_samples_per_second": 8.023,
"eval_steps_per_second": 2.021,
"step": 2800
},
{
"epoch": 8.646153846153846,
"grad_norm": 1.207323670387268,
"learning_rate": 1.603636363636364e-05,
"loss": 1.2123,
"step": 2810
},
{
"epoch": 8.676923076923076,
"grad_norm": 1.346170425415039,
"learning_rate": 1.5672727272727272e-05,
"loss": 1.1284,
"step": 2820
},
{
"epoch": 8.707692307692307,
"grad_norm": 1.3920952081680298,
"learning_rate": 1.530909090909091e-05,
"loss": 1.1961,
"step": 2830
},
{
"epoch": 8.73846153846154,
"grad_norm": 1.4912207126617432,
"learning_rate": 1.4945454545454546e-05,
"loss": 1.2558,
"step": 2840
},
{
"epoch": 8.76923076923077,
"grad_norm": 0.9997207522392273,
"learning_rate": 1.4581818181818183e-05,
"loss": 1.1842,
"step": 2850
},
{
"epoch": 8.8,
"grad_norm": 1.207138180732727,
"learning_rate": 1.421818181818182e-05,
"loss": 1.2588,
"step": 2860
},
{
"epoch": 8.830769230769231,
"grad_norm": 1.398917555809021,
"learning_rate": 1.3854545454545456e-05,
"loss": 1.2445,
"step": 2870
},
{
"epoch": 8.861538461538462,
"grad_norm": 1.3793071508407593,
"learning_rate": 1.3490909090909093e-05,
"loss": 1.164,
"step": 2880
},
{
"epoch": 8.892307692307693,
"grad_norm": 1.2650920152664185,
"learning_rate": 1.3127272727272726e-05,
"loss": 1.2235,
"step": 2890
},
{
"epoch": 8.923076923076923,
"grad_norm": 1.3319740295410156,
"learning_rate": 1.2763636363636363e-05,
"loss": 1.1818,
"step": 2900
},
{
"epoch": 8.953846153846154,
"grad_norm": 1.395668387413025,
"learning_rate": 1.24e-05,
"loss": 1.2237,
"step": 2910
},
{
"epoch": 8.984615384615385,
"grad_norm": 1.1730422973632812,
"learning_rate": 1.2036363636363637e-05,
"loss": 1.0141,
"step": 2920
}
],
"logging_steps": 10,
"max_steps": 3250,
"num_input_tokens_seen": 0,
"num_train_epochs": 10,
"save_steps": 500,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": false
},
"attributes": {}
}
},
"total_flos": 7124807319552000.0,
"train_batch_size": 4,
"trial_name": null,
"trial_params": null
}
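
The log above is the standard Hugging Face Trainer state: "log_history" interleaves training entries (every 10 steps, per "logging_steps") with evaluation entries (every 200 steps, per "eval_steps"). Below is a minimal sketch of how the file can be inspected, assuming it has been downloaded locally as trainer_state.json and that matplotlib is available; neither the script nor those assumptions are part of the original upload.

# Minimal sketch (not from the original repo): plot training loss and eval loss
# recorded in this trainer_state.json. Assumes the file is saved locally under
# that name and that matplotlib is installed.
import json

import matplotlib.pyplot as plt

with open("trainer_state.json") as f:
    state = json.load(f)

train_steps, train_loss, eval_steps, eval_loss = [], [], [], []
for entry in state["log_history"]:
    if "loss" in entry:            # training entry, logged every logging_steps (10)
        train_steps.append(entry["step"])
        train_loss.append(entry["loss"])
    elif "eval_loss" in entry:     # evaluation entry, logged every eval_steps (200)
        eval_steps.append(entry["step"])
        eval_loss.append(entry["eval_loss"])

plt.plot(train_steps, train_loss, label="train loss")
plt.plot(eval_steps, eval_loss, marker="o", label="eval loss")
plt.xlabel("global step")
plt.ylabel("loss")
plt.legend()
plt.show()

Plotted this way, the recorded eval_loss bottoms out around step 1200 (1.6592) and drifts upward afterwards while the training loss keeps falling, consistent with mild overfitting in the later epochs.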