|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 193.21963394342762, |
|
"global_step": 464500, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 1.997920133111481e-05, |
|
"loss": 3.456, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"learning_rate": 1.995840266222962e-05, |
|
"loss": 1.8719, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"learning_rate": 1.9937603993344426e-05, |
|
"loss": 1.7095, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"learning_rate": 1.9916805324459236e-05, |
|
"loss": 1.6155, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"eval_cer": 0.8924, |
|
"eval_gen_len": 19.884, |
|
"eval_loss": 1.4984486103057861, |
|
"eval_runtime": 548.3383, |
|
"eval_samples_per_second": 5.847, |
|
"eval_steps_per_second": 1.463, |
|
"step": 2404 |
|
}, |
|
{ |
|
"epoch": 1.04, |
|
"learning_rate": 1.9896006655574045e-05, |
|
"loss": 1.5555, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 1.25, |
|
"learning_rate": 1.9875207986688854e-05, |
|
"loss": 1.4646, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 1.46, |
|
"learning_rate": 1.9854409317803664e-05, |
|
"loss": 1.4079, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 1.66, |
|
"learning_rate": 1.983361064891847e-05, |
|
"loss": 1.3488, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 1.87, |
|
"learning_rate": 1.981281198003328e-05, |
|
"loss": 1.3039, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"eval_cer": 0.5947, |
|
"eval_gen_len": 15.5827, |
|
"eval_loss": 1.144504427909851, |
|
"eval_runtime": 282.2519, |
|
"eval_samples_per_second": 11.359, |
|
"eval_steps_per_second": 2.841, |
|
"step": 4808 |
|
}, |
|
{ |
|
"epoch": 2.08, |
|
"learning_rate": 1.979201331114809e-05, |
|
"loss": 1.2269, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 2.29, |
|
"learning_rate": 1.9771214642262898e-05, |
|
"loss": 1.1561, |
|
"step": 5500 |
|
}, |
|
{ |
|
"epoch": 2.5, |
|
"learning_rate": 1.9750415973377707e-05, |
|
"loss": 1.1288, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 2.7, |
|
"learning_rate": 1.9729700499168054e-05, |
|
"loss": 1.0835, |
|
"step": 6500 |
|
}, |
|
{ |
|
"epoch": 2.91, |
|
"learning_rate": 1.9708901830282864e-05, |
|
"loss": 1.0647, |
|
"step": 7000 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"eval_cer": 0.4993, |
|
"eval_gen_len": 14.9208, |
|
"eval_loss": 0.9073113799095154, |
|
"eval_runtime": 267.439, |
|
"eval_samples_per_second": 11.988, |
|
"eval_steps_per_second": 2.999, |
|
"step": 7212 |
|
}, |
|
{ |
|
"epoch": 3.12, |
|
"learning_rate": 1.9688103161397673e-05, |
|
"loss": 0.9848, |
|
"step": 7500 |
|
}, |
|
{ |
|
"epoch": 3.33, |
|
"learning_rate": 1.966730449251248e-05, |
|
"loss": 0.9265, |
|
"step": 8000 |
|
}, |
|
{ |
|
"epoch": 3.54, |
|
"learning_rate": 1.964650582362729e-05, |
|
"loss": 0.9057, |
|
"step": 8500 |
|
}, |
|
{ |
|
"epoch": 3.74, |
|
"learning_rate": 1.9625707154742098e-05, |
|
"loss": 0.8799, |
|
"step": 9000 |
|
}, |
|
{ |
|
"epoch": 3.95, |
|
"learning_rate": 1.9604950083194678e-05, |
|
"loss": 0.8491, |
|
"step": 9500 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"eval_cer": 0.4462, |
|
"eval_gen_len": 13.549, |
|
"eval_loss": 0.7219734191894531, |
|
"eval_runtime": 263.8796, |
|
"eval_samples_per_second": 12.149, |
|
"eval_steps_per_second": 3.039, |
|
"step": 9616 |
|
}, |
|
{ |
|
"epoch": 4.16, |
|
"learning_rate": 1.9584151414309487e-05, |
|
"loss": 0.7793, |
|
"step": 10000 |
|
}, |
|
{ |
|
"epoch": 4.37, |
|
"learning_rate": 1.9563352745424296e-05, |
|
"loss": 0.7483, |
|
"step": 10500 |
|
}, |
|
{ |
|
"epoch": 4.58, |
|
"learning_rate": 1.9542554076539106e-05, |
|
"loss": 0.732, |
|
"step": 11000 |
|
}, |
|
{ |
|
"epoch": 4.78, |
|
"learning_rate": 1.9521755407653912e-05, |
|
"loss": 0.7186, |
|
"step": 11500 |
|
}, |
|
{ |
|
"epoch": 4.99, |
|
"learning_rate": 1.9500956738768718e-05, |
|
"loss": 0.7024, |
|
"step": 12000 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"eval_cer": 0.4716, |
|
"eval_gen_len": 15.7813, |
|
"eval_loss": 0.6081481575965881, |
|
"eval_runtime": 336.6018, |
|
"eval_samples_per_second": 9.525, |
|
"eval_steps_per_second": 2.383, |
|
"step": 12020 |
|
}, |
|
{ |
|
"epoch": 5.2, |
|
"learning_rate": 1.9480158069883527e-05, |
|
"loss": 0.6114, |
|
"step": 12500 |
|
}, |
|
{ |
|
"epoch": 5.41, |
|
"learning_rate": 1.945940099833611e-05, |
|
"loss": 0.6193, |
|
"step": 13000 |
|
}, |
|
{ |
|
"epoch": 5.62, |
|
"learning_rate": 1.9438602329450916e-05, |
|
"loss": 0.6114, |
|
"step": 13500 |
|
}, |
|
{ |
|
"epoch": 5.82, |
|
"learning_rate": 1.9417803660565726e-05, |
|
"loss": 0.5797, |
|
"step": 14000 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"eval_cer": 0.3726, |
|
"eval_gen_len": 13.8163, |
|
"eval_loss": 0.5052656531333923, |
|
"eval_runtime": 259.2943, |
|
"eval_samples_per_second": 12.364, |
|
"eval_steps_per_second": 3.093, |
|
"step": 14424 |
|
}, |
|
{ |
|
"epoch": 6.03, |
|
"learning_rate": 1.9397004991680535e-05, |
|
"loss": 0.5658, |
|
"step": 14500 |
|
}, |
|
{ |
|
"epoch": 6.24, |
|
"learning_rate": 1.9376206322795344e-05, |
|
"loss": 0.52, |
|
"step": 15000 |
|
}, |
|
{ |
|
"epoch": 6.45, |
|
"learning_rate": 1.9355407653910154e-05, |
|
"loss": 0.4992, |
|
"step": 15500 |
|
}, |
|
{ |
|
"epoch": 6.66, |
|
"learning_rate": 1.933460898502496e-05, |
|
"loss": 0.5095, |
|
"step": 16000 |
|
}, |
|
{ |
|
"epoch": 6.86, |
|
"learning_rate": 1.931381031613977e-05, |
|
"loss": 0.4966, |
|
"step": 16500 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"eval_cer": 0.3997, |
|
"eval_gen_len": 15.0505, |
|
"eval_loss": 0.4698517620563507, |
|
"eval_runtime": 299.617, |
|
"eval_samples_per_second": 10.7, |
|
"eval_steps_per_second": 2.677, |
|
"step": 16828 |
|
}, |
|
{ |
|
"epoch": 7.07, |
|
"learning_rate": 1.929305324459235e-05, |
|
"loss": 0.4493, |
|
"step": 17000 |
|
}, |
|
{ |
|
"epoch": 7.28, |
|
"learning_rate": 1.9272254575707155e-05, |
|
"loss": 0.4288, |
|
"step": 17500 |
|
}, |
|
{ |
|
"epoch": 7.49, |
|
"learning_rate": 1.9251455906821964e-05, |
|
"loss": 0.4397, |
|
"step": 18000 |
|
}, |
|
{ |
|
"epoch": 7.7, |
|
"learning_rate": 1.9230657237936774e-05, |
|
"loss": 0.423, |
|
"step": 18500 |
|
}, |
|
{ |
|
"epoch": 7.9, |
|
"learning_rate": 1.9209900166389354e-05, |
|
"loss": 0.4234, |
|
"step": 19000 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"eval_cer": 0.3414, |
|
"eval_gen_len": 14.2976, |
|
"eval_loss": 0.41973158717155457, |
|
"eval_runtime": 277.9117, |
|
"eval_samples_per_second": 11.536, |
|
"eval_steps_per_second": 2.886, |
|
"step": 19232 |
|
}, |
|
{ |
|
"epoch": 8.11, |
|
"learning_rate": 1.918910149750416e-05, |
|
"loss": 0.377, |
|
"step": 19500 |
|
}, |
|
{ |
|
"epoch": 8.32, |
|
"learning_rate": 1.916834442595674e-05, |
|
"loss": 0.3612, |
|
"step": 20000 |
|
}, |
|
{ |
|
"epoch": 8.53, |
|
"learning_rate": 1.914754575707155e-05, |
|
"loss": 0.3725, |
|
"step": 20500 |
|
}, |
|
{ |
|
"epoch": 8.74, |
|
"learning_rate": 1.9126747088186358e-05, |
|
"loss": 0.3607, |
|
"step": 21000 |
|
}, |
|
{ |
|
"epoch": 8.94, |
|
"learning_rate": 1.9105948419301164e-05, |
|
"loss": 0.3661, |
|
"step": 21500 |
|
}, |
|
{ |
|
"epoch": 9.0, |
|
"eval_cer": 0.3568, |
|
"eval_gen_len": 14.5349, |
|
"eval_loss": 0.36951112747192383, |
|
"eval_runtime": 287.7236, |
|
"eval_samples_per_second": 11.143, |
|
"eval_steps_per_second": 2.787, |
|
"step": 21636 |
|
}, |
|
{ |
|
"epoch": 9.15, |
|
"learning_rate": 1.9085149750415974e-05, |
|
"loss": 0.3287, |
|
"step": 22000 |
|
}, |
|
{ |
|
"epoch": 9.36, |
|
"learning_rate": 1.9064351081530783e-05, |
|
"loss": 0.3185, |
|
"step": 22500 |
|
}, |
|
{ |
|
"epoch": 9.57, |
|
"learning_rate": 1.9043552412645592e-05, |
|
"loss": 0.3082, |
|
"step": 23000 |
|
}, |
|
{ |
|
"epoch": 9.78, |
|
"learning_rate": 1.90227537437604e-05, |
|
"loss": 0.3177, |
|
"step": 23500 |
|
}, |
|
{ |
|
"epoch": 9.98, |
|
"learning_rate": 1.9001996672212978e-05, |
|
"loss": 0.3094, |
|
"step": 24000 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"eval_cer": 0.3123, |
|
"eval_gen_len": 13.6931, |
|
"eval_loss": 0.35325565934181213, |
|
"eval_runtime": 287.7264, |
|
"eval_samples_per_second": 11.143, |
|
"eval_steps_per_second": 2.787, |
|
"step": 24040 |
|
}, |
|
{ |
|
"epoch": 10.19, |
|
"learning_rate": 1.8981198003327787e-05, |
|
"loss": 0.2623, |
|
"step": 24500 |
|
}, |
|
{ |
|
"epoch": 10.4, |
|
"learning_rate": 1.8960399334442597e-05, |
|
"loss": 0.2666, |
|
"step": 25000 |
|
}, |
|
{ |
|
"epoch": 10.61, |
|
"learning_rate": 1.8939600665557406e-05, |
|
"loss": 0.2667, |
|
"step": 25500 |
|
}, |
|
{ |
|
"epoch": 10.82, |
|
"learning_rate": 1.8918843594009986e-05, |
|
"loss": 0.2695, |
|
"step": 26000 |
|
}, |
|
{ |
|
"epoch": 11.0, |
|
"eval_cer": 0.3036, |
|
"eval_gen_len": 13.8085, |
|
"eval_loss": 0.34151870012283325, |
|
"eval_runtime": 267.5842, |
|
"eval_samples_per_second": 11.981, |
|
"eval_steps_per_second": 2.997, |
|
"step": 26444 |
|
}, |
|
{ |
|
"epoch": 11.02, |
|
"learning_rate": 1.8898086522462566e-05, |
|
"loss": 0.2818, |
|
"step": 26500 |
|
}, |
|
{ |
|
"epoch": 11.23, |
|
"learning_rate": 1.8877287853577372e-05, |
|
"loss": 0.24, |
|
"step": 27000 |
|
}, |
|
{ |
|
"epoch": 11.44, |
|
"learning_rate": 1.885648918469218e-05, |
|
"loss": 0.2347, |
|
"step": 27500 |
|
}, |
|
{ |
|
"epoch": 11.65, |
|
"learning_rate": 1.883569051580699e-05, |
|
"loss": 0.2372, |
|
"step": 28000 |
|
}, |
|
{ |
|
"epoch": 11.86, |
|
"learning_rate": 1.88148918469218e-05, |
|
"loss": 0.242, |
|
"step": 28500 |
|
}, |
|
{ |
|
"epoch": 12.0, |
|
"eval_cer": 0.2954, |
|
"eval_gen_len": 13.5817, |
|
"eval_loss": 0.33137527108192444, |
|
"eval_runtime": 249.8381, |
|
"eval_samples_per_second": 12.832, |
|
"eval_steps_per_second": 3.21, |
|
"step": 28848 |
|
}, |
|
{ |
|
"epoch": 12.06, |
|
"learning_rate": 1.8794093178036606e-05, |
|
"loss": 0.2294, |
|
"step": 29000 |
|
}, |
|
{ |
|
"epoch": 12.27, |
|
"learning_rate": 1.8773336106489186e-05, |
|
"loss": 0.2139, |
|
"step": 29500 |
|
}, |
|
{ |
|
"epoch": 12.48, |
|
"learning_rate": 1.8752537437603995e-05, |
|
"loss": 0.2173, |
|
"step": 30000 |
|
}, |
|
{ |
|
"epoch": 12.69, |
|
"learning_rate": 1.8731738768718805e-05, |
|
"loss": 0.214, |
|
"step": 30500 |
|
}, |
|
{ |
|
"epoch": 12.9, |
|
"learning_rate": 1.871094009983361e-05, |
|
"loss": 0.2123, |
|
"step": 31000 |
|
}, |
|
{ |
|
"epoch": 13.0, |
|
"eval_cer": 0.2927, |
|
"eval_gen_len": 13.8079, |
|
"eval_loss": 0.3195803463459015, |
|
"eval_runtime": 264.0497, |
|
"eval_samples_per_second": 12.142, |
|
"eval_steps_per_second": 3.037, |
|
"step": 31252 |
|
}, |
|
{ |
|
"epoch": 13.1, |
|
"learning_rate": 1.869014143094842e-05, |
|
"loss": 0.191, |
|
"step": 31500 |
|
}, |
|
{ |
|
"epoch": 13.31, |
|
"learning_rate": 1.866934276206323e-05, |
|
"loss": 0.187, |
|
"step": 32000 |
|
}, |
|
{ |
|
"epoch": 13.52, |
|
"learning_rate": 1.864854409317804e-05, |
|
"loss": 0.1903, |
|
"step": 32500 |
|
}, |
|
{ |
|
"epoch": 13.73, |
|
"learning_rate": 1.8627745424292848e-05, |
|
"loss": 0.1978, |
|
"step": 33000 |
|
}, |
|
{ |
|
"epoch": 13.94, |
|
"learning_rate": 1.8606946755407654e-05, |
|
"loss": 0.1954, |
|
"step": 33500 |
|
}, |
|
{ |
|
"epoch": 14.0, |
|
"eval_cer": 0.2802, |
|
"eval_gen_len": 13.5215, |
|
"eval_loss": 0.3065420985221863, |
|
"eval_runtime": 255.1945, |
|
"eval_samples_per_second": 12.563, |
|
"eval_steps_per_second": 3.143, |
|
"step": 33656 |
|
}, |
|
{ |
|
"epoch": 14.14, |
|
"learning_rate": 1.8586231281198004e-05, |
|
"loss": 0.1658, |
|
"step": 34000 |
|
}, |
|
{ |
|
"epoch": 14.35, |
|
"learning_rate": 1.8565432612312814e-05, |
|
"loss": 0.165, |
|
"step": 34500 |
|
}, |
|
{ |
|
"epoch": 14.56, |
|
"learning_rate": 1.854463394342762e-05, |
|
"loss": 0.1673, |
|
"step": 35000 |
|
}, |
|
{ |
|
"epoch": 14.77, |
|
"learning_rate": 1.852383527454243e-05, |
|
"loss": 0.174, |
|
"step": 35500 |
|
}, |
|
{ |
|
"epoch": 14.98, |
|
"learning_rate": 1.850303660565724e-05, |
|
"loss": 0.1734, |
|
"step": 36000 |
|
}, |
|
{ |
|
"epoch": 15.0, |
|
"eval_cer": 0.301, |
|
"eval_gen_len": 14.1818, |
|
"eval_loss": 0.3554905652999878, |
|
"eval_runtime": 265.2219, |
|
"eval_samples_per_second": 12.088, |
|
"eval_steps_per_second": 3.024, |
|
"step": 36060 |
|
}, |
|
{ |
|
"epoch": 15.18, |
|
"learning_rate": 1.8482237936772048e-05, |
|
"loss": 0.1436, |
|
"step": 36500 |
|
}, |
|
{ |
|
"epoch": 15.39, |
|
"learning_rate": 1.8461439267886857e-05, |
|
"loss": 0.1566, |
|
"step": 37000 |
|
}, |
|
{ |
|
"epoch": 15.6, |
|
"learning_rate": 1.8440640599001663e-05, |
|
"loss": 0.1548, |
|
"step": 37500 |
|
}, |
|
{ |
|
"epoch": 15.81, |
|
"learning_rate": 1.8419883527454246e-05, |
|
"loss": 0.1598, |
|
"step": 38000 |
|
}, |
|
{ |
|
"epoch": 16.0, |
|
"eval_cer": 0.2854, |
|
"eval_gen_len": 13.975, |
|
"eval_loss": 0.3254939913749695, |
|
"eval_runtime": 272.7931, |
|
"eval_samples_per_second": 11.752, |
|
"eval_steps_per_second": 2.94, |
|
"step": 38464 |
|
}, |
|
{ |
|
"epoch": 16.01, |
|
"learning_rate": 1.8399084858569052e-05, |
|
"loss": 0.1543, |
|
"step": 38500 |
|
}, |
|
{ |
|
"epoch": 16.22, |
|
"learning_rate": 1.8378286189683862e-05, |
|
"loss": 0.136, |
|
"step": 39000 |
|
}, |
|
{ |
|
"epoch": 16.43, |
|
"learning_rate": 1.835748752079867e-05, |
|
"loss": 0.1356, |
|
"step": 39500 |
|
}, |
|
{ |
|
"epoch": 16.64, |
|
"learning_rate": 1.833673044925125e-05, |
|
"loss": 0.14, |
|
"step": 40000 |
|
}, |
|
{ |
|
"epoch": 16.85, |
|
"learning_rate": 1.8315931780366057e-05, |
|
"loss": 0.1442, |
|
"step": 40500 |
|
}, |
|
{ |
|
"epoch": 17.0, |
|
"eval_cer": 0.2748, |
|
"eval_gen_len": 13.4192, |
|
"eval_loss": 0.3075180649757385, |
|
"eval_runtime": 259.5551, |
|
"eval_samples_per_second": 12.352, |
|
"eval_steps_per_second": 3.09, |
|
"step": 40868 |
|
}, |
|
{ |
|
"epoch": 17.05, |
|
"learning_rate": 1.8295133111480866e-05, |
|
"loss": 0.1417, |
|
"step": 41000 |
|
}, |
|
{ |
|
"epoch": 17.26, |
|
"learning_rate": 1.8274334442595676e-05, |
|
"loss": 0.1241, |
|
"step": 41500 |
|
}, |
|
{ |
|
"epoch": 17.47, |
|
"learning_rate": 1.8253577371048256e-05, |
|
"loss": 0.1254, |
|
"step": 42000 |
|
}, |
|
{ |
|
"epoch": 17.68, |
|
"learning_rate": 1.823277870216306e-05, |
|
"loss": 0.1315, |
|
"step": 42500 |
|
}, |
|
{ |
|
"epoch": 17.89, |
|
"learning_rate": 1.821198003327787e-05, |
|
"loss": 0.1288, |
|
"step": 43000 |
|
}, |
|
{ |
|
"epoch": 18.0, |
|
"eval_cer": 0.2763, |
|
"eval_gen_len": 13.5976, |
|
"eval_loss": 0.30095288157463074, |
|
"eval_runtime": 261.6386, |
|
"eval_samples_per_second": 12.254, |
|
"eval_steps_per_second": 3.065, |
|
"step": 43272 |
|
}, |
|
{ |
|
"epoch": 18.09, |
|
"learning_rate": 1.819118136439268e-05, |
|
"loss": 0.123, |
|
"step": 43500 |
|
}, |
|
{ |
|
"epoch": 18.3, |
|
"learning_rate": 1.817042429284526e-05, |
|
"loss": 0.1178, |
|
"step": 44000 |
|
}, |
|
{ |
|
"epoch": 18.51, |
|
"learning_rate": 1.8149625623960066e-05, |
|
"loss": 0.1203, |
|
"step": 44500 |
|
}, |
|
{ |
|
"epoch": 18.72, |
|
"learning_rate": 1.8128826955074876e-05, |
|
"loss": 0.1206, |
|
"step": 45000 |
|
}, |
|
{ |
|
"epoch": 18.93, |
|
"learning_rate": 1.8108028286189685e-05, |
|
"loss": 0.1249, |
|
"step": 45500 |
|
}, |
|
{ |
|
"epoch": 19.0, |
|
"eval_cer": 0.2781, |
|
"eval_gen_len": 13.9969, |
|
"eval_loss": 0.3196047246456146, |
|
"eval_runtime": 267.3702, |
|
"eval_samples_per_second": 11.991, |
|
"eval_steps_per_second": 3.0, |
|
"step": 45676 |
|
}, |
|
{ |
|
"epoch": 19.13, |
|
"learning_rate": 1.8087271214642265e-05, |
|
"loss": 0.1067, |
|
"step": 46000 |
|
}, |
|
{ |
|
"epoch": 19.34, |
|
"learning_rate": 1.8066514143094845e-05, |
|
"loss": 0.106, |
|
"step": 46500 |
|
}, |
|
{ |
|
"epoch": 19.55, |
|
"learning_rate": 1.8045715474209654e-05, |
|
"loss": 0.1083, |
|
"step": 47000 |
|
}, |
|
{ |
|
"epoch": 19.76, |
|
"learning_rate": 1.802491680532446e-05, |
|
"loss": 0.1098, |
|
"step": 47500 |
|
}, |
|
{ |
|
"epoch": 19.97, |
|
"learning_rate": 1.800411813643927e-05, |
|
"loss": 0.1182, |
|
"step": 48000 |
|
}, |
|
{ |
|
"epoch": 20.0, |
|
"eval_cer": 0.2783, |
|
"eval_gen_len": 13.6728, |
|
"eval_loss": 0.3209761679172516, |
|
"eval_runtime": 262.3576, |
|
"eval_samples_per_second": 12.22, |
|
"eval_steps_per_second": 3.057, |
|
"step": 48080 |
|
}, |
|
{ |
|
"epoch": 20.17, |
|
"learning_rate": 1.798331946755408e-05, |
|
"loss": 0.0975, |
|
"step": 48500 |
|
}, |
|
{ |
|
"epoch": 20.38, |
|
"learning_rate": 1.7962520798668888e-05, |
|
"loss": 0.0975, |
|
"step": 49000 |
|
}, |
|
{ |
|
"epoch": 20.59, |
|
"learning_rate": 1.7941722129783698e-05, |
|
"loss": 0.1024, |
|
"step": 49500 |
|
}, |
|
{ |
|
"epoch": 20.8, |
|
"learning_rate": 1.7920923460898504e-05, |
|
"loss": 0.1056, |
|
"step": 50000 |
|
}, |
|
{ |
|
"epoch": 21.0, |
|
"eval_cer": 0.2707, |
|
"eval_gen_len": 13.5714, |
|
"eval_loss": 0.3157811760902405, |
|
"eval_runtime": 260.7596, |
|
"eval_samples_per_second": 12.295, |
|
"eval_steps_per_second": 3.076, |
|
"step": 50484 |
|
}, |
|
{ |
|
"epoch": 21.01, |
|
"learning_rate": 1.7900166389351083e-05, |
|
"loss": 0.1029, |
|
"step": 50500 |
|
}, |
|
{ |
|
"epoch": 21.21, |
|
"learning_rate": 1.7879367720465893e-05, |
|
"loss": 0.0943, |
|
"step": 51000 |
|
}, |
|
{ |
|
"epoch": 21.42, |
|
"learning_rate": 1.7858569051580702e-05, |
|
"loss": 0.0929, |
|
"step": 51500 |
|
}, |
|
{ |
|
"epoch": 21.63, |
|
"learning_rate": 1.7837770382695508e-05, |
|
"loss": 0.0965, |
|
"step": 52000 |
|
}, |
|
{ |
|
"epoch": 21.84, |
|
"learning_rate": 1.7816971713810317e-05, |
|
"loss": 0.0916, |
|
"step": 52500 |
|
}, |
|
{ |
|
"epoch": 22.0, |
|
"eval_cer": 0.2685, |
|
"eval_gen_len": 13.6404, |
|
"eval_loss": 0.315157949924469, |
|
"eval_runtime": 259.3506, |
|
"eval_samples_per_second": 12.362, |
|
"eval_steps_per_second": 3.092, |
|
"step": 52888 |
|
}, |
|
{ |
|
"epoch": 22.05, |
|
"learning_rate": 1.7796173044925127e-05, |
|
"loss": 0.0992, |
|
"step": 53000 |
|
}, |
|
{ |
|
"epoch": 22.25, |
|
"learning_rate": 1.7775415973377707e-05, |
|
"loss": 0.0884, |
|
"step": 53500 |
|
}, |
|
{ |
|
"epoch": 22.46, |
|
"learning_rate": 1.7754617304492513e-05, |
|
"loss": 0.0922, |
|
"step": 54000 |
|
}, |
|
{ |
|
"epoch": 22.67, |
|
"learning_rate": 1.7733818635607322e-05, |
|
"loss": 0.0826, |
|
"step": 54500 |
|
}, |
|
{ |
|
"epoch": 22.88, |
|
"learning_rate": 1.771301996672213e-05, |
|
"loss": 0.0934, |
|
"step": 55000 |
|
}, |
|
{ |
|
"epoch": 23.0, |
|
"eval_cer": 0.2711, |
|
"eval_gen_len": 13.8575, |
|
"eval_loss": 0.3333517909049988, |
|
"eval_runtime": 261.6177, |
|
"eval_samples_per_second": 12.255, |
|
"eval_steps_per_second": 3.066, |
|
"step": 55292 |
|
}, |
|
{ |
|
"epoch": 23.09, |
|
"learning_rate": 1.769222129783694e-05, |
|
"loss": 0.0861, |
|
"step": 55500 |
|
}, |
|
{ |
|
"epoch": 23.29, |
|
"learning_rate": 1.7671464226289517e-05, |
|
"loss": 0.081, |
|
"step": 56000 |
|
}, |
|
{ |
|
"epoch": 23.5, |
|
"learning_rate": 1.7650665557404327e-05, |
|
"loss": 0.0798, |
|
"step": 56500 |
|
}, |
|
{ |
|
"epoch": 23.71, |
|
"learning_rate": 1.7629866888519136e-05, |
|
"loss": 0.0843, |
|
"step": 57000 |
|
}, |
|
{ |
|
"epoch": 23.92, |
|
"learning_rate": 1.7609068219633945e-05, |
|
"loss": 0.0941, |
|
"step": 57500 |
|
}, |
|
{ |
|
"epoch": 24.0, |
|
"eval_cer": 0.2656, |
|
"eval_gen_len": 13.5583, |
|
"eval_loss": 0.3143361210823059, |
|
"eval_runtime": 261.1394, |
|
"eval_samples_per_second": 12.277, |
|
"eval_steps_per_second": 3.071, |
|
"step": 57696 |
|
}, |
|
{ |
|
"epoch": 24.13, |
|
"learning_rate": 1.7588269550748755e-05, |
|
"loss": 0.0809, |
|
"step": 58000 |
|
}, |
|
{ |
|
"epoch": 24.33, |
|
"learning_rate": 1.756747088186356e-05, |
|
"loss": 0.0784, |
|
"step": 58500 |
|
}, |
|
{ |
|
"epoch": 24.54, |
|
"learning_rate": 1.754671381031614e-05, |
|
"loss": 0.0785, |
|
"step": 59000 |
|
}, |
|
{ |
|
"epoch": 24.75, |
|
"learning_rate": 1.752591514143095e-05, |
|
"loss": 0.0782, |
|
"step": 59500 |
|
}, |
|
{ |
|
"epoch": 24.96, |
|
"learning_rate": 1.750511647254576e-05, |
|
"loss": 0.0773, |
|
"step": 60000 |
|
}, |
|
{ |
|
"epoch": 25.0, |
|
"eval_cer": 0.2641, |
|
"eval_gen_len": 13.5561, |
|
"eval_loss": 0.3231368660926819, |
|
"eval_runtime": 255.8084, |
|
"eval_samples_per_second": 12.533, |
|
"eval_steps_per_second": 3.135, |
|
"step": 60100 |
|
}, |
|
{ |
|
"epoch": 25.17, |
|
"learning_rate": 1.7484317803660565e-05, |
|
"loss": 0.0744, |
|
"step": 60500 |
|
}, |
|
{ |
|
"epoch": 25.37, |
|
"learning_rate": 1.7463519134775375e-05, |
|
"loss": 0.0719, |
|
"step": 61000 |
|
}, |
|
{ |
|
"epoch": 25.58, |
|
"learning_rate": 1.7442762063227955e-05, |
|
"loss": 0.0821, |
|
"step": 61500 |
|
}, |
|
{ |
|
"epoch": 25.79, |
|
"learning_rate": 1.7421963394342764e-05, |
|
"loss": 0.0781, |
|
"step": 62000 |
|
}, |
|
{ |
|
"epoch": 26.0, |
|
"learning_rate": 1.740116472545757e-05, |
|
"loss": 0.0759, |
|
"step": 62500 |
|
}, |
|
{ |
|
"epoch": 26.0, |
|
"eval_cer": 0.2668, |
|
"eval_gen_len": 13.7564, |
|
"eval_loss": 0.3243275582790375, |
|
"eval_runtime": 262.8593, |
|
"eval_samples_per_second": 12.197, |
|
"eval_steps_per_second": 3.051, |
|
"step": 62504 |
|
}, |
|
{ |
|
"epoch": 26.21, |
|
"learning_rate": 1.738036605657238e-05, |
|
"loss": 0.0664, |
|
"step": 63000 |
|
}, |
|
{ |
|
"epoch": 26.41, |
|
"learning_rate": 1.735956738768719e-05, |
|
"loss": 0.0683, |
|
"step": 63500 |
|
}, |
|
{ |
|
"epoch": 26.62, |
|
"learning_rate": 1.733881031613977e-05, |
|
"loss": 0.0712, |
|
"step": 64000 |
|
}, |
|
{ |
|
"epoch": 26.83, |
|
"learning_rate": 1.7318011647254578e-05, |
|
"loss": 0.077, |
|
"step": 64500 |
|
}, |
|
{ |
|
"epoch": 27.0, |
|
"eval_cer": 0.267, |
|
"eval_gen_len": 13.7558, |
|
"eval_loss": 0.33405444025993347, |
|
"eval_runtime": 259.6084, |
|
"eval_samples_per_second": 12.349, |
|
"eval_steps_per_second": 3.089, |
|
"step": 64908 |
|
}, |
|
{ |
|
"epoch": 27.04, |
|
"learning_rate": 1.7297212978369387e-05, |
|
"loss": 0.076, |
|
"step": 65000 |
|
}, |
|
{ |
|
"epoch": 27.25, |
|
"learning_rate": 1.7276414309484197e-05, |
|
"loss": 0.0595, |
|
"step": 65500 |
|
}, |
|
{ |
|
"epoch": 27.45, |
|
"learning_rate": 1.7255615640599003e-05, |
|
"loss": 0.0608, |
|
"step": 66000 |
|
}, |
|
{ |
|
"epoch": 27.66, |
|
"learning_rate": 1.7234858569051583e-05, |
|
"loss": 0.0705, |
|
"step": 66500 |
|
}, |
|
{ |
|
"epoch": 27.87, |
|
"learning_rate": 1.7214059900166392e-05, |
|
"loss": 0.0743, |
|
"step": 67000 |
|
}, |
|
{ |
|
"epoch": 28.0, |
|
"eval_cer": 0.2796, |
|
"eval_gen_len": 14.1085, |
|
"eval_loss": 0.3349040448665619, |
|
"eval_runtime": 282.8059, |
|
"eval_samples_per_second": 11.336, |
|
"eval_steps_per_second": 2.836, |
|
"step": 67312 |
|
}, |
|
{ |
|
"epoch": 28.08, |
|
"learning_rate": 1.71932612312812e-05, |
|
"loss": 0.0674, |
|
"step": 67500 |
|
}, |
|
{ |
|
"epoch": 28.29, |
|
"learning_rate": 1.7172462562396007e-05, |
|
"loss": 0.0621, |
|
"step": 68000 |
|
}, |
|
{ |
|
"epoch": 28.49, |
|
"learning_rate": 1.7151663893510817e-05, |
|
"loss": 0.0661, |
|
"step": 68500 |
|
}, |
|
{ |
|
"epoch": 28.7, |
|
"learning_rate": 1.7130865224625626e-05, |
|
"loss": 0.0611, |
|
"step": 69000 |
|
}, |
|
{ |
|
"epoch": 28.91, |
|
"learning_rate": 1.7110108153078206e-05, |
|
"loss": 0.0662, |
|
"step": 69500 |
|
}, |
|
{ |
|
"epoch": 29.0, |
|
"eval_cer": 0.2623, |
|
"eval_gen_len": 13.5396, |
|
"eval_loss": 0.3251936137676239, |
|
"eval_runtime": 258.4167, |
|
"eval_samples_per_second": 12.406, |
|
"eval_steps_per_second": 3.104, |
|
"step": 69716 |
|
}, |
|
{ |
|
"epoch": 29.12, |
|
"learning_rate": 1.7089309484193012e-05, |
|
"loss": 0.0652, |
|
"step": 70000 |
|
}, |
|
{ |
|
"epoch": 29.33, |
|
"learning_rate": 1.706851081530782e-05, |
|
"loss": 0.0557, |
|
"step": 70500 |
|
}, |
|
{ |
|
"epoch": 29.53, |
|
"learning_rate": 1.704771214642263e-05, |
|
"loss": 0.0584, |
|
"step": 71000 |
|
}, |
|
{ |
|
"epoch": 29.74, |
|
"learning_rate": 1.702691347753744e-05, |
|
"loss": 0.0627, |
|
"step": 71500 |
|
}, |
|
{ |
|
"epoch": 29.95, |
|
"learning_rate": 1.700611480865225e-05, |
|
"loss": 0.0685, |
|
"step": 72000 |
|
}, |
|
{ |
|
"epoch": 30.0, |
|
"eval_cer": 0.2643, |
|
"eval_gen_len": 13.6528, |
|
"eval_loss": 0.33093786239624023, |
|
"eval_runtime": 269.9487, |
|
"eval_samples_per_second": 11.876, |
|
"eval_steps_per_second": 2.971, |
|
"step": 72120 |
|
}, |
|
{ |
|
"epoch": 30.16, |
|
"learning_rate": 1.6985357737104826e-05, |
|
"loss": 0.0592, |
|
"step": 72500 |
|
}, |
|
{ |
|
"epoch": 30.37, |
|
"learning_rate": 1.6964559068219635e-05, |
|
"loss": 0.0564, |
|
"step": 73000 |
|
}, |
|
{ |
|
"epoch": 30.57, |
|
"learning_rate": 1.6943760399334445e-05, |
|
"loss": 0.0623, |
|
"step": 73500 |
|
}, |
|
{ |
|
"epoch": 30.78, |
|
"learning_rate": 1.692296173044925e-05, |
|
"loss": 0.0599, |
|
"step": 74000 |
|
}, |
|
{ |
|
"epoch": 30.99, |
|
"learning_rate": 1.690220465890183e-05, |
|
"loss": 0.0619, |
|
"step": 74500 |
|
}, |
|
{ |
|
"epoch": 31.0, |
|
"eval_cer": 0.266, |
|
"eval_gen_len": 13.7171, |
|
"eval_loss": 0.3532868027687073, |
|
"eval_runtime": 257.8089, |
|
"eval_samples_per_second": 12.436, |
|
"eval_steps_per_second": 3.111, |
|
"step": 74524 |
|
}, |
|
{ |
|
"epoch": 31.2, |
|
"learning_rate": 1.688140599001664e-05, |
|
"loss": 0.0499, |
|
"step": 75000 |
|
}, |
|
{ |
|
"epoch": 31.41, |
|
"learning_rate": 1.686060732113145e-05, |
|
"loss": 0.0592, |
|
"step": 75500 |
|
}, |
|
{ |
|
"epoch": 31.61, |
|
"learning_rate": 1.6839808652246255e-05, |
|
"loss": 0.0555, |
|
"step": 76000 |
|
}, |
|
{ |
|
"epoch": 31.82, |
|
"learning_rate": 1.6819009983361064e-05, |
|
"loss": 0.0602, |
|
"step": 76500 |
|
}, |
|
{ |
|
"epoch": 32.0, |
|
"eval_cer": 0.2661, |
|
"eval_gen_len": 13.8924, |
|
"eval_loss": 0.34633541107177734, |
|
"eval_runtime": 271.365, |
|
"eval_samples_per_second": 11.814, |
|
"eval_steps_per_second": 2.955, |
|
"step": 76928 |
|
}, |
|
{ |
|
"epoch": 32.03, |
|
"learning_rate": 1.6798211314475874e-05, |
|
"loss": 0.0575, |
|
"step": 77000 |
|
}, |
|
{ |
|
"epoch": 32.24, |
|
"learning_rate": 1.6777412645590683e-05, |
|
"loss": 0.0506, |
|
"step": 77500 |
|
}, |
|
{ |
|
"epoch": 32.45, |
|
"learning_rate": 1.6756613976705493e-05, |
|
"loss": 0.0566, |
|
"step": 78000 |
|
}, |
|
{ |
|
"epoch": 32.65, |
|
"learning_rate": 1.6735856905158072e-05, |
|
"loss": 0.0532, |
|
"step": 78500 |
|
}, |
|
{ |
|
"epoch": 32.86, |
|
"learning_rate": 1.6715058236272882e-05, |
|
"loss": 0.0568, |
|
"step": 79000 |
|
}, |
|
{ |
|
"epoch": 33.0, |
|
"eval_cer": 0.2645, |
|
"eval_gen_len": 13.5443, |
|
"eval_loss": 0.3539523482322693, |
|
"eval_runtime": 256.805, |
|
"eval_samples_per_second": 12.484, |
|
"eval_steps_per_second": 3.123, |
|
"step": 79332 |
|
}, |
|
{ |
|
"epoch": 33.07, |
|
"learning_rate": 1.669425956738769e-05, |
|
"loss": 0.057, |
|
"step": 79500 |
|
}, |
|
{ |
|
"epoch": 33.28, |
|
"learning_rate": 1.6673460898502497e-05, |
|
"loss": 0.0467, |
|
"step": 80000 |
|
}, |
|
{ |
|
"epoch": 33.49, |
|
"learning_rate": 1.6652703826955077e-05, |
|
"loss": 0.0514, |
|
"step": 80500 |
|
}, |
|
{ |
|
"epoch": 33.69, |
|
"learning_rate": 1.6631905158069886e-05, |
|
"loss": 0.0522, |
|
"step": 81000 |
|
}, |
|
{ |
|
"epoch": 33.9, |
|
"learning_rate": 1.6611106489184692e-05, |
|
"loss": 0.0528, |
|
"step": 81500 |
|
}, |
|
{ |
|
"epoch": 34.0, |
|
"eval_cer": 0.2621, |
|
"eval_gen_len": 13.4984, |
|
"eval_loss": 0.3489411473274231, |
|
"eval_runtime": 254.1092, |
|
"eval_samples_per_second": 12.617, |
|
"eval_steps_per_second": 3.156, |
|
"step": 81736 |
|
}, |
|
{ |
|
"epoch": 34.11, |
|
"learning_rate": 1.6590307820299502e-05, |
|
"loss": 0.05, |
|
"step": 82000 |
|
}, |
|
{ |
|
"epoch": 34.32, |
|
"learning_rate": 1.656955074875208e-05, |
|
"loss": 0.0477, |
|
"step": 82500 |
|
}, |
|
{ |
|
"epoch": 34.53, |
|
"learning_rate": 1.654875207986689e-05, |
|
"loss": 0.0567, |
|
"step": 83000 |
|
}, |
|
{ |
|
"epoch": 34.73, |
|
"learning_rate": 1.6527953410981697e-05, |
|
"loss": 0.0477, |
|
"step": 83500 |
|
}, |
|
{ |
|
"epoch": 34.94, |
|
"learning_rate": 1.6507154742096506e-05, |
|
"loss": 0.0544, |
|
"step": 84000 |
|
}, |
|
{ |
|
"epoch": 35.0, |
|
"eval_cer": 0.2662, |
|
"eval_gen_len": 13.7679, |
|
"eval_loss": 0.3497730493545532, |
|
"eval_runtime": 268.9078, |
|
"eval_samples_per_second": 11.922, |
|
"eval_steps_per_second": 2.982, |
|
"step": 84140 |
|
}, |
|
{ |
|
"epoch": 35.15, |
|
"learning_rate": 1.6486356073211316e-05, |
|
"loss": 0.0433, |
|
"step": 84500 |
|
}, |
|
{ |
|
"epoch": 35.36, |
|
"learning_rate": 1.6465599001663896e-05, |
|
"loss": 0.0426, |
|
"step": 85000 |
|
}, |
|
{ |
|
"epoch": 35.57, |
|
"learning_rate": 1.64448003327787e-05, |
|
"loss": 0.0477, |
|
"step": 85500 |
|
}, |
|
{ |
|
"epoch": 35.77, |
|
"learning_rate": 1.642400166389351e-05, |
|
"loss": 0.0497, |
|
"step": 86000 |
|
}, |
|
{ |
|
"epoch": 35.98, |
|
"learning_rate": 1.640320299500832e-05, |
|
"loss": 0.0571, |
|
"step": 86500 |
|
}, |
|
{ |
|
"epoch": 36.0, |
|
"eval_cer": 0.2661, |
|
"eval_gen_len": 13.8001, |
|
"eval_loss": 0.35858893394470215, |
|
"eval_runtime": 266.9991, |
|
"eval_samples_per_second": 12.008, |
|
"eval_steps_per_second": 3.004, |
|
"step": 86544 |
|
}, |
|
{ |
|
"epoch": 36.19, |
|
"learning_rate": 1.63824459234609e-05, |
|
"loss": 0.0406, |
|
"step": 87000 |
|
}, |
|
{ |
|
"epoch": 36.4, |
|
"learning_rate": 1.6361647254575706e-05, |
|
"loss": 0.0431, |
|
"step": 87500 |
|
}, |
|
{ |
|
"epoch": 36.61, |
|
"learning_rate": 1.6340848585690516e-05, |
|
"loss": 0.0507, |
|
"step": 88000 |
|
}, |
|
{ |
|
"epoch": 36.81, |
|
"learning_rate": 1.6320049916805325e-05, |
|
"loss": 0.0492, |
|
"step": 88500 |
|
}, |
|
{ |
|
"epoch": 37.0, |
|
"eval_cer": 0.2564, |
|
"eval_gen_len": 13.5415, |
|
"eval_loss": 0.35140517354011536, |
|
"eval_runtime": 259.4147, |
|
"eval_samples_per_second": 12.359, |
|
"eval_steps_per_second": 3.092, |
|
"step": 88948 |
|
}, |
|
{ |
|
"epoch": 37.02, |
|
"learning_rate": 1.6299251247920134e-05, |
|
"loss": 0.0515, |
|
"step": 89000 |
|
}, |
|
{ |
|
"epoch": 37.23, |
|
"learning_rate": 1.6278452579034944e-05, |
|
"loss": 0.0421, |
|
"step": 89500 |
|
}, |
|
{ |
|
"epoch": 37.44, |
|
"learning_rate": 1.6257695507487523e-05, |
|
"loss": 0.0432, |
|
"step": 90000 |
|
}, |
|
{ |
|
"epoch": 37.65, |
|
"learning_rate": 1.6236896838602333e-05, |
|
"loss": 0.0449, |
|
"step": 90500 |
|
}, |
|
{ |
|
"epoch": 37.85, |
|
"learning_rate": 1.621609816971714e-05, |
|
"loss": 0.0484, |
|
"step": 91000 |
|
}, |
|
{ |
|
"epoch": 38.0, |
|
"eval_cer": 0.2639, |
|
"eval_gen_len": 13.8534, |
|
"eval_loss": 0.3746128976345062, |
|
"eval_runtime": 254.0333, |
|
"eval_samples_per_second": 12.62, |
|
"eval_steps_per_second": 3.157, |
|
"step": 91352 |
|
}, |
|
{ |
|
"epoch": 38.06, |
|
"learning_rate": 1.6195299500831948e-05, |
|
"loss": 0.0481, |
|
"step": 91500 |
|
}, |
|
{ |
|
"epoch": 38.27, |
|
"learning_rate": 1.6174500831946758e-05, |
|
"loss": 0.0402, |
|
"step": 92000 |
|
}, |
|
{ |
|
"epoch": 38.48, |
|
"learning_rate": 1.6153743760399337e-05, |
|
"loss": 0.0395, |
|
"step": 92500 |
|
}, |
|
{ |
|
"epoch": 38.69, |
|
"learning_rate": 1.6132945091514143e-05, |
|
"loss": 0.0456, |
|
"step": 93000 |
|
}, |
|
{ |
|
"epoch": 38.89, |
|
"learning_rate": 1.6112146422628953e-05, |
|
"loss": 0.0451, |
|
"step": 93500 |
|
}, |
|
{ |
|
"epoch": 39.0, |
|
"eval_cer": 0.2627, |
|
"eval_gen_len": 13.7527, |
|
"eval_loss": 0.35255342721939087, |
|
"eval_runtime": 264.6452, |
|
"eval_samples_per_second": 12.114, |
|
"eval_steps_per_second": 3.03, |
|
"step": 93756 |
|
}, |
|
{ |
|
"epoch": 39.1, |
|
"learning_rate": 1.6091347753743762e-05, |
|
"loss": 0.0363, |
|
"step": 94000 |
|
}, |
|
{ |
|
"epoch": 39.31, |
|
"learning_rate": 1.6070590682196342e-05, |
|
"loss": 0.0382, |
|
"step": 94500 |
|
}, |
|
{ |
|
"epoch": 39.52, |
|
"learning_rate": 1.6049792013311148e-05, |
|
"loss": 0.0448, |
|
"step": 95000 |
|
}, |
|
{ |
|
"epoch": 39.73, |
|
"learning_rate": 1.6028993344425957e-05, |
|
"loss": 0.0399, |
|
"step": 95500 |
|
}, |
|
{ |
|
"epoch": 39.93, |
|
"learning_rate": 1.6008194675540767e-05, |
|
"loss": 0.045, |
|
"step": 96000 |
|
}, |
|
{ |
|
"epoch": 40.0, |
|
"eval_cer": 0.2583, |
|
"eval_gen_len": 13.6694, |
|
"eval_loss": 0.3510436713695526, |
|
"eval_runtime": 264.9384, |
|
"eval_samples_per_second": 12.101, |
|
"eval_steps_per_second": 3.027, |
|
"step": 96160 |
|
}, |
|
{ |
|
"epoch": 40.14, |
|
"learning_rate": 1.5987437603993347e-05, |
|
"loss": 0.0393, |
|
"step": 96500 |
|
}, |
|
{ |
|
"epoch": 40.35, |
|
"learning_rate": 1.5966638935108153e-05, |
|
"loss": 0.0338, |
|
"step": 97000 |
|
}, |
|
{ |
|
"epoch": 40.56, |
|
"learning_rate": 1.5945840266222962e-05, |
|
"loss": 0.0398, |
|
"step": 97500 |
|
}, |
|
{ |
|
"epoch": 40.77, |
|
"learning_rate": 1.5925083194675542e-05, |
|
"loss": 0.0425, |
|
"step": 98000 |
|
}, |
|
{ |
|
"epoch": 40.97, |
|
"learning_rate": 1.590428452579035e-05, |
|
"loss": 0.0455, |
|
"step": 98500 |
|
}, |
|
{ |
|
"epoch": 41.0, |
|
"eval_cer": 0.2619, |
|
"eval_gen_len": 13.7012, |
|
"eval_loss": 0.3675419092178345, |
|
"eval_runtime": 255.341, |
|
"eval_samples_per_second": 12.556, |
|
"eval_steps_per_second": 3.141, |
|
"step": 98564 |
|
}, |
|
{ |
|
"epoch": 41.18, |
|
"learning_rate": 1.5883485856905157e-05, |
|
"loss": 0.0372, |
|
"step": 99000 |
|
}, |
|
{ |
|
"epoch": 41.39, |
|
"learning_rate": 1.5862687188019967e-05, |
|
"loss": 0.0378, |
|
"step": 99500 |
|
}, |
|
{ |
|
"epoch": 41.6, |
|
"learning_rate": 1.5841888519134776e-05, |
|
"loss": 0.0369, |
|
"step": 100000 |
|
}, |
|
{ |
|
"epoch": 41.81, |
|
"learning_rate": 1.5821089850249585e-05, |
|
"loss": 0.0452, |
|
"step": 100500 |
|
}, |
|
{ |
|
"epoch": 42.0, |
|
"eval_cer": 0.2672, |
|
"eval_gen_len": 13.8653, |
|
"eval_loss": 0.3757382035255432, |
|
"eval_runtime": 268.6077, |
|
"eval_samples_per_second": 11.936, |
|
"eval_steps_per_second": 2.986, |
|
"step": 100968 |
|
}, |
|
{ |
|
"epoch": 42.01, |
|
"learning_rate": 1.5800291181364395e-05, |
|
"loss": 0.046, |
|
"step": 101000 |
|
}, |
|
{ |
|
"epoch": 42.22, |
|
"learning_rate": 1.57794925124792e-05, |
|
"loss": 0.0319, |
|
"step": 101500 |
|
}, |
|
{ |
|
"epoch": 42.43, |
|
"learning_rate": 1.5758735440931784e-05, |
|
"loss": 0.0383, |
|
"step": 102000 |
|
}, |
|
{ |
|
"epoch": 42.64, |
|
"learning_rate": 1.573793677204659e-05, |
|
"loss": 0.0376, |
|
"step": 102500 |
|
}, |
|
{ |
|
"epoch": 42.85, |
|
"learning_rate": 1.57171381031614e-05, |
|
"loss": 0.0403, |
|
"step": 103000 |
|
}, |
|
{ |
|
"epoch": 43.0, |
|
"eval_cer": 0.2583, |
|
"eval_gen_len": 13.6532, |
|
"eval_loss": 0.3421362340450287, |
|
"eval_runtime": 257.7664, |
|
"eval_samples_per_second": 12.438, |
|
"eval_steps_per_second": 3.111, |
|
"step": 103372 |
|
}, |
|
{ |
|
"epoch": 43.05, |
|
"learning_rate": 1.5696339434276205e-05, |
|
"loss": 0.0419, |
|
"step": 103500 |
|
}, |
|
{ |
|
"epoch": 43.26, |
|
"learning_rate": 1.5675540765391015e-05, |
|
"loss": 0.0322, |
|
"step": 104000 |
|
}, |
|
{ |
|
"epoch": 43.47, |
|
"learning_rate": 1.5654783693843594e-05, |
|
"loss": 0.0396, |
|
"step": 104500 |
|
}, |
|
{ |
|
"epoch": 43.68, |
|
"learning_rate": 1.5633985024958404e-05, |
|
"loss": 0.0386, |
|
"step": 105000 |
|
}, |
|
{ |
|
"epoch": 43.89, |
|
"learning_rate": 1.5613186356073213e-05, |
|
"loss": 0.0372, |
|
"step": 105500 |
|
}, |
|
{ |
|
"epoch": 44.0, |
|
"eval_cer": 0.2553, |
|
"eval_gen_len": 13.7679, |
|
"eval_loss": 0.37172210216522217, |
|
"eval_runtime": 266.5505, |
|
"eval_samples_per_second": 12.028, |
|
"eval_steps_per_second": 3.009, |
|
"step": 105776 |
|
}, |
|
{ |
|
"epoch": 44.09, |
|
"learning_rate": 1.5592387687188023e-05, |
|
"loss": 0.0381, |
|
"step": 106000 |
|
}, |
|
{ |
|
"epoch": 44.3, |
|
"learning_rate": 1.55716306156406e-05, |
|
"loss": 0.0343, |
|
"step": 106500 |
|
}, |
|
{ |
|
"epoch": 44.51, |
|
"learning_rate": 1.555083194675541e-05, |
|
"loss": 0.0348, |
|
"step": 107000 |
|
}, |
|
{ |
|
"epoch": 44.72, |
|
"learning_rate": 1.5530033277870218e-05, |
|
"loss": 0.0412, |
|
"step": 107500 |
|
}, |
|
{ |
|
"epoch": 44.93, |
|
"learning_rate": 1.5509234608985027e-05, |
|
"loss": 0.041, |
|
"step": 108000 |
|
}, |
|
{ |
|
"epoch": 45.0, |
|
"eval_cer": 0.258, |
|
"eval_gen_len": 13.7246, |
|
"eval_loss": 0.367112934589386, |
|
"eval_runtime": 255.2355, |
|
"eval_samples_per_second": 12.561, |
|
"eval_steps_per_second": 3.142, |
|
"step": 108180 |
|
}, |
|
{ |
|
"epoch": 45.13, |
|
"learning_rate": 1.5488477537437604e-05, |
|
"loss": 0.0358, |
|
"step": 108500 |
|
}, |
|
{ |
|
"epoch": 45.34, |
|
"learning_rate": 1.5467678868552413e-05, |
|
"loss": 0.0347, |
|
"step": 109000 |
|
}, |
|
{ |
|
"epoch": 45.55, |
|
"learning_rate": 1.5446880199667222e-05, |
|
"loss": 0.0372, |
|
"step": 109500 |
|
}, |
|
{ |
|
"epoch": 45.76, |
|
"learning_rate": 1.5426081530782032e-05, |
|
"loss": 0.0369, |
|
"step": 110000 |
|
}, |
|
{ |
|
"epoch": 45.97, |
|
"learning_rate": 1.540528286189684e-05, |
|
"loss": 0.0372, |
|
"step": 110500 |
|
}, |
|
{ |
|
"epoch": 46.0, |
|
"eval_cer": 0.2596, |
|
"eval_gen_len": 13.8244, |
|
"eval_loss": 0.36525318026542664, |
|
"eval_runtime": 258.7606, |
|
"eval_samples_per_second": 12.39, |
|
"eval_steps_per_second": 3.099, |
|
"step": 110584 |
|
}, |
|
{ |
|
"epoch": 46.17, |
|
"learning_rate": 1.5384525790349418e-05, |
|
"loss": 0.0276, |
|
"step": 111000 |
|
}, |
|
{ |
|
"epoch": 46.38, |
|
"learning_rate": 1.5363727121464227e-05, |
|
"loss": 0.0311, |
|
"step": 111500 |
|
}, |
|
{ |
|
"epoch": 46.59, |
|
"learning_rate": 1.5342928452579036e-05, |
|
"loss": 0.0343, |
|
"step": 112000 |
|
}, |
|
{ |
|
"epoch": 46.8, |
|
"learning_rate": 1.5322129783693846e-05, |
|
"loss": 0.0418, |
|
"step": 112500 |
|
}, |
|
{ |
|
"epoch": 47.0, |
|
"eval_cer": 0.2573, |
|
"eval_gen_len": 13.7427, |
|
"eval_loss": 0.3767205476760864, |
|
"eval_runtime": 259.1923, |
|
"eval_samples_per_second": 12.369, |
|
"eval_steps_per_second": 3.094, |
|
"step": 112988 |
|
}, |
|
{ |
|
"epoch": 47.0, |
|
"learning_rate": 1.5301372712146422e-05, |
|
"loss": 0.0387, |
|
"step": 113000 |
|
}, |
|
{ |
|
"epoch": 47.21, |
|
"learning_rate": 1.528057404326123e-05, |
|
"loss": 0.0288, |
|
"step": 113500 |
|
}, |
|
{ |
|
"epoch": 47.42, |
|
"learning_rate": 1.525977537437604e-05, |
|
"loss": 0.0316, |
|
"step": 114000 |
|
}, |
|
{ |
|
"epoch": 47.63, |
|
"learning_rate": 1.5238976705490849e-05, |
|
"loss": 0.0302, |
|
"step": 114500 |
|
}, |
|
{ |
|
"epoch": 47.84, |
|
"learning_rate": 1.521821963394343e-05, |
|
"loss": 0.036, |
|
"step": 115000 |
|
}, |
|
{ |
|
"epoch": 48.0, |
|
"eval_cer": 0.2645, |
|
"eval_gen_len": 13.9616, |
|
"eval_loss": 0.3853040933609009, |
|
"eval_runtime": 258.5981, |
|
"eval_samples_per_second": 12.398, |
|
"eval_steps_per_second": 3.101, |
|
"step": 115392 |
|
}, |
|
{ |
|
"epoch": 48.04, |
|
"learning_rate": 1.5197420965058238e-05, |
|
"loss": 0.0414, |
|
"step": 115500 |
|
}, |
|
{ |
|
"epoch": 48.25, |
|
"learning_rate": 1.5176622296173047e-05, |
|
"loss": 0.0302, |
|
"step": 116000 |
|
}, |
|
{ |
|
"epoch": 48.46, |
|
"learning_rate": 1.5155823627287855e-05, |
|
"loss": 0.0336, |
|
"step": 116500 |
|
}, |
|
{ |
|
"epoch": 48.67, |
|
"learning_rate": 1.5135066555740435e-05, |
|
"loss": 0.0346, |
|
"step": 117000 |
|
}, |
|
{ |
|
"epoch": 48.88, |
|
"learning_rate": 1.5114267886855242e-05, |
|
"loss": 0.0354, |
|
"step": 117500 |
|
}, |
|
{ |
|
"epoch": 49.0, |
|
"eval_cer": 0.2571, |
|
"eval_gen_len": 13.3799, |
|
"eval_loss": 0.3713897466659546, |
|
"eval_runtime": 256.4932, |
|
"eval_samples_per_second": 12.499, |
|
"eval_steps_per_second": 3.127, |
|
"step": 117796 |
|
}, |
|
{ |
|
"epoch": 49.08, |
|
"learning_rate": 1.5093469217970052e-05, |
|
"loss": 0.0285, |
|
"step": 118000 |
|
}, |
|
{ |
|
"epoch": 49.29, |
|
"learning_rate": 1.507267054908486e-05, |
|
"loss": 0.0311, |
|
"step": 118500 |
|
}, |
|
{ |
|
"epoch": 49.5, |
|
"learning_rate": 1.5051871880199669e-05, |
|
"loss": 0.0297, |
|
"step": 119000 |
|
}, |
|
{ |
|
"epoch": 49.71, |
|
"learning_rate": 1.5031114808652247e-05, |
|
"loss": 0.0327, |
|
"step": 119500 |
|
}, |
|
{ |
|
"epoch": 49.92, |
|
"learning_rate": 1.5010316139767056e-05, |
|
"loss": 0.0336, |
|
"step": 120000 |
|
}, |
|
{ |
|
"epoch": 50.0, |
|
"eval_cer": 0.2592, |
|
"eval_gen_len": 13.7667, |
|
"eval_loss": 0.38062140345573425, |
|
"eval_runtime": 257.9154, |
|
"eval_samples_per_second": 12.43, |
|
"eval_steps_per_second": 3.11, |
|
"step": 120200 |
|
}, |
|
{ |
|
"epoch": 50.12, |
|
"learning_rate": 1.4989517470881864e-05, |
|
"loss": 0.0291, |
|
"step": 120500 |
|
}, |
|
{ |
|
"epoch": 50.33, |
|
"learning_rate": 1.4968718801996673e-05, |
|
"loss": 0.0286, |
|
"step": 121000 |
|
}, |
|
{ |
|
"epoch": 50.54, |
|
"learning_rate": 1.4947920133111483e-05, |
|
"loss": 0.0282, |
|
"step": 121500 |
|
}, |
|
{ |
|
"epoch": 50.75, |
|
"learning_rate": 1.4927163061564061e-05, |
|
"loss": 0.0299, |
|
"step": 122000 |
|
}, |
|
{ |
|
"epoch": 50.96, |
|
"learning_rate": 1.4906364392678869e-05, |
|
"loss": 0.0367, |
|
"step": 122500 |
|
}, |
|
{ |
|
"epoch": 51.0, |
|
"eval_cer": 0.2567, |
|
"eval_gen_len": 13.7402, |
|
"eval_loss": 0.36996766924858093, |
|
"eval_runtime": 265.273, |
|
"eval_samples_per_second": 12.086, |
|
"eval_steps_per_second": 3.023, |
|
"step": 122604 |
|
}, |
|
{ |
|
"epoch": 51.16, |
|
"learning_rate": 1.4885607321131449e-05, |
|
"loss": 0.0269, |
|
"step": 123000 |
|
}, |
|
{ |
|
"epoch": 51.37, |
|
"learning_rate": 1.4864808652246256e-05, |
|
"loss": 0.0288, |
|
"step": 123500 |
|
}, |
|
{ |
|
"epoch": 51.58, |
|
"learning_rate": 1.4844009983361066e-05, |
|
"loss": 0.0306, |
|
"step": 124000 |
|
}, |
|
{ |
|
"epoch": 51.79, |
|
"learning_rate": 1.4823211314475873e-05, |
|
"loss": 0.0352, |
|
"step": 124500 |
|
}, |
|
{ |
|
"epoch": 52.0, |
|
"learning_rate": 1.4802412645590683e-05, |
|
"loss": 0.0278, |
|
"step": 125000 |
|
}, |
|
{ |
|
"epoch": 52.0, |
|
"eval_cer": 0.2552, |
|
"eval_gen_len": 13.6507, |
|
"eval_loss": 0.37308937311172485, |
|
"eval_runtime": 264.0574, |
|
"eval_samples_per_second": 12.141, |
|
"eval_steps_per_second": 3.037, |
|
"step": 125008 |
|
}, |
|
{ |
|
"epoch": 52.2, |
|
"learning_rate": 1.478161397670549e-05, |
|
"loss": 0.0245, |
|
"step": 125500 |
|
}, |
|
{ |
|
"epoch": 52.41, |
|
"learning_rate": 1.47608153078203e-05, |
|
"loss": 0.0256, |
|
"step": 126000 |
|
}, |
|
{ |
|
"epoch": 52.62, |
|
"learning_rate": 1.4740016638935109e-05, |
|
"loss": 0.0303, |
|
"step": 126500 |
|
}, |
|
{ |
|
"epoch": 52.83, |
|
"learning_rate": 1.4719259567387689e-05, |
|
"loss": 0.0318, |
|
"step": 127000 |
|
}, |
|
{ |
|
"epoch": 53.0, |
|
"eval_cer": 0.2558, |
|
"eval_gen_len": 13.4878, |
|
"eval_loss": 0.3711611330509186, |
|
"eval_runtime": 256.1423, |
|
"eval_samples_per_second": 12.516, |
|
"eval_steps_per_second": 3.131, |
|
"step": 127412 |
|
}, |
|
{ |
|
"epoch": 53.04, |
|
"learning_rate": 1.4698460898502498e-05, |
|
"loss": 0.0308, |
|
"step": 127500 |
|
}, |
|
{ |
|
"epoch": 53.24, |
|
"learning_rate": 1.4677662229617306e-05, |
|
"loss": 0.0258, |
|
"step": 128000 |
|
}, |
|
{ |
|
"epoch": 53.45, |
|
"learning_rate": 1.4656863560732115e-05, |
|
"loss": 0.0268, |
|
"step": 128500 |
|
}, |
|
{ |
|
"epoch": 53.66, |
|
"learning_rate": 1.4636064891846925e-05, |
|
"loss": 0.0306, |
|
"step": 129000 |
|
}, |
|
{ |
|
"epoch": 53.87, |
|
"learning_rate": 1.4615307820299503e-05, |
|
"loss": 0.0307, |
|
"step": 129500 |
|
}, |
|
{ |
|
"epoch": 54.0, |
|
"eval_cer": 0.2545, |
|
"eval_gen_len": 13.6042, |
|
"eval_loss": 0.3638122081756592, |
|
"eval_runtime": 259.1147, |
|
"eval_samples_per_second": 12.373, |
|
"eval_steps_per_second": 3.095, |
|
"step": 129816 |
|
}, |
|
{ |
|
"epoch": 54.08, |
|
"learning_rate": 1.459450915141431e-05, |
|
"loss": 0.0246, |
|
"step": 130000 |
|
}, |
|
{ |
|
"epoch": 54.28, |
|
"learning_rate": 1.457371048252912e-05, |
|
"loss": 0.0206, |
|
"step": 130500 |
|
}, |
|
{ |
|
"epoch": 54.49, |
|
"learning_rate": 1.455291181364393e-05, |
|
"loss": 0.0298, |
|
"step": 131000 |
|
}, |
|
{ |
|
"epoch": 54.7, |
|
"learning_rate": 1.4532196339434276e-05, |
|
"loss": 0.0284, |
|
"step": 131500 |
|
}, |
|
{ |
|
"epoch": 54.91, |
|
"learning_rate": 1.4511397670549086e-05, |
|
"loss": 0.0277, |
|
"step": 132000 |
|
}, |
|
{ |
|
"epoch": 55.0, |
|
"eval_cer": 0.2574, |
|
"eval_gen_len": 13.8247, |
|
"eval_loss": 0.3866593539714813, |
|
"eval_runtime": 266.2003, |
|
"eval_samples_per_second": 12.044, |
|
"eval_steps_per_second": 3.013, |
|
"step": 132220 |
|
}, |
|
{ |
|
"epoch": 55.12, |
|
"learning_rate": 1.4490599001663895e-05, |
|
"loss": 0.0258, |
|
"step": 132500 |
|
}, |
|
{ |
|
"epoch": 55.32, |
|
"learning_rate": 1.4469800332778703e-05, |
|
"loss": 0.0264, |
|
"step": 133000 |
|
}, |
|
{ |
|
"epoch": 55.53, |
|
"learning_rate": 1.4449001663893512e-05, |
|
"loss": 0.0273, |
|
"step": 133500 |
|
}, |
|
{ |
|
"epoch": 55.74, |
|
"learning_rate": 1.442820299500832e-05, |
|
"loss": 0.033, |
|
"step": 134000 |
|
}, |
|
{ |
|
"epoch": 55.95, |
|
"learning_rate": 1.4407404326123129e-05, |
|
"loss": 0.0289, |
|
"step": 134500 |
|
}, |
|
{ |
|
"epoch": 56.0, |
|
"eval_cer": 0.26, |
|
"eval_gen_len": 13.9024, |
|
"eval_loss": 0.38215455412864685, |
|
"eval_runtime": 268.768, |
|
"eval_samples_per_second": 11.929, |
|
"eval_steps_per_second": 2.984, |
|
"step": 134624 |
|
}, |
|
{ |
|
"epoch": 56.16, |
|
"learning_rate": 1.4386605657237937e-05, |
|
"loss": 0.0263, |
|
"step": 135000 |
|
}, |
|
{ |
|
"epoch": 56.36, |
|
"learning_rate": 1.4365848585690517e-05, |
|
"loss": 0.0227, |
|
"step": 135500 |
|
}, |
|
{ |
|
"epoch": 56.57, |
|
"learning_rate": 1.4345049916805324e-05, |
|
"loss": 0.0276, |
|
"step": 136000 |
|
}, |
|
{ |
|
"epoch": 56.78, |
|
"learning_rate": 1.4324251247920134e-05, |
|
"loss": 0.0277, |
|
"step": 136500 |
|
}, |
|
{ |
|
"epoch": 56.99, |
|
"learning_rate": 1.4303494176372714e-05, |
|
"loss": 0.0259, |
|
"step": 137000 |
|
}, |
|
{ |
|
"epoch": 57.0, |
|
"eval_cer": 0.2541, |
|
"eval_gen_len": 13.6722, |
|
"eval_loss": 0.3895968198776245, |
|
"eval_runtime": 256.4716, |
|
"eval_samples_per_second": 12.5, |
|
"eval_steps_per_second": 3.127, |
|
"step": 137028 |
|
}, |
|
{ |
|
"epoch": 57.2, |
|
"learning_rate": 1.4282695507487523e-05, |
|
"loss": 0.0202, |
|
"step": 137500 |
|
}, |
|
{ |
|
"epoch": 57.4, |
|
"learning_rate": 1.4261896838602332e-05, |
|
"loss": 0.0236, |
|
"step": 138000 |
|
}, |
|
{ |
|
"epoch": 57.61, |
|
"learning_rate": 1.424109816971714e-05, |
|
"loss": 0.0288, |
|
"step": 138500 |
|
}, |
|
{ |
|
"epoch": 57.82, |
|
"learning_rate": 1.422029950083195e-05, |
|
"loss": 0.0277, |
|
"step": 139000 |
|
}, |
|
{ |
|
"epoch": 58.0, |
|
"eval_cer": 0.2584, |
|
"eval_gen_len": 13.7208, |
|
"eval_loss": 0.38816508650779724, |
|
"eval_runtime": 264.0106, |
|
"eval_samples_per_second": 12.143, |
|
"eval_steps_per_second": 3.038, |
|
"step": 139432 |
|
}, |
|
{ |
|
"epoch": 58.03, |
|
"learning_rate": 1.4199500831946757e-05, |
|
"loss": 0.0297, |
|
"step": 139500 |
|
}, |
|
{ |
|
"epoch": 58.24, |
|
"learning_rate": 1.4178702163061566e-05, |
|
"loss": 0.0206, |
|
"step": 140000 |
|
}, |
|
{ |
|
"epoch": 58.44, |
|
"learning_rate": 1.4157945091514145e-05, |
|
"loss": 0.0245, |
|
"step": 140500 |
|
}, |
|
{ |
|
"epoch": 58.65, |
|
"learning_rate": 1.4137146422628954e-05, |
|
"loss": 0.0264, |
|
"step": 141000 |
|
}, |
|
{ |
|
"epoch": 58.86, |
|
"learning_rate": 1.4116347753743762e-05, |
|
"loss": 0.0289, |
|
"step": 141500 |
|
}, |
|
{ |
|
"epoch": 59.0, |
|
"eval_cer": 0.2587, |
|
"eval_gen_len": 13.7096, |
|
"eval_loss": 0.39096423983573914, |
|
"eval_runtime": 260.1849, |
|
"eval_samples_per_second": 12.322, |
|
"eval_steps_per_second": 3.082, |
|
"step": 141836 |
|
}, |
|
{ |
|
"epoch": 59.07, |
|
"learning_rate": 1.4095549084858571e-05, |
|
"loss": 0.0243, |
|
"step": 142000 |
|
}, |
|
{ |
|
"epoch": 59.28, |
|
"learning_rate": 1.4074750415973379e-05, |
|
"loss": 0.0218, |
|
"step": 142500 |
|
}, |
|
{ |
|
"epoch": 59.48, |
|
"learning_rate": 1.4053951747088188e-05, |
|
"loss": 0.0241, |
|
"step": 143000 |
|
}, |
|
{ |
|
"epoch": 59.69, |
|
"learning_rate": 1.4033194675540766e-05, |
|
"loss": 0.0242, |
|
"step": 143500 |
|
}, |
|
{ |
|
"epoch": 59.9, |
|
"learning_rate": 1.4012396006655576e-05, |
|
"loss": 0.0252, |
|
"step": 144000 |
|
}, |
|
{ |
|
"epoch": 60.0, |
|
"eval_cer": 0.2515, |
|
"eval_gen_len": 13.6971, |
|
"eval_loss": 0.38846734166145325, |
|
"eval_runtime": 258.3853, |
|
"eval_samples_per_second": 12.408, |
|
"eval_steps_per_second": 3.104, |
|
"step": 144240 |
|
}, |
|
{ |
|
"epoch": 60.11, |
|
"learning_rate": 1.3991597337770383e-05, |
|
"loss": 0.0259, |
|
"step": 144500 |
|
}, |
|
{ |
|
"epoch": 60.32, |
|
"learning_rate": 1.3970798668885193e-05, |
|
"loss": 0.0208, |
|
"step": 145000 |
|
}, |
|
{ |
|
"epoch": 60.52, |
|
"learning_rate": 1.3950000000000002e-05, |
|
"loss": 0.0252, |
|
"step": 145500 |
|
}, |
|
{ |
|
"epoch": 60.73, |
|
"learning_rate": 1.392920133111481e-05, |
|
"loss": 0.0253, |
|
"step": 146000 |
|
}, |
|
{ |
|
"epoch": 60.94, |
|
"learning_rate": 1.3908402662229619e-05, |
|
"loss": 0.0265, |
|
"step": 146500 |
|
}, |
|
{ |
|
"epoch": 61.0, |
|
"eval_cer": 0.2569, |
|
"eval_gen_len": 13.85, |
|
"eval_loss": 0.3915986716747284, |
|
"eval_runtime": 271.1284, |
|
"eval_samples_per_second": 11.825, |
|
"eval_steps_per_second": 2.958, |
|
"step": 146644 |
|
}, |
|
{ |
|
"epoch": 61.15, |
|
"learning_rate": 1.3887603993344427e-05, |
|
"loss": 0.0232, |
|
"step": 147000 |
|
}, |
|
{ |
|
"epoch": 61.36, |
|
"learning_rate": 1.3866846921797007e-05, |
|
"loss": 0.0233, |
|
"step": 147500 |
|
}, |
|
{ |
|
"epoch": 61.56, |
|
"learning_rate": 1.3846048252911814e-05, |
|
"loss": 0.0224, |
|
"step": 148000 |
|
}, |
|
{ |
|
"epoch": 61.77, |
|
"learning_rate": 1.3825249584026624e-05, |
|
"loss": 0.0229, |
|
"step": 148500 |
|
}, |
|
{ |
|
"epoch": 61.98, |
|
"learning_rate": 1.3804450915141431e-05, |
|
"loss": 0.0229, |
|
"step": 149000 |
|
}, |
|
{ |
|
"epoch": 62.0, |
|
"eval_cer": 0.2565, |
|
"eval_gen_len": 13.8206, |
|
"eval_loss": 0.3992536962032318, |
|
"eval_runtime": 268.8678, |
|
"eval_samples_per_second": 11.924, |
|
"eval_steps_per_second": 2.983, |
|
"step": 149048 |
|
}, |
|
{ |
|
"epoch": 62.19, |
|
"learning_rate": 1.378365224625624e-05, |
|
"loss": 0.0199, |
|
"step": 149500 |
|
}, |
|
{ |
|
"epoch": 62.4, |
|
"learning_rate": 1.3762895174708819e-05, |
|
"loss": 0.0239, |
|
"step": 150000 |
|
}, |
|
{ |
|
"epoch": 62.6, |
|
"learning_rate": 1.3742096505823628e-05, |
|
"loss": 0.0261, |
|
"step": 150500 |
|
}, |
|
{ |
|
"epoch": 62.81, |
|
"learning_rate": 1.3721297836938436e-05, |
|
"loss": 0.0225, |
|
"step": 151000 |
|
}, |
|
{ |
|
"epoch": 63.0, |
|
"eval_cer": 0.2507, |
|
"eval_gen_len": 13.6354, |
|
"eval_loss": 0.3880002200603485, |
|
"eval_runtime": 264.7668, |
|
"eval_samples_per_second": 12.109, |
|
"eval_steps_per_second": 3.029, |
|
"step": 151452 |
|
}, |
|
{ |
|
"epoch": 63.02, |
|
"learning_rate": 1.3700499168053245e-05, |
|
"loss": 0.0268, |
|
"step": 151500 |
|
}, |
|
{ |
|
"epoch": 63.23, |
|
"learning_rate": 1.3679742096505825e-05, |
|
"loss": 0.0188, |
|
"step": 152000 |
|
}, |
|
{ |
|
"epoch": 63.44, |
|
"learning_rate": 1.3658985024958405e-05, |
|
"loss": 0.022, |
|
"step": 152500 |
|
}, |
|
{ |
|
"epoch": 63.64, |
|
"learning_rate": 1.3638186356073213e-05, |
|
"loss": 0.0232, |
|
"step": 153000 |
|
}, |
|
{ |
|
"epoch": 63.85, |
|
"learning_rate": 1.3617387687188022e-05, |
|
"loss": 0.0221, |
|
"step": 153500 |
|
}, |
|
{ |
|
"epoch": 64.0, |
|
"eval_cer": 0.2583, |
|
"eval_gen_len": 13.9485, |
|
"eval_loss": 0.3893982470035553, |
|
"eval_runtime": 270.1502, |
|
"eval_samples_per_second": 11.867, |
|
"eval_steps_per_second": 2.969, |
|
"step": 153856 |
|
}, |
|
{ |
|
"epoch": 64.06, |
|
"learning_rate": 1.359658901830283e-05, |
|
"loss": 0.0268, |
|
"step": 154000 |
|
}, |
|
{ |
|
"epoch": 64.27, |
|
"learning_rate": 1.3575790349417639e-05, |
|
"loss": 0.0159, |
|
"step": 154500 |
|
}, |
|
{ |
|
"epoch": 64.48, |
|
"learning_rate": 1.3554991680532448e-05, |
|
"loss": 0.0191, |
|
"step": 155000 |
|
}, |
|
{ |
|
"epoch": 64.68, |
|
"learning_rate": 1.3534193011647256e-05, |
|
"loss": 0.02, |
|
"step": 155500 |
|
}, |
|
{ |
|
"epoch": 64.89, |
|
"learning_rate": 1.3513394342762065e-05, |
|
"loss": 0.0234, |
|
"step": 156000 |
|
}, |
|
{ |
|
"epoch": 65.0, |
|
"eval_cer": 0.2515, |
|
"eval_gen_len": 13.6329, |
|
"eval_loss": 0.39693862199783325, |
|
"eval_runtime": 262.2666, |
|
"eval_samples_per_second": 12.224, |
|
"eval_steps_per_second": 3.058, |
|
"step": 156260 |
|
}, |
|
{ |
|
"epoch": 65.1, |
|
"learning_rate": 1.3492595673876873e-05, |
|
"loss": 0.0225, |
|
"step": 156500 |
|
}, |
|
{ |
|
"epoch": 65.31, |
|
"learning_rate": 1.3471838602329453e-05, |
|
"loss": 0.0219, |
|
"step": 157000 |
|
}, |
|
{ |
|
"epoch": 65.52, |
|
"learning_rate": 1.3451081530782031e-05, |
|
"loss": 0.0224, |
|
"step": 157500 |
|
}, |
|
{ |
|
"epoch": 65.72, |
|
"learning_rate": 1.3430282861896839e-05, |
|
"loss": 0.0222, |
|
"step": 158000 |
|
}, |
|
{ |
|
"epoch": 65.93, |
|
"learning_rate": 1.3409484193011648e-05, |
|
"loss": 0.0251, |
|
"step": 158500 |
|
}, |
|
{ |
|
"epoch": 66.0, |
|
"eval_cer": 0.2524, |
|
"eval_gen_len": 13.9046, |
|
"eval_loss": 0.3977407217025757, |
|
"eval_runtime": 268.0146, |
|
"eval_samples_per_second": 11.962, |
|
"eval_steps_per_second": 2.992, |
|
"step": 158664 |
|
}, |
|
{ |
|
"epoch": 66.14, |
|
"learning_rate": 1.3388685524126458e-05, |
|
"loss": 0.0181, |
|
"step": 159000 |
|
}, |
|
{ |
|
"epoch": 66.35, |
|
"learning_rate": 1.3367886855241265e-05, |
|
"loss": 0.0194, |
|
"step": 159500 |
|
}, |
|
{ |
|
"epoch": 66.56, |
|
"learning_rate": 1.3347129783693843e-05, |
|
"loss": 0.0209, |
|
"step": 160000 |
|
}, |
|
{ |
|
"epoch": 66.76, |
|
"learning_rate": 1.3326331114808653e-05, |
|
"loss": 0.0235, |
|
"step": 160500 |
|
}, |
|
{ |
|
"epoch": 66.97, |
|
"learning_rate": 1.3305532445923462e-05, |
|
"loss": 0.0253, |
|
"step": 161000 |
|
}, |
|
{ |
|
"epoch": 67.0, |
|
"eval_cer": 0.2511, |
|
"eval_gen_len": 13.6563, |
|
"eval_loss": 0.40182340145111084, |
|
"eval_runtime": 252.4386, |
|
"eval_samples_per_second": 12.7, |
|
"eval_steps_per_second": 3.177, |
|
"step": 161068 |
|
}, |
|
{ |
|
"epoch": 67.18, |
|
"learning_rate": 1.328473377703827e-05, |
|
"loss": 0.0218, |
|
"step": 161500 |
|
}, |
|
{ |
|
"epoch": 67.39, |
|
"learning_rate": 1.326393510815308e-05, |
|
"loss": 0.0175, |
|
"step": 162000 |
|
}, |
|
{ |
|
"epoch": 67.6, |
|
"learning_rate": 1.3243136439267887e-05, |
|
"loss": 0.0204, |
|
"step": 162500 |
|
}, |
|
{ |
|
"epoch": 67.8, |
|
"learning_rate": 1.3222337770382696e-05, |
|
"loss": 0.0197, |
|
"step": 163000 |
|
}, |
|
{ |
|
"epoch": 68.0, |
|
"eval_cer": 0.2559, |
|
"eval_gen_len": 14.0237, |
|
"eval_loss": 0.3884351849555969, |
|
"eval_runtime": 272.5844, |
|
"eval_samples_per_second": 11.761, |
|
"eval_steps_per_second": 2.942, |
|
"step": 163472 |
|
}, |
|
{ |
|
"epoch": 68.01, |
|
"learning_rate": 1.3201539101497504e-05, |
|
"loss": 0.0241, |
|
"step": 163500 |
|
}, |
|
{ |
|
"epoch": 68.22, |
|
"learning_rate": 1.3180782029950084e-05, |
|
"loss": 0.0179, |
|
"step": 164000 |
|
}, |
|
{ |
|
"epoch": 68.43, |
|
"learning_rate": 1.3159983361064892e-05, |
|
"loss": 0.0175, |
|
"step": 164500 |
|
}, |
|
{ |
|
"epoch": 68.64, |
|
"learning_rate": 1.3139184692179701e-05, |
|
"loss": 0.0227, |
|
"step": 165000 |
|
}, |
|
{ |
|
"epoch": 68.84, |
|
"learning_rate": 1.3118386023294509e-05, |
|
"loss": 0.0219, |
|
"step": 165500 |
|
}, |
|
{ |
|
"epoch": 69.0, |
|
"eval_cer": 0.2519, |
|
"eval_gen_len": 13.5009, |
|
"eval_loss": 0.402326375246048, |
|
"eval_runtime": 254.8714, |
|
"eval_samples_per_second": 12.579, |
|
"eval_steps_per_second": 3.147, |
|
"step": 165876 |
|
}, |
|
{ |
|
"epoch": 69.05, |
|
"learning_rate": 1.309762895174709e-05, |
|
"loss": 0.0229, |
|
"step": 166000 |
|
}, |
|
{ |
|
"epoch": 69.26, |
|
"learning_rate": 1.30768302828619e-05, |
|
"loss": 0.0183, |
|
"step": 166500 |
|
}, |
|
{ |
|
"epoch": 69.47, |
|
"learning_rate": 1.3056073211314478e-05, |
|
"loss": 0.0195, |
|
"step": 167000 |
|
}, |
|
{ |
|
"epoch": 69.68, |
|
"learning_rate": 1.3035274542429285e-05, |
|
"loss": 0.0204, |
|
"step": 167500 |
|
}, |
|
{ |
|
"epoch": 69.88, |
|
"learning_rate": 1.3014475873544095e-05, |
|
"loss": 0.0207, |
|
"step": 168000 |
|
}, |
|
{ |
|
"epoch": 70.0, |
|
"eval_cer": 0.2559, |
|
"eval_gen_len": 13.4392, |
|
"eval_loss": 0.390462189912796, |
|
"eval_runtime": 247.3478, |
|
"eval_samples_per_second": 12.962, |
|
"eval_steps_per_second": 3.242, |
|
"step": 168280 |
|
}, |
|
{ |
|
"epoch": 70.09, |
|
"learning_rate": 1.2993677204658904e-05, |
|
"loss": 0.0216, |
|
"step": 168500 |
|
}, |
|
{ |
|
"epoch": 70.3, |
|
"learning_rate": 1.2972878535773712e-05, |
|
"loss": 0.0192, |
|
"step": 169000 |
|
}, |
|
{ |
|
"epoch": 70.51, |
|
"learning_rate": 1.2952079866888521e-05, |
|
"loss": 0.0192, |
|
"step": 169500 |
|
}, |
|
{ |
|
"epoch": 70.72, |
|
"learning_rate": 1.2931281198003329e-05, |
|
"loss": 0.0187, |
|
"step": 170000 |
|
}, |
|
{ |
|
"epoch": 70.92, |
|
"learning_rate": 1.2910524126455907e-05, |
|
"loss": 0.0233, |
|
"step": 170500 |
|
}, |
|
{ |
|
"epoch": 71.0, |
|
"eval_cer": 0.2574, |
|
"eval_gen_len": 13.7012, |
|
"eval_loss": 0.4090117812156677, |
|
"eval_runtime": 257.6398, |
|
"eval_samples_per_second": 12.444, |
|
"eval_steps_per_second": 3.113, |
|
"step": 170684 |
|
}, |
|
{ |
|
"epoch": 71.13, |
|
"learning_rate": 1.2889725457570716e-05, |
|
"loss": 0.019, |
|
"step": 171000 |
|
}, |
|
{ |
|
"epoch": 71.34, |
|
"learning_rate": 1.2868926788685526e-05, |
|
"loss": 0.0159, |
|
"step": 171500 |
|
}, |
|
{ |
|
"epoch": 71.55, |
|
"learning_rate": 1.2848128119800333e-05, |
|
"loss": 0.0213, |
|
"step": 172000 |
|
}, |
|
{ |
|
"epoch": 71.76, |
|
"learning_rate": 1.2827329450915143e-05, |
|
"loss": 0.0177, |
|
"step": 172500 |
|
}, |
|
{ |
|
"epoch": 71.96, |
|
"learning_rate": 1.280653078202995e-05, |
|
"loss": 0.024, |
|
"step": 173000 |
|
}, |
|
{ |
|
"epoch": 72.0, |
|
"eval_cer": 0.2472, |
|
"eval_gen_len": 13.6382, |
|
"eval_loss": 0.38612431287765503, |
|
"eval_runtime": 258.2841, |
|
"eval_samples_per_second": 12.413, |
|
"eval_steps_per_second": 3.105, |
|
"step": 173088 |
|
}, |
|
{ |
|
"epoch": 72.17, |
|
"learning_rate": 1.278577371048253e-05, |
|
"loss": 0.0176, |
|
"step": 173500 |
|
}, |
|
{ |
|
"epoch": 72.38, |
|
"learning_rate": 1.2764975041597338e-05, |
|
"loss": 0.017, |
|
"step": 174000 |
|
}, |
|
{ |
|
"epoch": 72.59, |
|
"learning_rate": 1.2744176372712147e-05, |
|
"loss": 0.0167, |
|
"step": 174500 |
|
}, |
|
{ |
|
"epoch": 72.8, |
|
"learning_rate": 1.2723377703826955e-05, |
|
"loss": 0.0219, |
|
"step": 175000 |
|
}, |
|
{ |
|
"epoch": 73.0, |
|
"eval_cer": 0.2647, |
|
"eval_gen_len": 14.1634, |
|
"eval_loss": 0.4713122546672821, |
|
"eval_runtime": 271.6072, |
|
"eval_samples_per_second": 11.804, |
|
"eval_steps_per_second": 2.953, |
|
"step": 175492 |
|
}, |
|
{ |
|
"epoch": 73.0, |
|
"learning_rate": 1.2702579034941764e-05, |
|
"loss": 0.0198, |
|
"step": 175500 |
|
}, |
|
{ |
|
"epoch": 73.21, |
|
"learning_rate": 1.2681780366056574e-05, |
|
"loss": 0.0147, |
|
"step": 176000 |
|
}, |
|
{ |
|
"epoch": 73.42, |
|
"learning_rate": 1.2660981697171381e-05, |
|
"loss": 0.0181, |
|
"step": 176500 |
|
}, |
|
{ |
|
"epoch": 73.63, |
|
"learning_rate": 1.264018302828619e-05, |
|
"loss": 0.02, |
|
"step": 177000 |
|
}, |
|
{ |
|
"epoch": 73.84, |
|
"learning_rate": 1.2619384359400998e-05, |
|
"loss": 0.0189, |
|
"step": 177500 |
|
}, |
|
{ |
|
"epoch": 74.0, |
|
"eval_cer": 0.2536, |
|
"eval_gen_len": 13.9267, |
|
"eval_loss": 0.39804303646087646, |
|
"eval_runtime": 266.4263, |
|
"eval_samples_per_second": 12.033, |
|
"eval_steps_per_second": 3.01, |
|
"step": 177896 |
|
}, |
|
{ |
|
"epoch": 74.04, |
|
"learning_rate": 1.2598627287853578e-05, |
|
"loss": 0.0215, |
|
"step": 178000 |
|
}, |
|
{ |
|
"epoch": 74.25, |
|
"learning_rate": 1.2577828618968386e-05, |
|
"loss": 0.0159, |
|
"step": 178500 |
|
}, |
|
{ |
|
"epoch": 74.46, |
|
"learning_rate": 1.2557029950083195e-05, |
|
"loss": 0.0188, |
|
"step": 179000 |
|
}, |
|
{ |
|
"epoch": 74.67, |
|
"learning_rate": 1.2536231281198003e-05, |
|
"loss": 0.0169, |
|
"step": 179500 |
|
}, |
|
{ |
|
"epoch": 74.88, |
|
"learning_rate": 1.2515474209650585e-05, |
|
"loss": 0.0162, |
|
"step": 180000 |
|
}, |
|
{ |
|
"epoch": 75.0, |
|
"eval_cer": 0.2529, |
|
"eval_gen_len": 13.6974, |
|
"eval_loss": 0.39674171805381775, |
|
"eval_runtime": 262.8858, |
|
"eval_samples_per_second": 12.195, |
|
"eval_steps_per_second": 3.051, |
|
"step": 180300 |
|
}, |
|
{ |
|
"epoch": 75.08, |
|
"learning_rate": 1.2494675540765392e-05, |
|
"loss": 0.0193, |
|
"step": 180500 |
|
}, |
|
{ |
|
"epoch": 75.29, |
|
"learning_rate": 1.2473876871880202e-05, |
|
"loss": 0.0162, |
|
"step": 181000 |
|
}, |
|
{ |
|
"epoch": 75.5, |
|
"learning_rate": 1.2453078202995011e-05, |
|
"loss": 0.0174, |
|
"step": 181500 |
|
}, |
|
{ |
|
"epoch": 75.71, |
|
"learning_rate": 1.243232113144759e-05, |
|
"loss": 0.017, |
|
"step": 182000 |
|
}, |
|
{ |
|
"epoch": 75.92, |
|
"learning_rate": 1.2411522462562397e-05, |
|
"loss": 0.0183, |
|
"step": 182500 |
|
}, |
|
{ |
|
"epoch": 76.0, |
|
"eval_cer": 0.2512, |
|
"eval_gen_len": 13.708, |
|
"eval_loss": 0.42503321170806885, |
|
"eval_runtime": 262.6259, |
|
"eval_samples_per_second": 12.207, |
|
"eval_steps_per_second": 3.054, |
|
"step": 182704 |
|
}, |
|
{ |
|
"epoch": 76.12, |
|
"learning_rate": 1.2390765391014977e-05, |
|
"loss": 0.0172, |
|
"step": 183000 |
|
}, |
|
{ |
|
"epoch": 76.33, |
|
"learning_rate": 1.2369966722129784e-05, |
|
"loss": 0.0164, |
|
"step": 183500 |
|
}, |
|
{ |
|
"epoch": 76.54, |
|
"learning_rate": 1.2349168053244594e-05, |
|
"loss": 0.0179, |
|
"step": 184000 |
|
}, |
|
{ |
|
"epoch": 76.75, |
|
"learning_rate": 1.2328369384359401e-05, |
|
"loss": 0.0181, |
|
"step": 184500 |
|
}, |
|
{ |
|
"epoch": 76.96, |
|
"learning_rate": 1.230757071547421e-05, |
|
"loss": 0.0202, |
|
"step": 185000 |
|
}, |
|
{ |
|
"epoch": 77.0, |
|
"eval_cer": 0.2548, |
|
"eval_gen_len": 13.8085, |
|
"eval_loss": 0.4239508807659149, |
|
"eval_runtime": 252.2131, |
|
"eval_samples_per_second": 12.711, |
|
"eval_steps_per_second": 3.18, |
|
"step": 185108 |
|
}, |
|
{ |
|
"epoch": 77.16, |
|
"learning_rate": 1.228677204658902e-05, |
|
"loss": 0.015, |
|
"step": 185500 |
|
}, |
|
{ |
|
"epoch": 77.37, |
|
"learning_rate": 1.2265973377703828e-05, |
|
"loss": 0.0148, |
|
"step": 186000 |
|
}, |
|
{ |
|
"epoch": 77.58, |
|
"learning_rate": 1.2245174708818637e-05, |
|
"loss": 0.0179, |
|
"step": 186500 |
|
}, |
|
{ |
|
"epoch": 77.79, |
|
"learning_rate": 1.2224417637271215e-05, |
|
"loss": 0.019, |
|
"step": 187000 |
|
}, |
|
{ |
|
"epoch": 78.0, |
|
"learning_rate": 1.2203660565723794e-05, |
|
"loss": 0.0186, |
|
"step": 187500 |
|
}, |
|
{ |
|
"epoch": 78.0, |
|
"eval_cer": 0.2522, |
|
"eval_gen_len": 13.806, |
|
"eval_loss": 0.4266161620616913, |
|
"eval_runtime": 261.9494, |
|
"eval_samples_per_second": 12.239, |
|
"eval_steps_per_second": 3.062, |
|
"step": 187512 |
|
}, |
|
{ |
|
"epoch": 78.2, |
|
"learning_rate": 1.2182861896838603e-05, |
|
"loss": 0.0155, |
|
"step": 188000 |
|
}, |
|
{ |
|
"epoch": 78.41, |
|
"learning_rate": 1.216206322795341e-05, |
|
"loss": 0.0167, |
|
"step": 188500 |
|
}, |
|
{ |
|
"epoch": 78.62, |
|
"learning_rate": 1.214126455906822e-05, |
|
"loss": 0.0176, |
|
"step": 189000 |
|
}, |
|
{ |
|
"epoch": 78.83, |
|
"learning_rate": 1.212046589018303e-05, |
|
"loss": 0.016, |
|
"step": 189500 |
|
}, |
|
{ |
|
"epoch": 79.0, |
|
"eval_cer": 0.2499, |
|
"eval_gen_len": 13.713, |
|
"eval_loss": 0.4089159667491913, |
|
"eval_runtime": 255.1636, |
|
"eval_samples_per_second": 12.564, |
|
"eval_steps_per_second": 3.143, |
|
"step": 189916 |
|
}, |
|
{ |
|
"epoch": 79.03, |
|
"learning_rate": 1.2099667221297837e-05, |
|
"loss": 0.0165, |
|
"step": 190000 |
|
}, |
|
{ |
|
"epoch": 79.24, |
|
"learning_rate": 1.2078868552412646e-05, |
|
"loss": 0.0136, |
|
"step": 190500 |
|
}, |
|
{ |
|
"epoch": 79.45, |
|
"learning_rate": 1.2058111480865226e-05, |
|
"loss": 0.0154, |
|
"step": 191000 |
|
}, |
|
{ |
|
"epoch": 79.66, |
|
"learning_rate": 1.2037312811980036e-05, |
|
"loss": 0.0168, |
|
"step": 191500 |
|
}, |
|
{ |
|
"epoch": 79.87, |
|
"learning_rate": 1.2016514143094843e-05, |
|
"loss": 0.0188, |
|
"step": 192000 |
|
}, |
|
{ |
|
"epoch": 80.0, |
|
"eval_cer": 0.2501, |
|
"eval_gen_len": 13.7745, |
|
"eval_loss": 0.41349881887435913, |
|
"eval_runtime": 263.2542, |
|
"eval_samples_per_second": 12.178, |
|
"eval_steps_per_second": 3.046, |
|
"step": 192320 |
|
}, |
|
{ |
|
"epoch": 80.07, |
|
"learning_rate": 1.1995715474209653e-05, |
|
"loss": 0.0169, |
|
"step": 192500 |
|
}, |
|
{ |
|
"epoch": 80.28, |
|
"learning_rate": 1.1974916805324459e-05, |
|
"loss": 0.0142, |
|
"step": 193000 |
|
}, |
|
{ |
|
"epoch": 80.49, |
|
"learning_rate": 1.1954118136439268e-05, |
|
"loss": 0.0186, |
|
"step": 193500 |
|
}, |
|
{ |
|
"epoch": 80.7, |
|
"learning_rate": 1.1933319467554076e-05, |
|
"loss": 0.0173, |
|
"step": 194000 |
|
}, |
|
{ |
|
"epoch": 80.91, |
|
"learning_rate": 1.1912520798668885e-05, |
|
"loss": 0.016, |
|
"step": 194500 |
|
}, |
|
{ |
|
"epoch": 81.0, |
|
"eval_cer": 0.2477, |
|
"eval_gen_len": 13.6622, |
|
"eval_loss": 0.3864258825778961, |
|
"eval_runtime": 259.6006, |
|
"eval_samples_per_second": 12.35, |
|
"eval_steps_per_second": 3.089, |
|
"step": 194724 |
|
}, |
|
{ |
|
"epoch": 81.11, |
|
"learning_rate": 1.1891763727121467e-05, |
|
"loss": 0.0166, |
|
"step": 195000 |
|
}, |
|
{ |
|
"epoch": 81.32, |
|
"learning_rate": 1.1870965058236274e-05, |
|
"loss": 0.0141, |
|
"step": 195500 |
|
}, |
|
{ |
|
"epoch": 81.53, |
|
"learning_rate": 1.1850207986688853e-05, |
|
"loss": 0.0151, |
|
"step": 196000 |
|
}, |
|
{ |
|
"epoch": 81.74, |
|
"learning_rate": 1.1829409317803662e-05, |
|
"loss": 0.0176, |
|
"step": 196500 |
|
}, |
|
{ |
|
"epoch": 81.95, |
|
"learning_rate": 1.180861064891847e-05, |
|
"loss": 0.0156, |
|
"step": 197000 |
|
}, |
|
{ |
|
"epoch": 82.0, |
|
"eval_cer": 0.2551, |
|
"eval_gen_len": 13.9261, |
|
"eval_loss": 0.4278740882873535, |
|
"eval_runtime": 261.2997, |
|
"eval_samples_per_second": 12.269, |
|
"eval_steps_per_second": 3.069, |
|
"step": 197128 |
|
}, |
|
{ |
|
"epoch": 82.15, |
|
"learning_rate": 1.1787811980033279e-05, |
|
"loss": 0.0173, |
|
"step": 197500 |
|
}, |
|
{ |
|
"epoch": 82.36, |
|
"learning_rate": 1.1767013311148088e-05, |
|
"loss": 0.0153, |
|
"step": 198000 |
|
}, |
|
{ |
|
"epoch": 82.57, |
|
"learning_rate": 1.1746214642262896e-05, |
|
"loss": 0.016, |
|
"step": 198500 |
|
}, |
|
{ |
|
"epoch": 82.78, |
|
"learning_rate": 1.1725415973377705e-05, |
|
"loss": 0.0136, |
|
"step": 199000 |
|
}, |
|
{ |
|
"epoch": 82.99, |
|
"learning_rate": 1.1704617304492513e-05, |
|
"loss": 0.018, |
|
"step": 199500 |
|
}, |
|
{ |
|
"epoch": 83.0, |
|
"eval_cer": 0.25, |
|
"eval_gen_len": 13.704, |
|
"eval_loss": 0.4216358959674835, |
|
"eval_runtime": 257.2522, |
|
"eval_samples_per_second": 12.462, |
|
"eval_steps_per_second": 3.118, |
|
"step": 199532 |
|
}, |
|
{ |
|
"epoch": 83.19, |
|
"learning_rate": 1.1683818635607322e-05, |
|
"loss": 0.0125, |
|
"step": 200000 |
|
}, |
|
{ |
|
"epoch": 83.4, |
|
"learning_rate": 1.16630615640599e-05, |
|
"loss": 0.0158, |
|
"step": 200500 |
|
}, |
|
{ |
|
"epoch": 83.61, |
|
"learning_rate": 1.1642304492512479e-05, |
|
"loss": 0.0153, |
|
"step": 201000 |
|
}, |
|
{ |
|
"epoch": 83.82, |
|
"learning_rate": 1.1621505823627288e-05, |
|
"loss": 0.0159, |
|
"step": 201500 |
|
}, |
|
{ |
|
"epoch": 84.0, |
|
"eval_cer": 0.2502, |
|
"eval_gen_len": 13.7121, |
|
"eval_loss": 0.42220476269721985, |
|
"eval_runtime": 260.9684, |
|
"eval_samples_per_second": 12.285, |
|
"eval_steps_per_second": 3.073, |
|
"step": 201936 |
|
}, |
|
{ |
|
"epoch": 84.03, |
|
"learning_rate": 1.1600707154742097e-05, |
|
"loss": 0.0159, |
|
"step": 202000 |
|
}, |
|
{ |
|
"epoch": 84.23, |
|
"learning_rate": 1.1579908485856905e-05, |
|
"loss": 0.0157, |
|
"step": 202500 |
|
}, |
|
{ |
|
"epoch": 84.44, |
|
"learning_rate": 1.1559109816971715e-05, |
|
"loss": 0.0149, |
|
"step": 203000 |
|
}, |
|
{ |
|
"epoch": 84.65, |
|
"learning_rate": 1.1538311148086522e-05, |
|
"loss": 0.0148, |
|
"step": 203500 |
|
}, |
|
{ |
|
"epoch": 84.86, |
|
"learning_rate": 1.1517512479201332e-05, |
|
"loss": 0.0165, |
|
"step": 204000 |
|
}, |
|
{ |
|
"epoch": 85.0, |
|
"eval_cer": 0.2482, |
|
"eval_gen_len": 13.7233, |
|
"eval_loss": 0.4069821238517761, |
|
"eval_runtime": 256.2579, |
|
"eval_samples_per_second": 12.511, |
|
"eval_steps_per_second": 3.13, |
|
"step": 204340 |
|
}, |
|
{ |
|
"epoch": 85.07, |
|
"learning_rate": 1.149675540765391e-05, |
|
"loss": 0.0162, |
|
"step": 204500 |
|
}, |
|
{ |
|
"epoch": 85.27, |
|
"learning_rate": 1.1475956738768719e-05, |
|
"loss": 0.0128, |
|
"step": 205000 |
|
}, |
|
{ |
|
"epoch": 85.48, |
|
"learning_rate": 1.1455158069883527e-05, |
|
"loss": 0.0149, |
|
"step": 205500 |
|
}, |
|
{ |
|
"epoch": 85.69, |
|
"learning_rate": 1.1434359400998336e-05, |
|
"loss": 0.0182, |
|
"step": 206000 |
|
}, |
|
{ |
|
"epoch": 85.9, |
|
"learning_rate": 1.1413560732113146e-05, |
|
"loss": 0.0149, |
|
"step": 206500 |
|
}, |
|
{ |
|
"epoch": 86.0, |
|
"eval_cer": 0.2493, |
|
"eval_gen_len": 13.7623, |
|
"eval_loss": 0.4060095250606537, |
|
"eval_runtime": 264.6587, |
|
"eval_samples_per_second": 12.114, |
|
"eval_steps_per_second": 3.03, |
|
"step": 206744 |
|
}, |
|
{ |
|
"epoch": 86.11, |
|
"learning_rate": 1.1392762063227953e-05, |
|
"loss": 0.0131, |
|
"step": 207000 |
|
}, |
|
{ |
|
"epoch": 86.31, |
|
"learning_rate": 1.1371963394342763e-05, |
|
"loss": 0.0145, |
|
"step": 207500 |
|
}, |
|
{ |
|
"epoch": 86.52, |
|
"learning_rate": 1.135116472545757e-05, |
|
"loss": 0.0133, |
|
"step": 208000 |
|
}, |
|
{ |
|
"epoch": 86.73, |
|
"learning_rate": 1.1330407653910152e-05, |
|
"loss": 0.0141, |
|
"step": 208500 |
|
}, |
|
{ |
|
"epoch": 86.94, |
|
"learning_rate": 1.130960898502496e-05, |
|
"loss": 0.014, |
|
"step": 209000 |
|
}, |
|
{ |
|
"epoch": 87.0, |
|
"eval_cer": 0.2461, |
|
"eval_gen_len": 13.6067, |
|
"eval_loss": 0.42620450258255005, |
|
"eval_runtime": 258.5687, |
|
"eval_samples_per_second": 12.399, |
|
"eval_steps_per_second": 3.102, |
|
"step": 209148 |
|
}, |
|
{ |
|
"epoch": 87.15, |
|
"learning_rate": 1.1288810316139769e-05, |
|
"loss": 0.0131, |
|
"step": 209500 |
|
}, |
|
{ |
|
"epoch": 87.35, |
|
"learning_rate": 1.1268011647254578e-05, |
|
"loss": 0.0121, |
|
"step": 210000 |
|
}, |
|
{ |
|
"epoch": 87.56, |
|
"learning_rate": 1.1247212978369386e-05, |
|
"loss": 0.015, |
|
"step": 210500 |
|
}, |
|
{ |
|
"epoch": 87.77, |
|
"learning_rate": 1.1226414309484195e-05, |
|
"loss": 0.0137, |
|
"step": 211000 |
|
}, |
|
{ |
|
"epoch": 87.98, |
|
"learning_rate": 1.1205615640599003e-05, |
|
"loss": 0.0161, |
|
"step": 211500 |
|
}, |
|
{ |
|
"epoch": 88.0, |
|
"eval_cer": 0.249, |
|
"eval_gen_len": 13.758, |
|
"eval_loss": 0.4252397418022156, |
|
"eval_runtime": 265.7747, |
|
"eval_samples_per_second": 12.063, |
|
"eval_steps_per_second": 3.018, |
|
"step": 211552 |
|
}, |
|
{ |
|
"epoch": 88.19, |
|
"learning_rate": 1.1184816971713812e-05, |
|
"loss": 0.0126, |
|
"step": 212000 |
|
}, |
|
{ |
|
"epoch": 88.39, |
|
"learning_rate": 1.116405990016639e-05, |
|
"loss": 0.0136, |
|
"step": 212500 |
|
}, |
|
{ |
|
"epoch": 88.6, |
|
"learning_rate": 1.1143302828618969e-05, |
|
"loss": 0.0141, |
|
"step": 213000 |
|
}, |
|
{ |
|
"epoch": 88.81, |
|
"learning_rate": 1.1122504159733778e-05, |
|
"loss": 0.0142, |
|
"step": 213500 |
|
}, |
|
{ |
|
"epoch": 89.0, |
|
"eval_cer": 0.2511, |
|
"eval_gen_len": 13.8013, |
|
"eval_loss": 0.43667590618133545, |
|
"eval_runtime": 258.922, |
|
"eval_samples_per_second": 12.382, |
|
"eval_steps_per_second": 3.097, |
|
"step": 213956 |
|
}, |
|
{ |
|
"epoch": 89.02, |
|
"learning_rate": 1.1101705490848587e-05, |
|
"loss": 0.0155, |
|
"step": 214000 |
|
}, |
|
{ |
|
"epoch": 89.23, |
|
"learning_rate": 1.1080906821963395e-05, |
|
"loss": 0.0149, |
|
"step": 214500 |
|
}, |
|
{ |
|
"epoch": 89.43, |
|
"learning_rate": 1.1060108153078204e-05, |
|
"loss": 0.0138, |
|
"step": 215000 |
|
}, |
|
{ |
|
"epoch": 89.64, |
|
"learning_rate": 1.1039309484193012e-05, |
|
"loss": 0.0131, |
|
"step": 215500 |
|
}, |
|
{ |
|
"epoch": 89.85, |
|
"learning_rate": 1.1018510815307822e-05, |
|
"loss": 0.0146, |
|
"step": 216000 |
|
}, |
|
{ |
|
"epoch": 90.0, |
|
"eval_cer": 0.2483, |
|
"eval_gen_len": 13.6778, |
|
"eval_loss": 0.41625672578811646, |
|
"eval_runtime": 260.915, |
|
"eval_samples_per_second": 12.288, |
|
"eval_steps_per_second": 3.074, |
|
"step": 216360 |
|
}, |
|
{ |
|
"epoch": 90.06, |
|
"learning_rate": 1.099771214642263e-05, |
|
"loss": 0.0131, |
|
"step": 216500 |
|
}, |
|
{ |
|
"epoch": 90.27, |
|
"learning_rate": 1.0976955074875209e-05, |
|
"loss": 0.0128, |
|
"step": 217000 |
|
}, |
|
{ |
|
"epoch": 90.47, |
|
"learning_rate": 1.0956156405990017e-05, |
|
"loss": 0.0139, |
|
"step": 217500 |
|
}, |
|
{ |
|
"epoch": 90.68, |
|
"learning_rate": 1.0935357737104826e-05, |
|
"loss": 0.0146, |
|
"step": 218000 |
|
}, |
|
{ |
|
"epoch": 90.89, |
|
"learning_rate": 1.0914559068219634e-05, |
|
"loss": 0.0127, |
|
"step": 218500 |
|
}, |
|
{ |
|
"epoch": 91.0, |
|
"eval_cer": 0.2466, |
|
"eval_gen_len": 13.6344, |
|
"eval_loss": 0.42400336265563965, |
|
"eval_runtime": 259.4085, |
|
"eval_samples_per_second": 12.359, |
|
"eval_steps_per_second": 3.092, |
|
"step": 218764 |
|
}, |
|
{ |
|
"epoch": 91.1, |
|
"learning_rate": 1.0893843594009986e-05, |
|
"loss": 0.0131, |
|
"step": 219000 |
|
}, |
|
{ |
|
"epoch": 91.31, |
|
"learning_rate": 1.0873044925124794e-05, |
|
"loss": 0.0139, |
|
"step": 219500 |
|
}, |
|
{ |
|
"epoch": 91.51, |
|
"learning_rate": 1.0852246256239603e-05, |
|
"loss": 0.0144, |
|
"step": 220000 |
|
}, |
|
{ |
|
"epoch": 91.72, |
|
"learning_rate": 1.083144758735441e-05, |
|
"loss": 0.016, |
|
"step": 220500 |
|
}, |
|
{ |
|
"epoch": 91.93, |
|
"learning_rate": 1.081064891846922e-05, |
|
"loss": 0.0147, |
|
"step": 221000 |
|
}, |
|
{ |
|
"epoch": 92.0, |
|
"eval_cer": 0.2457, |
|
"eval_gen_len": 13.5948, |
|
"eval_loss": 0.4094228148460388, |
|
"eval_runtime": 257.4623, |
|
"eval_samples_per_second": 12.452, |
|
"eval_steps_per_second": 3.115, |
|
"step": 221168 |
|
}, |
|
{ |
|
"epoch": 92.14, |
|
"learning_rate": 1.078985024958403e-05, |
|
"loss": 0.0136, |
|
"step": 221500 |
|
}, |
|
{ |
|
"epoch": 92.35, |
|
"learning_rate": 1.0769051580698837e-05, |
|
"loss": 0.0123, |
|
"step": 222000 |
|
}, |
|
{ |
|
"epoch": 92.55, |
|
"learning_rate": 1.0748252911813646e-05, |
|
"loss": 0.0113, |
|
"step": 222500 |
|
}, |
|
{ |
|
"epoch": 92.76, |
|
"learning_rate": 1.0727454242928454e-05, |
|
"loss": 0.0153, |
|
"step": 223000 |
|
}, |
|
{ |
|
"epoch": 92.97, |
|
"learning_rate": 1.0706697171381034e-05, |
|
"loss": 0.0153, |
|
"step": 223500 |
|
}, |
|
{ |
|
"epoch": 93.0, |
|
"eval_cer": 0.2414, |
|
"eval_gen_len": 13.5168, |
|
"eval_loss": 0.419572651386261, |
|
"eval_runtime": 254.173, |
|
"eval_samples_per_second": 12.613, |
|
"eval_steps_per_second": 3.155, |
|
"step": 223572 |
|
}, |
|
{ |
|
"epoch": 93.18, |
|
"learning_rate": 1.0685898502495842e-05, |
|
"loss": 0.0131, |
|
"step": 224000 |
|
}, |
|
{ |
|
"epoch": 93.39, |
|
"learning_rate": 1.0665099833610651e-05, |
|
"loss": 0.0138, |
|
"step": 224500 |
|
}, |
|
{ |
|
"epoch": 93.59, |
|
"learning_rate": 1.0644301164725459e-05, |
|
"loss": 0.0125, |
|
"step": 225000 |
|
}, |
|
{ |
|
"epoch": 93.8, |
|
"learning_rate": 1.0623544093178037e-05, |
|
"loss": 0.0158, |
|
"step": 225500 |
|
}, |
|
{ |
|
"epoch": 94.0, |
|
"eval_cer": 0.2491, |
|
"eval_gen_len": 13.8378, |
|
"eval_loss": 0.4395461678504944, |
|
"eval_runtime": 257.6708, |
|
"eval_samples_per_second": 12.442, |
|
"eval_steps_per_second": 3.112, |
|
"step": 225976 |
|
}, |
|
{ |
|
"epoch": 94.01, |
|
"learning_rate": 1.0602745424292846e-05, |
|
"loss": 0.0141, |
|
"step": 226000 |
|
}, |
|
{ |
|
"epoch": 94.22, |
|
"learning_rate": 1.0581946755407656e-05, |
|
"loss": 0.0147, |
|
"step": 226500 |
|
}, |
|
{ |
|
"epoch": 94.43, |
|
"learning_rate": 1.0561148086522463e-05, |
|
"loss": 0.0119, |
|
"step": 227000 |
|
}, |
|
{ |
|
"epoch": 94.63, |
|
"learning_rate": 1.0540391014975041e-05, |
|
"loss": 0.0139, |
|
"step": 227500 |
|
}, |
|
{ |
|
"epoch": 94.84, |
|
"learning_rate": 1.0519633943427621e-05, |
|
"loss": 0.0138, |
|
"step": 228000 |
|
}, |
|
{ |
|
"epoch": 95.0, |
|
"eval_cer": 0.2518, |
|
"eval_gen_len": 13.7695, |
|
"eval_loss": 0.43901219964027405, |
|
"eval_runtime": 262.2107, |
|
"eval_samples_per_second": 12.227, |
|
"eval_steps_per_second": 3.059, |
|
"step": 228380 |
|
}, |
|
{ |
|
"epoch": 95.05, |
|
"learning_rate": 1.0498835274542429e-05, |
|
"loss": 0.0121, |
|
"step": 228500 |
|
}, |
|
{ |
|
"epoch": 95.26, |
|
"learning_rate": 1.0478036605657238e-05, |
|
"loss": 0.0106, |
|
"step": 229000 |
|
}, |
|
{ |
|
"epoch": 95.47, |
|
"learning_rate": 1.0457237936772046e-05, |
|
"loss": 0.0126, |
|
"step": 229500 |
|
}, |
|
{ |
|
"epoch": 95.67, |
|
"learning_rate": 1.0436439267886855e-05, |
|
"loss": 0.0156, |
|
"step": 230000 |
|
}, |
|
{ |
|
"epoch": 95.88, |
|
"learning_rate": 1.0415640599001665e-05, |
|
"loss": 0.0123, |
|
"step": 230500 |
|
}, |
|
{ |
|
"epoch": 96.0, |
|
"eval_cer": 0.2475, |
|
"eval_gen_len": 13.69, |
|
"eval_loss": 0.4405384361743927, |
|
"eval_runtime": 260.5068, |
|
"eval_samples_per_second": 12.307, |
|
"eval_steps_per_second": 3.079, |
|
"step": 230784 |
|
}, |
|
{ |
|
"epoch": 96.09, |
|
"learning_rate": 1.0394841930116472e-05, |
|
"loss": 0.011, |
|
"step": 231000 |
|
}, |
|
{ |
|
"epoch": 96.3, |
|
"learning_rate": 1.0374084858569054e-05, |
|
"loss": 0.0132, |
|
"step": 231500 |
|
}, |
|
{ |
|
"epoch": 96.51, |
|
"learning_rate": 1.0353286189683862e-05, |
|
"loss": 0.0135, |
|
"step": 232000 |
|
}, |
|
{ |
|
"epoch": 96.71, |
|
"learning_rate": 1.0332487520798671e-05, |
|
"loss": 0.0143, |
|
"step": 232500 |
|
}, |
|
{ |
|
"epoch": 96.92, |
|
"learning_rate": 1.0311688851913479e-05, |
|
"loss": 0.0136, |
|
"step": 233000 |
|
}, |
|
{ |
|
"epoch": 97.0, |
|
"eval_cer": 0.2492, |
|
"eval_gen_len": 13.791, |
|
"eval_loss": 0.4154476225376129, |
|
"eval_runtime": 266.6041, |
|
"eval_samples_per_second": 12.025, |
|
"eval_steps_per_second": 3.008, |
|
"step": 233188 |
|
}, |
|
{ |
|
"epoch": 97.13, |
|
"learning_rate": 1.0290890183028288e-05, |
|
"loss": 0.0117, |
|
"step": 233500 |
|
}, |
|
{ |
|
"epoch": 97.34, |
|
"learning_rate": 1.0270091514143094e-05, |
|
"loss": 0.0116, |
|
"step": 234000 |
|
}, |
|
{ |
|
"epoch": 97.55, |
|
"learning_rate": 1.0249292845257903e-05, |
|
"loss": 0.0117, |
|
"step": 234500 |
|
}, |
|
{ |
|
"epoch": 97.75, |
|
"learning_rate": 1.0228494176372713e-05, |
|
"loss": 0.0158, |
|
"step": 235000 |
|
}, |
|
{ |
|
"epoch": 97.96, |
|
"learning_rate": 1.0207737104825293e-05, |
|
"loss": 0.012, |
|
"step": 235500 |
|
}, |
|
{ |
|
"epoch": 98.0, |
|
"eval_cer": 0.2481, |
|
"eval_gen_len": 13.8702, |
|
"eval_loss": 0.43725699186325073, |
|
"eval_runtime": 257.4309, |
|
"eval_samples_per_second": 12.454, |
|
"eval_steps_per_second": 3.115, |
|
"step": 235592 |
|
}, |
|
{ |
|
"epoch": 98.17, |
|
"learning_rate": 1.0186938435940102e-05, |
|
"loss": 0.0098, |
|
"step": 236000 |
|
}, |
|
{ |
|
"epoch": 98.38, |
|
"learning_rate": 1.016618136439268e-05, |
|
"loss": 0.0118, |
|
"step": 236500 |
|
}, |
|
{ |
|
"epoch": 98.59, |
|
"learning_rate": 1.0145382695507488e-05, |
|
"loss": 0.0128, |
|
"step": 237000 |
|
}, |
|
{ |
|
"epoch": 98.79, |
|
"learning_rate": 1.0124584026622297e-05, |
|
"loss": 0.0122, |
|
"step": 237500 |
|
}, |
|
{ |
|
"epoch": 99.0, |
|
"eval_cer": 0.2504, |
|
"eval_gen_len": 13.8051, |
|
"eval_loss": 0.44157010316848755, |
|
"eval_runtime": 284.8185, |
|
"eval_samples_per_second": 11.256, |
|
"eval_steps_per_second": 2.816, |
|
"step": 237996 |
|
}, |
|
{ |
|
"epoch": 99.0, |
|
"learning_rate": 1.0103785357737107e-05, |
|
"loss": 0.0136, |
|
"step": 238000 |
|
}, |
|
{ |
|
"epoch": 99.21, |
|
"learning_rate": 1.0082986688851914e-05, |
|
"loss": 0.0115, |
|
"step": 238500 |
|
}, |
|
{ |
|
"epoch": 99.42, |
|
"learning_rate": 1.0062188019966724e-05, |
|
"loss": 0.0109, |
|
"step": 239000 |
|
}, |
|
{ |
|
"epoch": 99.63, |
|
"learning_rate": 1.0041389351081531e-05, |
|
"loss": 0.0135, |
|
"step": 239500 |
|
}, |
|
{ |
|
"epoch": 99.83, |
|
"learning_rate": 1.002059068219634e-05, |
|
"loss": 0.0146, |
|
"step": 240000 |
|
}, |
|
{ |
|
"epoch": 100.0, |
|
"eval_cer": 0.2489, |
|
"eval_gen_len": 13.6868, |
|
"eval_loss": 0.42966365814208984, |
|
"eval_runtime": 282.168, |
|
"eval_samples_per_second": 11.362, |
|
"eval_steps_per_second": 2.842, |
|
"step": 240400 |
|
}, |
|
{ |
|
"epoch": 100.04, |
|
"learning_rate": 9.99979201331115e-06, |
|
"loss": 0.0136, |
|
"step": 240500 |
|
}, |
|
{ |
|
"epoch": 100.25, |
|
"learning_rate": 9.979034941763728e-06, |
|
"loss": 0.0132, |
|
"step": 241000 |
|
}, |
|
{ |
|
"epoch": 100.46, |
|
"learning_rate": 9.958236272878536e-06, |
|
"loss": 0.0112, |
|
"step": 241500 |
|
}, |
|
{ |
|
"epoch": 100.67, |
|
"learning_rate": 9.937437603993345e-06, |
|
"loss": 0.0139, |
|
"step": 242000 |
|
}, |
|
{ |
|
"epoch": 100.87, |
|
"learning_rate": 9.916638935108155e-06, |
|
"loss": 0.0135, |
|
"step": 242500 |
|
}, |
|
{ |
|
"epoch": 101.0, |
|
"eval_cer": 0.2428, |
|
"eval_gen_len": 13.5689, |
|
"eval_loss": 0.4310346245765686, |
|
"eval_runtime": 280.207, |
|
"eval_samples_per_second": 11.442, |
|
"eval_steps_per_second": 2.862, |
|
"step": 242804 |
|
}, |
|
{ |
|
"epoch": 101.08, |
|
"learning_rate": 9.895881863560733e-06, |
|
"loss": 0.0118, |
|
"step": 243000 |
|
}, |
|
{ |
|
"epoch": 101.29, |
|
"learning_rate": 9.875083194675542e-06, |
|
"loss": 0.0111, |
|
"step": 243500 |
|
}, |
|
{ |
|
"epoch": 101.5, |
|
"learning_rate": 9.854284525790352e-06, |
|
"loss": 0.0115, |
|
"step": 244000 |
|
}, |
|
{ |
|
"epoch": 101.71, |
|
"learning_rate": 9.83352745424293e-06, |
|
"loss": 0.0124, |
|
"step": 244500 |
|
}, |
|
{ |
|
"epoch": 101.91, |
|
"learning_rate": 9.812728785357737e-06, |
|
"loss": 0.0136, |
|
"step": 245000 |
|
}, |
|
{ |
|
"epoch": 102.0, |
|
"eval_cer": 0.246, |
|
"eval_gen_len": 13.4972, |
|
"eval_loss": 0.422376424074173, |
|
"eval_runtime": 269.57, |
|
"eval_samples_per_second": 11.893, |
|
"eval_steps_per_second": 2.975, |
|
"step": 245208 |
|
}, |
|
{ |
|
"epoch": 102.12, |
|
"learning_rate": 9.791930116472547e-06, |
|
"loss": 0.0124, |
|
"step": 245500 |
|
}, |
|
{ |
|
"epoch": 102.33, |
|
"learning_rate": 9.771131447587356e-06, |
|
"loss": 0.0101, |
|
"step": 246000 |
|
}, |
|
{ |
|
"epoch": 102.54, |
|
"learning_rate": 9.750332778702164e-06, |
|
"loss": 0.0109, |
|
"step": 246500 |
|
}, |
|
{ |
|
"epoch": 102.75, |
|
"learning_rate": 9.729534109816973e-06, |
|
"loss": 0.0108, |
|
"step": 247000 |
|
}, |
|
{ |
|
"epoch": 102.95, |
|
"learning_rate": 9.70873544093178e-06, |
|
"loss": 0.014, |
|
"step": 247500 |
|
}, |
|
{ |
|
"epoch": 103.0, |
|
"eval_cer": 0.2502, |
|
"eval_gen_len": 13.8394, |
|
"eval_loss": 0.45676541328430176, |
|
"eval_runtime": 270.1526, |
|
"eval_samples_per_second": 11.867, |
|
"eval_steps_per_second": 2.969, |
|
"step": 247612 |
|
}, |
|
{ |
|
"epoch": 103.16, |
|
"learning_rate": 9.68793677204659e-06, |
|
"loss": 0.0106, |
|
"step": 248000 |
|
}, |
|
{ |
|
"epoch": 103.37, |
|
"learning_rate": 9.667179700499168e-06, |
|
"loss": 0.0117, |
|
"step": 248500 |
|
}, |
|
{ |
|
"epoch": 103.58, |
|
"learning_rate": 9.646381031613978e-06, |
|
"loss": 0.0114, |
|
"step": 249000 |
|
}, |
|
{ |
|
"epoch": 103.79, |
|
"learning_rate": 9.625582362728785e-06, |
|
"loss": 0.0135, |
|
"step": 249500 |
|
}, |
|
{ |
|
"epoch": 103.99, |
|
"learning_rate": 9.604783693843595e-06, |
|
"loss": 0.0125, |
|
"step": 250000 |
|
}, |
|
{ |
|
"epoch": 104.0, |
|
"eval_cer": 0.2465, |
|
"eval_gen_len": 13.4345, |
|
"eval_loss": 0.39919513463974, |
|
"eval_runtime": 270.6771, |
|
"eval_samples_per_second": 11.844, |
|
"eval_steps_per_second": 2.963, |
|
"step": 250016 |
|
}, |
|
{ |
|
"epoch": 104.2, |
|
"learning_rate": 9.583985024958402e-06, |
|
"loss": 0.01, |
|
"step": 250500 |
|
}, |
|
{ |
|
"epoch": 104.41, |
|
"learning_rate": 9.563227953410982e-06, |
|
"loss": 0.0126, |
|
"step": 251000 |
|
}, |
|
{ |
|
"epoch": 104.62, |
|
"learning_rate": 9.542429284525792e-06, |
|
"loss": 0.0109, |
|
"step": 251500 |
|
}, |
|
{ |
|
"epoch": 104.83, |
|
"learning_rate": 9.5216306156406e-06, |
|
"loss": 0.0135, |
|
"step": 252000 |
|
}, |
|
{ |
|
"epoch": 105.0, |
|
"eval_cer": 0.2472, |
|
"eval_gen_len": 13.7277, |
|
"eval_loss": 0.44164207577705383, |
|
"eval_runtime": 274.8419, |
|
"eval_samples_per_second": 11.665, |
|
"eval_steps_per_second": 2.918, |
|
"step": 252420 |
|
}, |
|
{ |
|
"epoch": 105.03, |
|
"learning_rate": 9.500831946755409e-06, |
|
"loss": 0.0119, |
|
"step": 252500 |
|
}, |
|
{ |
|
"epoch": 105.24, |
|
"learning_rate": 9.480033277870218e-06, |
|
"loss": 0.0095, |
|
"step": 253000 |
|
}, |
|
{ |
|
"epoch": 105.45, |
|
"learning_rate": 9.459276206322796e-06, |
|
"loss": 0.0115, |
|
"step": 253500 |
|
}, |
|
{ |
|
"epoch": 105.66, |
|
"learning_rate": 9.438477537437604e-06, |
|
"loss": 0.0135, |
|
"step": 254000 |
|
}, |
|
{ |
|
"epoch": 105.87, |
|
"learning_rate": 9.417678868552413e-06, |
|
"loss": 0.012, |
|
"step": 254500 |
|
}, |
|
{ |
|
"epoch": 106.0, |
|
"eval_cer": 0.2416, |
|
"eval_gen_len": 13.4994, |
|
"eval_loss": 0.41192150115966797, |
|
"eval_runtime": 275.6326, |
|
"eval_samples_per_second": 11.631, |
|
"eval_steps_per_second": 2.91, |
|
"step": 254824 |
|
}, |
|
{ |
|
"epoch": 106.07, |
|
"learning_rate": 9.396880199667223e-06, |
|
"loss": 0.0114, |
|
"step": 255000 |
|
}, |
|
{ |
|
"epoch": 106.28, |
|
"learning_rate": 9.376123128119801e-06, |
|
"loss": 0.0116, |
|
"step": 255500 |
|
}, |
|
{ |
|
"epoch": 106.49, |
|
"learning_rate": 9.355324459234609e-06, |
|
"loss": 0.0093, |
|
"step": 256000 |
|
}, |
|
{ |
|
"epoch": 106.7, |
|
"learning_rate": 9.334525790349418e-06, |
|
"loss": 0.0128, |
|
"step": 256500 |
|
}, |
|
{ |
|
"epoch": 106.91, |
|
"learning_rate": 9.313768718801998e-06, |
|
"loss": 0.0133, |
|
"step": 257000 |
|
}, |
|
{ |
|
"epoch": 107.0, |
|
"eval_cer": 0.2476, |
|
"eval_gen_len": 13.6494, |
|
"eval_loss": 0.42318016290664673, |
|
"eval_runtime": 280.8295, |
|
"eval_samples_per_second": 11.416, |
|
"eval_steps_per_second": 2.856, |
|
"step": 257228 |
|
}, |
|
{ |
|
"epoch": 107.11, |
|
"learning_rate": 9.292970049916805e-06, |
|
"loss": 0.0097, |
|
"step": 257500 |
|
}, |
|
{ |
|
"epoch": 107.32, |
|
"learning_rate": 9.272171381031615e-06, |
|
"loss": 0.012, |
|
"step": 258000 |
|
}, |
|
{ |
|
"epoch": 107.53, |
|
"learning_rate": 9.251372712146424e-06, |
|
"loss": 0.0093, |
|
"step": 258500 |
|
}, |
|
{ |
|
"epoch": 107.74, |
|
"learning_rate": 9.230574043261232e-06, |
|
"loss": 0.0103, |
|
"step": 259000 |
|
}, |
|
{ |
|
"epoch": 107.95, |
|
"learning_rate": 9.209775374376041e-06, |
|
"loss": 0.0103, |
|
"step": 259500 |
|
}, |
|
{ |
|
"epoch": 108.0, |
|
"eval_cer": 0.2434, |
|
"eval_gen_len": 13.4925, |
|
"eval_loss": 0.425822377204895, |
|
"eval_runtime": 284.9328, |
|
"eval_samples_per_second": 11.252, |
|
"eval_steps_per_second": 2.815, |
|
"step": 259632 |
|
}, |
|
{ |
|
"epoch": 108.15, |
|
"learning_rate": 9.188976705490849e-06, |
|
"loss": 0.0108, |
|
"step": 260000 |
|
}, |
|
{ |
|
"epoch": 108.36, |
|
"learning_rate": 9.168178036605658e-06, |
|
"loss": 0.0109, |
|
"step": 260500 |
|
}, |
|
{ |
|
"epoch": 108.57, |
|
"learning_rate": 9.147420965058236e-06, |
|
"loss": 0.0125, |
|
"step": 261000 |
|
}, |
|
{ |
|
"epoch": 108.78, |
|
"learning_rate": 9.126622296173046e-06, |
|
"loss": 0.0112, |
|
"step": 261500 |
|
}, |
|
{ |
|
"epoch": 108.99, |
|
"learning_rate": 9.105823627287854e-06, |
|
"loss": 0.0116, |
|
"step": 262000 |
|
}, |
|
{ |
|
"epoch": 109.0, |
|
"eval_cer": 0.2462, |
|
"eval_gen_len": 13.7115, |
|
"eval_loss": 0.43990305066108704, |
|
"eval_runtime": 285.2346, |
|
"eval_samples_per_second": 11.24, |
|
"eval_steps_per_second": 2.812, |
|
"step": 262036 |
|
}, |
|
{ |
|
"epoch": 109.19, |
|
"learning_rate": 9.085066555740433e-06, |
|
"loss": 0.0088, |
|
"step": 262500 |
|
}, |
|
{ |
|
"epoch": 109.4, |
|
"learning_rate": 9.064267886855243e-06, |
|
"loss": 0.0098, |
|
"step": 263000 |
|
}, |
|
{ |
|
"epoch": 109.61, |
|
"learning_rate": 9.04346921797005e-06, |
|
"loss": 0.0106, |
|
"step": 263500 |
|
}, |
|
{ |
|
"epoch": 109.82, |
|
"learning_rate": 9.02267054908486e-06, |
|
"loss": 0.0123, |
|
"step": 264000 |
|
}, |
|
{ |
|
"epoch": 110.0, |
|
"eval_cer": 0.2462, |
|
"eval_gen_len": 13.6023, |
|
"eval_loss": 0.41700801253318787, |
|
"eval_runtime": 287.8449, |
|
"eval_samples_per_second": 11.138, |
|
"eval_steps_per_second": 2.786, |
|
"step": 264440 |
|
}, |
|
{ |
|
"epoch": 110.02, |
|
"learning_rate": 9.00187188019967e-06, |
|
"loss": 0.0125, |
|
"step": 264500 |
|
}, |
|
{ |
|
"epoch": 110.23, |
|
"learning_rate": 8.981073211314477e-06, |
|
"loss": 0.01, |
|
"step": 265000 |
|
}, |
|
{ |
|
"epoch": 110.44, |
|
"learning_rate": 8.960316139767055e-06, |
|
"loss": 0.0099, |
|
"step": 265500 |
|
}, |
|
{ |
|
"epoch": 110.65, |
|
"learning_rate": 8.939517470881864e-06, |
|
"loss": 0.0108, |
|
"step": 266000 |
|
}, |
|
{ |
|
"epoch": 110.86, |
|
"learning_rate": 8.918718801996674e-06, |
|
"loss": 0.0109, |
|
"step": 266500 |
|
}, |
|
{ |
|
"epoch": 111.0, |
|
"eval_cer": 0.2476, |
|
"eval_gen_len": 13.6859, |
|
"eval_loss": 0.4497167766094208, |
|
"eval_runtime": 326.8835, |
|
"eval_samples_per_second": 9.808, |
|
"eval_steps_per_second": 2.453, |
|
"step": 266844 |
|
}, |
|
{ |
|
"epoch": 111.06, |
|
"learning_rate": 8.897920133111481e-06, |
|
"loss": 0.0106, |
|
"step": 267000 |
|
}, |
|
{ |
|
"epoch": 111.27, |
|
"learning_rate": 8.87712146422629e-06, |
|
"loss": 0.0103, |
|
"step": 267500 |
|
}, |
|
{ |
|
"epoch": 111.48, |
|
"learning_rate": 8.856322795341098e-06, |
|
"loss": 0.0104, |
|
"step": 268000 |
|
}, |
|
{ |
|
"epoch": 111.69, |
|
"learning_rate": 8.835524126455908e-06, |
|
"loss": 0.0126, |
|
"step": 268500 |
|
}, |
|
{ |
|
"epoch": 111.9, |
|
"learning_rate": 8.814725457570717e-06, |
|
"loss": 0.0115, |
|
"step": 269000 |
|
}, |
|
{ |
|
"epoch": 112.0, |
|
"eval_cer": 0.2528, |
|
"eval_gen_len": 13.9145, |
|
"eval_loss": 0.4540727734565735, |
|
"eval_runtime": 317.49, |
|
"eval_samples_per_second": 10.098, |
|
"eval_steps_per_second": 2.526, |
|
"step": 269248 |
|
}, |
|
{ |
|
"epoch": 112.1, |
|
"learning_rate": 8.794009983361066e-06, |
|
"loss": 0.0108, |
|
"step": 269500 |
|
}, |
|
{ |
|
"epoch": 112.31, |
|
"learning_rate": 8.773211314475875e-06, |
|
"loss": 0.0092, |
|
"step": 270000 |
|
}, |
|
{ |
|
"epoch": 112.52, |
|
"learning_rate": 8.752412645590683e-06, |
|
"loss": 0.0104, |
|
"step": 270500 |
|
}, |
|
{ |
|
"epoch": 112.73, |
|
"learning_rate": 8.731613976705492e-06, |
|
"loss": 0.0091, |
|
"step": 271000 |
|
}, |
|
{ |
|
"epoch": 112.94, |
|
"learning_rate": 8.7108153078203e-06, |
|
"loss": 0.0115, |
|
"step": 271500 |
|
}, |
|
{ |
|
"epoch": 113.0, |
|
"eval_cer": 0.2449, |
|
"eval_gen_len": 13.4545, |
|
"eval_loss": 0.4440736770629883, |
|
"eval_runtime": 280.5689, |
|
"eval_samples_per_second": 11.427, |
|
"eval_steps_per_second": 2.858, |
|
"step": 271652 |
|
}, |
|
{ |
|
"epoch": 113.14, |
|
"learning_rate": 8.69001663893511e-06, |
|
"loss": 0.0092, |
|
"step": 272000 |
|
}, |
|
{ |
|
"epoch": 113.35, |
|
"learning_rate": 8.669217970049919e-06, |
|
"loss": 0.0092, |
|
"step": 272500 |
|
}, |
|
{ |
|
"epoch": 113.56, |
|
"learning_rate": 8.648419301164726e-06, |
|
"loss": 0.0109, |
|
"step": 273000 |
|
}, |
|
{ |
|
"epoch": 113.77, |
|
"learning_rate": 8.627620632279536e-06, |
|
"loss": 0.0098, |
|
"step": 273500 |
|
}, |
|
{ |
|
"epoch": 113.98, |
|
"learning_rate": 8.606821963394343e-06, |
|
"loss": 0.0113, |
|
"step": 274000 |
|
}, |
|
{ |
|
"epoch": 114.0, |
|
"eval_cer": 0.2471, |
|
"eval_gen_len": 13.5477, |
|
"eval_loss": 0.4469629228115082, |
|
"eval_runtime": 276.6766, |
|
"eval_samples_per_second": 11.588, |
|
"eval_steps_per_second": 2.899, |
|
"step": 274056 |
|
}, |
|
{ |
|
"epoch": 114.18, |
|
"learning_rate": 8.586064891846923e-06, |
|
"loss": 0.0115, |
|
"step": 274500 |
|
}, |
|
{ |
|
"epoch": 114.39, |
|
"learning_rate": 8.565266222961731e-06, |
|
"loss": 0.0077, |
|
"step": 275000 |
|
}, |
|
{ |
|
"epoch": 114.6, |
|
"learning_rate": 8.54446755407654e-06, |
|
"loss": 0.0103, |
|
"step": 275500 |
|
}, |
|
{ |
|
"epoch": 114.81, |
|
"learning_rate": 8.523668885191348e-06, |
|
"loss": 0.01, |
|
"step": 276000 |
|
}, |
|
{ |
|
"epoch": 115.0, |
|
"eval_cer": 0.2537, |
|
"eval_gen_len": 13.9704, |
|
"eval_loss": 0.47477516531944275, |
|
"eval_runtime": 291.6272, |
|
"eval_samples_per_second": 10.993, |
|
"eval_steps_per_second": 2.75, |
|
"step": 276460 |
|
}, |
|
{ |
|
"epoch": 115.02, |
|
"learning_rate": 8.502911813643926e-06, |
|
"loss": 0.0116, |
|
"step": 276500 |
|
}, |
|
{ |
|
"epoch": 115.22, |
|
"learning_rate": 8.482113144758736e-06, |
|
"loss": 0.0102, |
|
"step": 277000 |
|
}, |
|
{ |
|
"epoch": 115.43, |
|
"learning_rate": 8.461314475873545e-06, |
|
"loss": 0.0085, |
|
"step": 277500 |
|
}, |
|
{ |
|
"epoch": 115.64, |
|
"learning_rate": 8.440515806988353e-06, |
|
"loss": 0.0104, |
|
"step": 278000 |
|
}, |
|
{ |
|
"epoch": 115.85, |
|
"learning_rate": 8.419758735440932e-06, |
|
"loss": 0.0125, |
|
"step": 278500 |
|
}, |
|
{ |
|
"epoch": 116.0, |
|
"eval_cer": 0.2438, |
|
"eval_gen_len": 13.4816, |
|
"eval_loss": 0.4395754933357239, |
|
"eval_runtime": 273.4039, |
|
"eval_samples_per_second": 11.726, |
|
"eval_steps_per_second": 2.933, |
|
"step": 278864 |
|
}, |
|
{ |
|
"epoch": 116.06, |
|
"learning_rate": 8.398960066555742e-06, |
|
"loss": 0.0093, |
|
"step": 279000 |
|
}, |
|
{ |
|
"epoch": 116.26, |
|
"learning_rate": 8.37816139767055e-06, |
|
"loss": 0.01, |
|
"step": 279500 |
|
}, |
|
{ |
|
"epoch": 116.47, |
|
"learning_rate": 8.357362728785359e-06, |
|
"loss": 0.0087, |
|
"step": 280000 |
|
}, |
|
{ |
|
"epoch": 116.68, |
|
"learning_rate": 8.336564059900167e-06, |
|
"loss": 0.0099, |
|
"step": 280500 |
|
}, |
|
{ |
|
"epoch": 116.89, |
|
"learning_rate": 8.315765391014976e-06, |
|
"loss": 0.0101, |
|
"step": 281000 |
|
}, |
|
{ |
|
"epoch": 117.0, |
|
"eval_cer": 0.2464, |
|
"eval_gen_len": 13.7723, |
|
"eval_loss": 0.44487103819847107, |
|
"eval_runtime": 278.0443, |
|
"eval_samples_per_second": 11.531, |
|
"eval_steps_per_second": 2.884, |
|
"step": 281268 |
|
}, |
|
{ |
|
"epoch": 117.1, |
|
"learning_rate": 8.295008319467554e-06, |
|
"loss": 0.0094, |
|
"step": 281500 |
|
}, |
|
{ |
|
"epoch": 117.3, |
|
"learning_rate": 8.274209650582364e-06, |
|
"loss": 0.0077, |
|
"step": 282000 |
|
}, |
|
{ |
|
"epoch": 117.51, |
|
"learning_rate": 8.253410981697171e-06, |
|
"loss": 0.01, |
|
"step": 282500 |
|
}, |
|
{ |
|
"epoch": 117.72, |
|
"learning_rate": 8.23261231281198e-06, |
|
"loss": 0.0113, |
|
"step": 283000 |
|
}, |
|
{ |
|
"epoch": 117.93, |
|
"learning_rate": 8.21181364392679e-06, |
|
"loss": 0.0108, |
|
"step": 283500 |
|
}, |
|
{ |
|
"epoch": 118.0, |
|
"eval_cer": 0.249, |
|
"eval_gen_len": 13.7711, |
|
"eval_loss": 0.4563674330711365, |
|
"eval_runtime": 286.5388, |
|
"eval_samples_per_second": 11.189, |
|
"eval_steps_per_second": 2.799, |
|
"step": 283672 |
|
}, |
|
{ |
|
"epoch": 118.14, |
|
"learning_rate": 8.191014975041598e-06, |
|
"loss": 0.0093, |
|
"step": 284000 |
|
}, |
|
{ |
|
"epoch": 118.34, |
|
"learning_rate": 8.170216306156407e-06, |
|
"loss": 0.0097, |
|
"step": 284500 |
|
}, |
|
{ |
|
"epoch": 118.55, |
|
"learning_rate": 8.149459234608985e-06, |
|
"loss": 0.0088, |
|
"step": 285000 |
|
}, |
|
{ |
|
"epoch": 118.76, |
|
"learning_rate": 8.128660565723795e-06, |
|
"loss": 0.0087, |
|
"step": 285500 |
|
}, |
|
{ |
|
"epoch": 118.97, |
|
"learning_rate": 8.107861896838602e-06, |
|
"loss": 0.0121, |
|
"step": 286000 |
|
}, |
|
{ |
|
"epoch": 119.0, |
|
"eval_cer": 0.2484, |
|
"eval_gen_len": 13.7848, |
|
"eval_loss": 0.4589692950248718, |
|
"eval_runtime": 285.9698, |
|
"eval_samples_per_second": 11.211, |
|
"eval_steps_per_second": 2.804, |
|
"step": 286076 |
|
}, |
|
{ |
|
"epoch": 119.18, |
|
"learning_rate": 8.087063227953412e-06, |
|
"loss": 0.0092, |
|
"step": 286500 |
|
}, |
|
{ |
|
"epoch": 119.38, |
|
"learning_rate": 8.06626455906822e-06, |
|
"loss": 0.0104, |
|
"step": 287000 |
|
}, |
|
{ |
|
"epoch": 119.59, |
|
"learning_rate": 8.045465890183029e-06, |
|
"loss": 0.0094, |
|
"step": 287500 |
|
}, |
|
{ |
|
"epoch": 119.8, |
|
"learning_rate": 8.024667221297838e-06, |
|
"loss": 0.0111, |
|
"step": 288000 |
|
}, |
|
{ |
|
"epoch": 120.0, |
|
"eval_cer": 0.2488, |
|
"eval_gen_len": 13.8621, |
|
"eval_loss": 0.46863117814064026, |
|
"eval_runtime": 282.0826, |
|
"eval_samples_per_second": 11.365, |
|
"eval_steps_per_second": 2.843, |
|
"step": 288480 |
|
}, |
|
{ |
|
"epoch": 120.01, |
|
"learning_rate": 8.003868552412646e-06, |
|
"loss": 0.0105, |
|
"step": 288500 |
|
}, |
|
{ |
|
"epoch": 120.22, |
|
"learning_rate": 7.983111480865226e-06, |
|
"loss": 0.0086, |
|
"step": 289000 |
|
}, |
|
{ |
|
"epoch": 120.42, |
|
"learning_rate": 7.962312811980035e-06, |
|
"loss": 0.0101, |
|
"step": 289500 |
|
}, |
|
{ |
|
"epoch": 120.63, |
|
"learning_rate": 7.941514143094843e-06, |
|
"loss": 0.0103, |
|
"step": 290000 |
|
}, |
|
{ |
|
"epoch": 120.84, |
|
"learning_rate": 7.920715474209652e-06, |
|
"loss": 0.009, |
|
"step": 290500 |
|
}, |
|
{ |
|
"epoch": 121.0, |
|
"eval_cer": 0.2451, |
|
"eval_gen_len": 13.7648, |
|
"eval_loss": 0.44192788004875183, |
|
"eval_runtime": 276.6356, |
|
"eval_samples_per_second": 11.589, |
|
"eval_steps_per_second": 2.899, |
|
"step": 290884 |
|
}, |
|
{ |
|
"epoch": 121.05, |
|
"learning_rate": 7.89991680532446e-06, |
|
"loss": 0.0103, |
|
"step": 291000 |
|
}, |
|
{ |
|
"epoch": 121.26, |
|
"learning_rate": 7.87915973377704e-06, |
|
"loss": 0.0109, |
|
"step": 291500 |
|
}, |
|
{ |
|
"epoch": 121.46, |
|
"learning_rate": 7.858361064891847e-06, |
|
"loss": 0.0096, |
|
"step": 292000 |
|
}, |
|
{ |
|
"epoch": 121.67, |
|
"learning_rate": 7.837603993344427e-06, |
|
"loss": 0.0104, |
|
"step": 292500 |
|
}, |
|
{ |
|
"epoch": 121.88, |
|
"learning_rate": 7.816805324459236e-06, |
|
"loss": 0.0093, |
|
"step": 293000 |
|
}, |
|
{ |
|
"epoch": 122.0, |
|
"eval_cer": 0.242, |
|
"eval_gen_len": 13.6089, |
|
"eval_loss": 0.43688440322875977, |
|
"eval_runtime": 279.7745, |
|
"eval_samples_per_second": 11.459, |
|
"eval_steps_per_second": 2.867, |
|
"step": 293288 |
|
}, |
|
{ |
|
"epoch": 122.09, |
|
"learning_rate": 7.796006655574044e-06, |
|
"loss": 0.0096, |
|
"step": 293500 |
|
}, |
|
{ |
|
"epoch": 122.3, |
|
"learning_rate": 7.775207986688853e-06, |
|
"loss": 0.0083, |
|
"step": 294000 |
|
}, |
|
{ |
|
"epoch": 122.5, |
|
"learning_rate": 7.754409317803661e-06, |
|
"loss": 0.0092, |
|
"step": 294500 |
|
}, |
|
{ |
|
"epoch": 122.71, |
|
"learning_rate": 7.73361064891847e-06, |
|
"loss": 0.0119, |
|
"step": 295000 |
|
}, |
|
{ |
|
"epoch": 122.92, |
|
"learning_rate": 7.71281198003328e-06, |
|
"loss": 0.0092, |
|
"step": 295500 |
|
}, |
|
{ |
|
"epoch": 123.0, |
|
"eval_cer": 0.2475, |
|
"eval_gen_len": 13.6304, |
|
"eval_loss": 0.4489113390445709, |
|
"eval_runtime": 292.1033, |
|
"eval_samples_per_second": 10.976, |
|
"eval_steps_per_second": 2.746, |
|
"step": 295692 |
|
}, |
|
{ |
|
"epoch": 123.13, |
|
"learning_rate": 7.692054908485858e-06, |
|
"loss": 0.0096, |
|
"step": 296000 |
|
}, |
|
{ |
|
"epoch": 123.34, |
|
"learning_rate": 7.671256239600666e-06, |
|
"loss": 0.0093, |
|
"step": 296500 |
|
}, |
|
{ |
|
"epoch": 123.54, |
|
"learning_rate": 7.650457570715475e-06, |
|
"loss": 0.0094, |
|
"step": 297000 |
|
}, |
|
{ |
|
"epoch": 123.75, |
|
"learning_rate": 7.629658901830283e-06, |
|
"loss": 0.0111, |
|
"step": 297500 |
|
}, |
|
{ |
|
"epoch": 123.96, |
|
"learning_rate": 7.608860232945092e-06, |
|
"loss": 0.0099, |
|
"step": 298000 |
|
}, |
|
{ |
|
"epoch": 124.0, |
|
"eval_cer": 0.2447, |
|
"eval_gen_len": 13.6413, |
|
"eval_loss": 0.45137402415275574, |
|
"eval_runtime": 273.6913, |
|
"eval_samples_per_second": 11.714, |
|
"eval_steps_per_second": 2.93, |
|
"step": 298096 |
|
}, |
|
{ |
|
"epoch": 124.17, |
|
"learning_rate": 7.588061564059901e-06, |
|
"loss": 0.0078, |
|
"step": 298500 |
|
}, |
|
{ |
|
"epoch": 124.38, |
|
"learning_rate": 7.56726289517471e-06, |
|
"loss": 0.0093, |
|
"step": 299000 |
|
}, |
|
{ |
|
"epoch": 124.58, |
|
"learning_rate": 7.5464642262895185e-06, |
|
"loss": 0.0089, |
|
"step": 299500 |
|
}, |
|
{ |
|
"epoch": 124.79, |
|
"learning_rate": 7.525707154742097e-06, |
|
"loss": 0.01, |
|
"step": 300000 |
|
}, |
|
{ |
|
"epoch": 125.0, |
|
"learning_rate": 7.5049500831946766e-06, |
|
"loss": 0.0103, |
|
"step": 300500 |
|
}, |
|
{ |
|
"epoch": 125.0, |
|
"eval_cer": 0.2467, |
|
"eval_gen_len": 13.8041, |
|
"eval_loss": 0.45419880747795105, |
|
"eval_runtime": 277.7542, |
|
"eval_samples_per_second": 11.543, |
|
"eval_steps_per_second": 2.887, |
|
"step": 300500 |
|
}, |
|
{ |
|
"epoch": 125.21, |
|
"learning_rate": 7.484151414309485e-06, |
|
"loss": 0.0076, |
|
"step": 301000 |
|
}, |
|
{ |
|
"epoch": 125.42, |
|
"learning_rate": 7.463352745424294e-06, |
|
"loss": 0.0089, |
|
"step": 301500 |
|
}, |
|
{ |
|
"epoch": 125.62, |
|
"learning_rate": 7.442554076539102e-06, |
|
"loss": 0.0089, |
|
"step": 302000 |
|
}, |
|
{ |
|
"epoch": 125.83, |
|
"learning_rate": 7.4217554076539115e-06, |
|
"loss": 0.0121, |
|
"step": 302500 |
|
}, |
|
{ |
|
"epoch": 126.0, |
|
"eval_cer": 0.2496, |
|
"eval_gen_len": 13.8525, |
|
"eval_loss": 0.4686892330646515, |
|
"eval_runtime": 288.7714, |
|
"eval_samples_per_second": 11.102, |
|
"eval_steps_per_second": 2.777, |
|
"step": 302904 |
|
}, |
|
{ |
|
"epoch": 126.04, |
|
"learning_rate": 7.40095673876872e-06, |
|
"loss": 0.0091, |
|
"step": 303000 |
|
}, |
|
{ |
|
"epoch": 126.25, |
|
"learning_rate": 7.3801580698835285e-06, |
|
"loss": 0.0089, |
|
"step": 303500 |
|
}, |
|
{ |
|
"epoch": 126.46, |
|
"learning_rate": 7.359359400998337e-06, |
|
"loss": 0.0092, |
|
"step": 304000 |
|
}, |
|
{ |
|
"epoch": 126.66, |
|
"learning_rate": 7.338560732113146e-06, |
|
"loss": 0.0081, |
|
"step": 304500 |
|
}, |
|
{ |
|
"epoch": 126.87, |
|
"learning_rate": 7.317845257903495e-06, |
|
"loss": 0.0116, |
|
"step": 305000 |
|
}, |
|
{ |
|
"epoch": 127.0, |
|
"eval_cer": 0.2443, |
|
"eval_gen_len": 13.6432, |
|
"eval_loss": 0.4484730660915375, |
|
"eval_runtime": 273.2803, |
|
"eval_samples_per_second": 11.732, |
|
"eval_steps_per_second": 2.935, |
|
"step": 305308 |
|
}, |
|
{ |
|
"epoch": 127.08, |
|
"learning_rate": 7.297046589018303e-06, |
|
"loss": 0.0077, |
|
"step": 305500 |
|
}, |
|
{ |
|
"epoch": 127.29, |
|
"learning_rate": 7.276247920133111e-06, |
|
"loss": 0.009, |
|
"step": 306000 |
|
}, |
|
{ |
|
"epoch": 127.5, |
|
"learning_rate": 7.25544925124792e-06, |
|
"loss": 0.0094, |
|
"step": 306500 |
|
}, |
|
{ |
|
"epoch": 127.7, |
|
"learning_rate": 7.234650582362729e-06, |
|
"loss": 0.0081, |
|
"step": 307000 |
|
}, |
|
{ |
|
"epoch": 127.91, |
|
"learning_rate": 7.213851913477538e-06, |
|
"loss": 0.0105, |
|
"step": 307500 |
|
}, |
|
{ |
|
"epoch": 128.0, |
|
"eval_cer": 0.2437, |
|
"eval_gen_len": 13.7661, |
|
"eval_loss": 0.4494189918041229, |
|
"eval_runtime": 282.0309, |
|
"eval_samples_per_second": 11.368, |
|
"eval_steps_per_second": 2.844, |
|
"step": 307712 |
|
}, |
|
{ |
|
"epoch": 128.12, |
|
"learning_rate": 7.193094841930118e-06, |
|
"loss": 0.0085, |
|
"step": 308000 |
|
}, |
|
{ |
|
"epoch": 128.33, |
|
"learning_rate": 7.172296173044926e-06, |
|
"loss": 0.0086, |
|
"step": 308500 |
|
}, |
|
{ |
|
"epoch": 128.54, |
|
"learning_rate": 7.151497504159735e-06, |
|
"loss": 0.0091, |
|
"step": 309000 |
|
}, |
|
{ |
|
"epoch": 128.74, |
|
"learning_rate": 7.130698835274543e-06, |
|
"loss": 0.0094, |
|
"step": 309500 |
|
}, |
|
{ |
|
"epoch": 128.95, |
|
"learning_rate": 7.109900166389352e-06, |
|
"loss": 0.0087, |
|
"step": 310000 |
|
}, |
|
{ |
|
"epoch": 129.0, |
|
"eval_cer": 0.2465, |
|
"eval_gen_len": 13.5352, |
|
"eval_loss": 0.46537643671035767, |
|
"eval_runtime": 272.0294, |
|
"eval_samples_per_second": 11.785, |
|
"eval_steps_per_second": 2.948, |
|
"step": 310116 |
|
}, |
|
{ |
|
"epoch": 129.16, |
|
"learning_rate": 7.08910149750416e-06, |
|
"loss": 0.0088, |
|
"step": 310500 |
|
}, |
|
{ |
|
"epoch": 129.37, |
|
"learning_rate": 7.0683028286189696e-06, |
|
"loss": 0.0076, |
|
"step": 311000 |
|
}, |
|
{ |
|
"epoch": 129.58, |
|
"learning_rate": 7.047504159733778e-06, |
|
"loss": 0.0088, |
|
"step": 311500 |
|
}, |
|
{ |
|
"epoch": 129.78, |
|
"learning_rate": 7.026705490848587e-06, |
|
"loss": 0.0092, |
|
"step": 312000 |
|
}, |
|
{ |
|
"epoch": 129.99, |
|
"learning_rate": 7.005948419301165e-06, |
|
"loss": 0.0106, |
|
"step": 312500 |
|
}, |
|
{ |
|
"epoch": 130.0, |
|
"eval_cer": 0.2457, |
|
"eval_gen_len": 13.5667, |
|
"eval_loss": 0.44364768266677856, |
|
"eval_runtime": 269.0291, |
|
"eval_samples_per_second": 11.917, |
|
"eval_steps_per_second": 2.981, |
|
"step": 312520 |
|
}, |
|
{ |
|
"epoch": 130.2, |
|
"learning_rate": 6.985149750415974e-06, |
|
"loss": 0.008, |
|
"step": 313000 |
|
}, |
|
{ |
|
"epoch": 130.41, |
|
"learning_rate": 6.964351081530783e-06, |
|
"loss": 0.0072, |
|
"step": 313500 |
|
}, |
|
{ |
|
"epoch": 130.62, |
|
"learning_rate": 6.943552412645591e-06, |
|
"loss": 0.0091, |
|
"step": 314000 |
|
}, |
|
{ |
|
"epoch": 130.82, |
|
"learning_rate": 6.9227537437604e-06, |
|
"loss": 0.0087, |
|
"step": 314500 |
|
}, |
|
{ |
|
"epoch": 131.0, |
|
"eval_cer": 0.2451, |
|
"eval_gen_len": 13.7358, |
|
"eval_loss": 0.4612971544265747, |
|
"eval_runtime": 274.1913, |
|
"eval_samples_per_second": 11.693, |
|
"eval_steps_per_second": 2.925, |
|
"step": 314924 |
|
}, |
|
{ |
|
"epoch": 131.03, |
|
"learning_rate": 6.90199667221298e-06, |
|
"loss": 0.0107, |
|
"step": 315000 |
|
}, |
|
{ |
|
"epoch": 131.24, |
|
"learning_rate": 6.881239600665558e-06, |
|
"loss": 0.0082, |
|
"step": 315500 |
|
}, |
|
{ |
|
"epoch": 131.45, |
|
"learning_rate": 6.860440931780366e-06, |
|
"loss": 0.0089, |
|
"step": 316000 |
|
}, |
|
{ |
|
"epoch": 131.66, |
|
"learning_rate": 6.839642262895176e-06, |
|
"loss": 0.009, |
|
"step": 316500 |
|
}, |
|
{ |
|
"epoch": 131.86, |
|
"learning_rate": 6.818843594009984e-06, |
|
"loss": 0.0104, |
|
"step": 317000 |
|
}, |
|
{ |
|
"epoch": 132.0, |
|
"eval_cer": 0.2468, |
|
"eval_gen_len": 13.5936, |
|
"eval_loss": 0.4653697907924652, |
|
"eval_runtime": 277.9546, |
|
"eval_samples_per_second": 11.534, |
|
"eval_steps_per_second": 2.885, |
|
"step": 317328 |
|
}, |
|
{ |
|
"epoch": 132.07, |
|
"learning_rate": 6.798044925124793e-06, |
|
"loss": 0.0094, |
|
"step": 317500 |
|
}, |
|
{ |
|
"epoch": 132.28, |
|
"learning_rate": 6.777246256239601e-06, |
|
"loss": 0.0079, |
|
"step": 318000 |
|
}, |
|
{ |
|
"epoch": 132.49, |
|
"learning_rate": 6.75644758735441e-06, |
|
"loss": 0.0098, |
|
"step": 318500 |
|
}, |
|
{ |
|
"epoch": 132.7, |
|
"learning_rate": 6.735648918469218e-06, |
|
"loss": 0.0081, |
|
"step": 319000 |
|
}, |
|
{ |
|
"epoch": 132.9, |
|
"learning_rate": 6.714891846921797e-06, |
|
"loss": 0.0089, |
|
"step": 319500 |
|
}, |
|
{ |
|
"epoch": 133.0, |
|
"eval_cer": 0.2455, |
|
"eval_gen_len": 13.6875, |
|
"eval_loss": 0.45620593428611755, |
|
"eval_runtime": 284.3664, |
|
"eval_samples_per_second": 11.274, |
|
"eval_steps_per_second": 2.82, |
|
"step": 319732 |
|
}, |
|
{ |
|
"epoch": 133.11, |
|
"learning_rate": 6.694093178036606e-06, |
|
"loss": 0.0085, |
|
"step": 320000 |
|
}, |
|
{ |
|
"epoch": 133.32, |
|
"learning_rate": 6.673294509151414e-06, |
|
"loss": 0.0091, |
|
"step": 320500 |
|
}, |
|
{ |
|
"epoch": 133.53, |
|
"learning_rate": 6.652495840266223e-06, |
|
"loss": 0.0073, |
|
"step": 321000 |
|
}, |
|
{ |
|
"epoch": 133.74, |
|
"learning_rate": 6.631738768718803e-06, |
|
"loss": 0.0091, |
|
"step": 321500 |
|
}, |
|
{ |
|
"epoch": 133.94, |
|
"learning_rate": 6.610940099833611e-06, |
|
"loss": 0.0088, |
|
"step": 322000 |
|
}, |
|
{ |
|
"epoch": 134.0, |
|
"eval_cer": 0.2455, |
|
"eval_gen_len": 13.6653, |
|
"eval_loss": 0.4463290274143219, |
|
"eval_runtime": 265.7595, |
|
"eval_samples_per_second": 12.064, |
|
"eval_steps_per_second": 3.018, |
|
"step": 322136 |
|
}, |
|
{ |
|
"epoch": 134.15, |
|
"learning_rate": 6.59014143094842e-06, |
|
"loss": 0.0082, |
|
"step": 322500 |
|
}, |
|
{ |
|
"epoch": 134.36, |
|
"learning_rate": 6.569342762063229e-06, |
|
"loss": 0.0087, |
|
"step": 323000 |
|
}, |
|
{ |
|
"epoch": 134.57, |
|
"learning_rate": 6.548544093178038e-06, |
|
"loss": 0.009, |
|
"step": 323500 |
|
}, |
|
{ |
|
"epoch": 134.78, |
|
"learning_rate": 6.527745424292846e-06, |
|
"loss": 0.0093, |
|
"step": 324000 |
|
}, |
|
{ |
|
"epoch": 134.98, |
|
"learning_rate": 6.506946755407655e-06, |
|
"loss": 0.0088, |
|
"step": 324500 |
|
}, |
|
{ |
|
"epoch": 135.0, |
|
"eval_cer": 0.245, |
|
"eval_gen_len": 13.5749, |
|
"eval_loss": 0.4489509165287018, |
|
"eval_runtime": 256.739, |
|
"eval_samples_per_second": 12.487, |
|
"eval_steps_per_second": 3.124, |
|
"step": 324540 |
|
}, |
|
{ |
|
"epoch": 135.19, |
|
"learning_rate": 6.486148086522463e-06, |
|
"loss": 0.0076, |
|
"step": 325000 |
|
}, |
|
{ |
|
"epoch": 135.4, |
|
"learning_rate": 6.465349417637273e-06, |
|
"loss": 0.0078, |
|
"step": 325500 |
|
}, |
|
{ |
|
"epoch": 135.61, |
|
"learning_rate": 6.444550748752081e-06, |
|
"loss": 0.007, |
|
"step": 326000 |
|
}, |
|
{ |
|
"epoch": 135.82, |
|
"learning_rate": 6.423793677204659e-06, |
|
"loss": 0.0086, |
|
"step": 326500 |
|
}, |
|
{ |
|
"epoch": 136.0, |
|
"eval_cer": 0.2458, |
|
"eval_gen_len": 13.8032, |
|
"eval_loss": 0.45637834072113037, |
|
"eval_runtime": 275.2079, |
|
"eval_samples_per_second": 11.649, |
|
"eval_steps_per_second": 2.914, |
|
"step": 326944 |
|
}, |
|
{ |
|
"epoch": 136.02, |
|
"learning_rate": 6.402995008319468e-06, |
|
"loss": 0.0097, |
|
"step": 327000 |
|
}, |
|
{ |
|
"epoch": 136.23, |
|
"learning_rate": 6.382237936772047e-06, |
|
"loss": 0.0084, |
|
"step": 327500 |
|
}, |
|
{ |
|
"epoch": 136.44, |
|
"learning_rate": 6.361439267886855e-06, |
|
"loss": 0.0084, |
|
"step": 328000 |
|
}, |
|
{ |
|
"epoch": 136.65, |
|
"learning_rate": 6.340640599001664e-06, |
|
"loss": 0.0083, |
|
"step": 328500 |
|
}, |
|
{ |
|
"epoch": 136.86, |
|
"learning_rate": 6.319841930116472e-06, |
|
"loss": 0.0083, |
|
"step": 329000 |
|
}, |
|
{ |
|
"epoch": 137.0, |
|
"eval_cer": 0.2471, |
|
"eval_gen_len": 13.6478, |
|
"eval_loss": 0.4573554992675781, |
|
"eval_runtime": 269.5066, |
|
"eval_samples_per_second": 11.896, |
|
"eval_steps_per_second": 2.976, |
|
"step": 329348 |
|
}, |
|
{ |
|
"epoch": 137.06, |
|
"learning_rate": 6.299043261231281e-06, |
|
"loss": 0.0089, |
|
"step": 329500 |
|
}, |
|
{ |
|
"epoch": 137.27, |
|
"learning_rate": 6.27824459234609e-06, |
|
"loss": 0.0087, |
|
"step": 330000 |
|
}, |
|
{ |
|
"epoch": 137.48, |
|
"learning_rate": 6.257445923460899e-06, |
|
"loss": 0.0086, |
|
"step": 330500 |
|
}, |
|
{ |
|
"epoch": 137.69, |
|
"learning_rate": 6.236647254575707e-06, |
|
"loss": 0.0085, |
|
"step": 331000 |
|
}, |
|
{ |
|
"epoch": 137.9, |
|
"learning_rate": 6.215890183028287e-06, |
|
"loss": 0.0092, |
|
"step": 331500 |
|
}, |
|
{ |
|
"epoch": 138.0, |
|
"eval_cer": 0.2487, |
|
"eval_gen_len": 13.8503, |
|
"eval_loss": 0.46958354115486145, |
|
"eval_runtime": 265.4531, |
|
"eval_samples_per_second": 12.077, |
|
"eval_steps_per_second": 3.021, |
|
"step": 331752 |
|
}, |
|
{ |
|
"epoch": 138.1, |
|
"learning_rate": 6.195091514143096e-06, |
|
"loss": 0.0079, |
|
"step": 332000 |
|
}, |
|
{ |
|
"epoch": 138.31, |
|
"learning_rate": 6.174292845257904e-06, |
|
"loss": 0.0073, |
|
"step": 332500 |
|
}, |
|
{ |
|
"epoch": 138.52, |
|
"learning_rate": 6.153494176372713e-06, |
|
"loss": 0.0074, |
|
"step": 333000 |
|
}, |
|
{ |
|
"epoch": 138.73, |
|
"learning_rate": 6.132695507487521e-06, |
|
"loss": 0.0089, |
|
"step": 333500 |
|
}, |
|
{ |
|
"epoch": 138.94, |
|
"learning_rate": 6.111896838602331e-06, |
|
"loss": 0.0082, |
|
"step": 334000 |
|
}, |
|
{ |
|
"epoch": 139.0, |
|
"eval_cer": 0.2476, |
|
"eval_gen_len": 13.7916, |
|
"eval_loss": 0.4610365629196167, |
|
"eval_runtime": 273.061, |
|
"eval_samples_per_second": 11.741, |
|
"eval_steps_per_second": 2.937, |
|
"step": 334156 |
|
}, |
|
{ |
|
"epoch": 139.14, |
|
"learning_rate": 6.091098169717139e-06, |
|
"loss": 0.0076, |
|
"step": 334500 |
|
}, |
|
{ |
|
"epoch": 139.35, |
|
"learning_rate": 6.070299500831948e-06, |
|
"loss": 0.0083, |
|
"step": 335000 |
|
}, |
|
{ |
|
"epoch": 139.56, |
|
"learning_rate": 6.049542429284526e-06, |
|
"loss": 0.0088, |
|
"step": 335500 |
|
}, |
|
{ |
|
"epoch": 139.77, |
|
"learning_rate": 6.0287437603993344e-06, |
|
"loss": 0.0078, |
|
"step": 336000 |
|
}, |
|
{ |
|
"epoch": 139.98, |
|
"learning_rate": 6.007945091514144e-06, |
|
"loss": 0.0092, |
|
"step": 336500 |
|
}, |
|
{ |
|
"epoch": 140.0, |
|
"eval_cer": 0.2432, |
|
"eval_gen_len": 13.6344, |
|
"eval_loss": 0.43919724225997925, |
|
"eval_runtime": 265.4301, |
|
"eval_samples_per_second": 12.079, |
|
"eval_steps_per_second": 3.022, |
|
"step": 336560 |
|
}, |
|
{ |
|
"epoch": 140.18, |
|
"learning_rate": 5.987146422628952e-06, |
|
"loss": 0.007, |
|
"step": 337000 |
|
}, |
|
{ |
|
"epoch": 140.39, |
|
"learning_rate": 5.966347753743761e-06, |
|
"loss": 0.0078, |
|
"step": 337500 |
|
}, |
|
{ |
|
"epoch": 140.6, |
|
"learning_rate": 5.945590682196339e-06, |
|
"loss": 0.0087, |
|
"step": 338000 |
|
}, |
|
{ |
|
"epoch": 140.81, |
|
"learning_rate": 5.924792013311148e-06, |
|
"loss": 0.0083, |
|
"step": 338500 |
|
}, |
|
{ |
|
"epoch": 141.0, |
|
"eval_cer": 0.2461, |
|
"eval_gen_len": 13.733, |
|
"eval_loss": 0.4848983883857727, |
|
"eval_runtime": 269.6844, |
|
"eval_samples_per_second": 11.888, |
|
"eval_steps_per_second": 2.974, |
|
"step": 338964 |
|
}, |
|
{ |
|
"epoch": 141.01, |
|
"learning_rate": 5.904034941763727e-06, |
|
"loss": 0.0083, |
|
"step": 339000 |
|
}, |
|
{ |
|
"epoch": 141.22, |
|
"learning_rate": 5.883236272878537e-06, |
|
"loss": 0.0079, |
|
"step": 339500 |
|
}, |
|
{ |
|
"epoch": 141.43, |
|
"learning_rate": 5.862437603993345e-06, |
|
"loss": 0.007, |
|
"step": 340000 |
|
}, |
|
{ |
|
"epoch": 141.64, |
|
"learning_rate": 5.841638935108154e-06, |
|
"loss": 0.0085, |
|
"step": 340500 |
|
}, |
|
{ |
|
"epoch": 141.85, |
|
"learning_rate": 5.820840266222962e-06, |
|
"loss": 0.0085, |
|
"step": 341000 |
|
}, |
|
{ |
|
"epoch": 142.0, |
|
"eval_cer": 0.2475, |
|
"eval_gen_len": 13.8278, |
|
"eval_loss": 0.46004167199134827, |
|
"eval_runtime": 262.9827, |
|
"eval_samples_per_second": 12.191, |
|
"eval_steps_per_second": 3.05, |
|
"step": 341368 |
|
}, |
|
{ |
|
"epoch": 142.05, |
|
"learning_rate": 5.800041597337771e-06, |
|
"loss": 0.0079, |
|
"step": 341500 |
|
}, |
|
{ |
|
"epoch": 142.26, |
|
"learning_rate": 5.779242928452579e-06, |
|
"loss": 0.0078, |
|
"step": 342000 |
|
}, |
|
{ |
|
"epoch": 142.47, |
|
"learning_rate": 5.758444259567389e-06, |
|
"loss": 0.0083, |
|
"step": 342500 |
|
}, |
|
{ |
|
"epoch": 142.68, |
|
"learning_rate": 5.737645590682197e-06, |
|
"loss": 0.0072, |
|
"step": 343000 |
|
}, |
|
{ |
|
"epoch": 142.89, |
|
"learning_rate": 5.7168885191347755e-06, |
|
"loss": 0.008, |
|
"step": 343500 |
|
}, |
|
{ |
|
"epoch": 143.0, |
|
"eval_cer": 0.2455, |
|
"eval_gen_len": 13.7137, |
|
"eval_loss": 0.4594569206237793, |
|
"eval_runtime": 270.269, |
|
"eval_samples_per_second": 11.862, |
|
"eval_steps_per_second": 2.967, |
|
"step": 343772 |
|
}, |
|
{ |
|
"epoch": 143.09, |
|
"learning_rate": 5.696089850249584e-06, |
|
"loss": 0.0085, |
|
"step": 344000 |
|
}, |
|
{ |
|
"epoch": 143.3, |
|
"learning_rate": 5.675291181364393e-06, |
|
"loss": 0.0082, |
|
"step": 344500 |
|
}, |
|
{ |
|
"epoch": 143.51, |
|
"learning_rate": 5.654492512479202e-06, |
|
"loss": 0.0078, |
|
"step": 345000 |
|
}, |
|
{ |
|
"epoch": 143.72, |
|
"learning_rate": 5.633735440931781e-06, |
|
"loss": 0.0091, |
|
"step": 345500 |
|
}, |
|
{ |
|
"epoch": 143.93, |
|
"learning_rate": 5.61293677204659e-06, |
|
"loss": 0.0084, |
|
"step": 346000 |
|
}, |
|
{ |
|
"epoch": 144.0, |
|
"eval_cer": 0.2419, |
|
"eval_gen_len": 13.684, |
|
"eval_loss": 0.44198158383369446, |
|
"eval_runtime": 272.3122, |
|
"eval_samples_per_second": 11.773, |
|
"eval_steps_per_second": 2.945, |
|
"step": 346176 |
|
}, |
|
{ |
|
"epoch": 144.13, |
|
"learning_rate": 5.592138103161399e-06, |
|
"loss": 0.0073, |
|
"step": 346500 |
|
}, |
|
{ |
|
"epoch": 144.34, |
|
"learning_rate": 5.571339434276207e-06, |
|
"loss": 0.0076, |
|
"step": 347000 |
|
}, |
|
{ |
|
"epoch": 144.55, |
|
"learning_rate": 5.550540765391016e-06, |
|
"loss": 0.0075, |
|
"step": 347500 |
|
}, |
|
{ |
|
"epoch": 144.76, |
|
"learning_rate": 5.529783693843595e-06, |
|
"loss": 0.0092, |
|
"step": 348000 |
|
}, |
|
{ |
|
"epoch": 144.97, |
|
"learning_rate": 5.508985024958403e-06, |
|
"loss": 0.0087, |
|
"step": 348500 |
|
}, |
|
{ |
|
"epoch": 145.0, |
|
"eval_cer": 0.2435, |
|
"eval_gen_len": 13.5817, |
|
"eval_loss": 0.4379993975162506, |
|
"eval_runtime": 266.6103, |
|
"eval_samples_per_second": 12.025, |
|
"eval_steps_per_second": 3.008, |
|
"step": 348580 |
|
}, |
|
{ |
|
"epoch": 145.17, |
|
"learning_rate": 5.488186356073212e-06, |
|
"loss": 0.007, |
|
"step": 349000 |
|
}, |
|
{ |
|
"epoch": 145.38, |
|
"learning_rate": 5.46738768718802e-06, |
|
"loss": 0.0071, |
|
"step": 349500 |
|
}, |
|
{ |
|
"epoch": 145.59, |
|
"learning_rate": 5.4466306156405994e-06, |
|
"loss": 0.0086, |
|
"step": 350000 |
|
}, |
|
{ |
|
"epoch": 145.8, |
|
"learning_rate": 5.425831946755408e-06, |
|
"loss": 0.0074, |
|
"step": 350500 |
|
}, |
|
{ |
|
"epoch": 146.0, |
|
"eval_cer": 0.2438, |
|
"eval_gen_len": 13.747, |
|
"eval_loss": 0.4474620223045349, |
|
"eval_runtime": 259.9915, |
|
"eval_samples_per_second": 12.331, |
|
"eval_steps_per_second": 3.085, |
|
"step": 350984 |
|
}, |
|
{ |
|
"epoch": 146.01, |
|
"learning_rate": 5.4050332778702165e-06, |
|
"loss": 0.0085, |
|
"step": 351000 |
|
}, |
|
{ |
|
"epoch": 146.21, |
|
"learning_rate": 5.384234608985025e-06, |
|
"loss": 0.0071, |
|
"step": 351500 |
|
}, |
|
{ |
|
"epoch": 146.42, |
|
"learning_rate": 5.3634359400998335e-06, |
|
"loss": 0.0065, |
|
"step": 352000 |
|
}, |
|
{ |
|
"epoch": 146.63, |
|
"learning_rate": 5.342637271214642e-06, |
|
"loss": 0.0076, |
|
"step": 352500 |
|
}, |
|
{ |
|
"epoch": 146.84, |
|
"learning_rate": 5.3218386023294514e-06, |
|
"loss": 0.0076, |
|
"step": 353000 |
|
}, |
|
{ |
|
"epoch": 147.0, |
|
"eval_cer": 0.2433, |
|
"eval_gen_len": 13.5989, |
|
"eval_loss": 0.4507221579551697, |
|
"eval_runtime": 259.806, |
|
"eval_samples_per_second": 12.34, |
|
"eval_steps_per_second": 3.087, |
|
"step": 353388 |
|
}, |
|
{ |
|
"epoch": 147.05, |
|
"learning_rate": 5.30103993344426e-06, |
|
"loss": 0.0078, |
|
"step": 353500 |
|
}, |
|
{ |
|
"epoch": 147.25, |
|
"learning_rate": 5.2802412645590685e-06, |
|
"loss": 0.0077, |
|
"step": 354000 |
|
}, |
|
{ |
|
"epoch": 147.46, |
|
"learning_rate": 5.259484193011648e-06, |
|
"loss": 0.0075, |
|
"step": 354500 |
|
}, |
|
{ |
|
"epoch": 147.67, |
|
"learning_rate": 5.238685524126457e-06, |
|
"loss": 0.0068, |
|
"step": 355000 |
|
}, |
|
{ |
|
"epoch": 147.88, |
|
"learning_rate": 5.217886855241265e-06, |
|
"loss": 0.0091, |
|
"step": 355500 |
|
}, |
|
{ |
|
"epoch": 148.0, |
|
"eval_cer": 0.2461, |
|
"eval_gen_len": 13.8125, |
|
"eval_loss": 0.47150808572769165, |
|
"eval_runtime": 262.9368, |
|
"eval_samples_per_second": 12.193, |
|
"eval_steps_per_second": 3.05, |
|
"step": 355792 |
|
}, |
|
{ |
|
"epoch": 148.09, |
|
"learning_rate": 5.1971297836938436e-06, |
|
"loss": 0.0082, |
|
"step": 356000 |
|
}, |
|
{ |
|
"epoch": 148.29, |
|
"learning_rate": 5.176331114808653e-06, |
|
"loss": 0.0066, |
|
"step": 356500 |
|
}, |
|
{ |
|
"epoch": 148.5, |
|
"learning_rate": 5.1555324459234614e-06, |
|
"loss": 0.0081, |
|
"step": 357000 |
|
}, |
|
{ |
|
"epoch": 148.71, |
|
"learning_rate": 5.13473377703827e-06, |
|
"loss": 0.0074, |
|
"step": 357500 |
|
}, |
|
{ |
|
"epoch": 148.92, |
|
"learning_rate": 5.1139351081530785e-06, |
|
"loss": 0.0078, |
|
"step": 358000 |
|
}, |
|
{ |
|
"epoch": 149.0, |
|
"eval_cer": 0.2472, |
|
"eval_gen_len": 13.8253, |
|
"eval_loss": 0.46047914028167725, |
|
"eval_runtime": 271.5636, |
|
"eval_samples_per_second": 11.806, |
|
"eval_steps_per_second": 2.953, |
|
"step": 358196 |
|
}, |
|
{ |
|
"epoch": 149.13, |
|
"learning_rate": 5.093136439267887e-06, |
|
"loss": 0.0076, |
|
"step": 358500 |
|
}, |
|
{ |
|
"epoch": 149.33, |
|
"learning_rate": 5.0723377703826955e-06, |
|
"loss": 0.0084, |
|
"step": 359000 |
|
}, |
|
{ |
|
"epoch": 149.54, |
|
"learning_rate": 5.051539101497505e-06, |
|
"loss": 0.0081, |
|
"step": 359500 |
|
}, |
|
{ |
|
"epoch": 149.75, |
|
"learning_rate": 5.0307404326123134e-06, |
|
"loss": 0.0075, |
|
"step": 360000 |
|
}, |
|
{ |
|
"epoch": 149.96, |
|
"learning_rate": 5.009941763727122e-06, |
|
"loss": 0.0075, |
|
"step": 360500 |
|
}, |
|
{ |
|
"epoch": 150.0, |
|
"eval_cer": 0.2416, |
|
"eval_gen_len": 13.4729, |
|
"eval_loss": 0.4448852837085724, |
|
"eval_runtime": 262.3745, |
|
"eval_samples_per_second": 12.219, |
|
"eval_steps_per_second": 3.057, |
|
"step": 360600 |
|
}, |
|
{ |
|
"epoch": 150.17, |
|
"learning_rate": 4.989184692179701e-06, |
|
"loss": 0.0078, |
|
"step": 361000 |
|
}, |
|
{ |
|
"epoch": 150.37, |
|
"learning_rate": 4.9683860232945095e-06, |
|
"loss": 0.0065, |
|
"step": 361500 |
|
}, |
|
{ |
|
"epoch": 150.58, |
|
"learning_rate": 4.947587354409318e-06, |
|
"loss": 0.0077, |
|
"step": 362000 |
|
}, |
|
{ |
|
"epoch": 150.79, |
|
"learning_rate": 4.926788685524127e-06, |
|
"loss": 0.008, |
|
"step": 362500 |
|
}, |
|
{ |
|
"epoch": 151.0, |
|
"learning_rate": 4.905990016638936e-06, |
|
"loss": 0.0076, |
|
"step": 363000 |
|
}, |
|
{ |
|
"epoch": 151.0, |
|
"eval_cer": 0.2458, |
|
"eval_gen_len": 13.7838, |
|
"eval_loss": 0.47447100281715393, |
|
"eval_runtime": 262.0741, |
|
"eval_samples_per_second": 12.233, |
|
"eval_steps_per_second": 3.06, |
|
"step": 363004 |
|
}, |
|
{ |
|
"epoch": 151.21, |
|
"learning_rate": 4.885232945091515e-06, |
|
"loss": 0.0075, |
|
"step": 363500 |
|
}, |
|
{ |
|
"epoch": 151.41, |
|
"learning_rate": 4.8644342762063235e-06, |
|
"loss": 0.0074, |
|
"step": 364000 |
|
}, |
|
{ |
|
"epoch": 151.62, |
|
"learning_rate": 4.843635607321132e-06, |
|
"loss": 0.0077, |
|
"step": 364500 |
|
}, |
|
{ |
|
"epoch": 151.83, |
|
"learning_rate": 4.822878535773711e-06, |
|
"loss": 0.0076, |
|
"step": 365000 |
|
}, |
|
{ |
|
"epoch": 152.0, |
|
"eval_cer": 0.2461, |
|
"eval_gen_len": 13.7093, |
|
"eval_loss": 0.4570690095424652, |
|
"eval_runtime": 264.709, |
|
"eval_samples_per_second": 12.111, |
|
"eval_steps_per_second": 3.03, |
|
"step": 365408 |
|
}, |
|
{ |
|
"epoch": 152.04, |
|
"learning_rate": 4.8020798668885195e-06, |
|
"loss": 0.0075, |
|
"step": 365500 |
|
}, |
|
{ |
|
"epoch": 152.25, |
|
"learning_rate": 4.781281198003328e-06, |
|
"loss": 0.0066, |
|
"step": 366000 |
|
}, |
|
{ |
|
"epoch": 152.45, |
|
"learning_rate": 4.7604825291181366e-06, |
|
"loss": 0.0079, |
|
"step": 366500 |
|
}, |
|
{ |
|
"epoch": 152.66, |
|
"learning_rate": 4.739683860232945e-06, |
|
"loss": 0.0076, |
|
"step": 367000 |
|
}, |
|
{ |
|
"epoch": 152.87, |
|
"learning_rate": 4.718885191347754e-06, |
|
"loss": 0.0088, |
|
"step": 367500 |
|
}, |
|
{ |
|
"epoch": 153.0, |
|
"eval_cer": 0.2414, |
|
"eval_gen_len": 13.5468, |
|
"eval_loss": 0.45958101749420166, |
|
"eval_runtime": 263.6085, |
|
"eval_samples_per_second": 12.162, |
|
"eval_steps_per_second": 3.042, |
|
"step": 367812 |
|
}, |
|
{ |
|
"epoch": 153.08, |
|
"learning_rate": 4.6981281198003335e-06, |
|
"loss": 0.0081, |
|
"step": 368000 |
|
}, |
|
{ |
|
"epoch": 153.29, |
|
"learning_rate": 4.677329450915142e-06, |
|
"loss": 0.0072, |
|
"step": 368500 |
|
}, |
|
{ |
|
"epoch": 153.49, |
|
"learning_rate": 4.6565307820299505e-06, |
|
"loss": 0.007, |
|
"step": 369000 |
|
}, |
|
{ |
|
"epoch": 153.7, |
|
"learning_rate": 4.635732113144759e-06, |
|
"loss": 0.0068, |
|
"step": 369500 |
|
}, |
|
{ |
|
"epoch": 153.91, |
|
"learning_rate": 4.6149334442595676e-06, |
|
"loss": 0.0082, |
|
"step": 370000 |
|
}, |
|
{ |
|
"epoch": 154.0, |
|
"eval_cer": 0.2475, |
|
"eval_gen_len": 13.8384, |
|
"eval_loss": 0.46518319845199585, |
|
"eval_runtime": 270.3025, |
|
"eval_samples_per_second": 11.861, |
|
"eval_steps_per_second": 2.967, |
|
"step": 370216 |
|
}, |
|
{ |
|
"epoch": 154.12, |
|
"learning_rate": 4.594134775374376e-06, |
|
"loss": 0.0067, |
|
"step": 370500 |
|
}, |
|
{ |
|
"epoch": 154.33, |
|
"learning_rate": 4.5733361064891855e-06, |
|
"loss": 0.0073, |
|
"step": 371000 |
|
}, |
|
{ |
|
"epoch": 154.53, |
|
"learning_rate": 4.552537437603994e-06, |
|
"loss": 0.0083, |
|
"step": 371500 |
|
}, |
|
{ |
|
"epoch": 154.74, |
|
"learning_rate": 4.5317387687188025e-06, |
|
"loss": 0.0074, |
|
"step": 372000 |
|
}, |
|
{ |
|
"epoch": 154.95, |
|
"learning_rate": 4.510940099833611e-06, |
|
"loss": 0.0077, |
|
"step": 372500 |
|
}, |
|
{ |
|
"epoch": 155.0, |
|
"eval_cer": 0.2426, |
|
"eval_gen_len": 13.6457, |
|
"eval_loss": 0.46483084559440613, |
|
"eval_runtime": 258.2815, |
|
"eval_samples_per_second": 12.413, |
|
"eval_steps_per_second": 3.105, |
|
"step": 372620 |
|
}, |
|
{ |
|
"epoch": 155.16, |
|
"learning_rate": 4.49018302828619e-06, |
|
"loss": 0.0072, |
|
"step": 373000 |
|
}, |
|
{ |
|
"epoch": 155.37, |
|
"learning_rate": 4.469384359400999e-06, |
|
"loss": 0.0066, |
|
"step": 373500 |
|
}, |
|
{ |
|
"epoch": 155.57, |
|
"learning_rate": 4.448627287853578e-06, |
|
"loss": 0.0074, |
|
"step": 374000 |
|
}, |
|
{ |
|
"epoch": 155.78, |
|
"learning_rate": 4.427828618968386e-06, |
|
"loss": 0.0075, |
|
"step": 374500 |
|
}, |
|
{ |
|
"epoch": 155.99, |
|
"learning_rate": 4.407029950083195e-06, |
|
"loss": 0.0074, |
|
"step": 375000 |
|
}, |
|
{ |
|
"epoch": 156.0, |
|
"eval_cer": 0.2422, |
|
"eval_gen_len": 13.5889, |
|
"eval_loss": 0.4521370232105255, |
|
"eval_runtime": 266.1373, |
|
"eval_samples_per_second": 12.046, |
|
"eval_steps_per_second": 3.013, |
|
"step": 375024 |
|
}, |
|
{ |
|
"epoch": 156.2, |
|
"learning_rate": 4.386231281198003e-06, |
|
"loss": 0.0072, |
|
"step": 375500 |
|
}, |
|
{ |
|
"epoch": 156.41, |
|
"learning_rate": 4.3654326123128125e-06, |
|
"loss": 0.0073, |
|
"step": 376000 |
|
}, |
|
{ |
|
"epoch": 156.61, |
|
"learning_rate": 4.344633943427621e-06, |
|
"loss": 0.0076, |
|
"step": 376500 |
|
}, |
|
{ |
|
"epoch": 156.82, |
|
"learning_rate": 4.32383527454243e-06, |
|
"loss": 0.0073, |
|
"step": 377000 |
|
}, |
|
{ |
|
"epoch": 157.0, |
|
"eval_cer": 0.2465, |
|
"eval_gen_len": 13.7867, |
|
"eval_loss": 0.47169268131256104, |
|
"eval_runtime": 264.268, |
|
"eval_samples_per_second": 12.132, |
|
"eval_steps_per_second": 3.035, |
|
"step": 377428 |
|
}, |
|
{ |
|
"epoch": 157.03, |
|
"learning_rate": 4.303036605657238e-06, |
|
"loss": 0.0075, |
|
"step": 377500 |
|
}, |
|
{ |
|
"epoch": 157.24, |
|
"learning_rate": 4.282237936772047e-06, |
|
"loss": 0.0063, |
|
"step": 378000 |
|
}, |
|
{ |
|
"epoch": 157.45, |
|
"learning_rate": 4.261480865224626e-06, |
|
"loss": 0.0069, |
|
"step": 378500 |
|
}, |
|
{ |
|
"epoch": 157.65, |
|
"learning_rate": 4.240682196339434e-06, |
|
"loss": 0.0078, |
|
"step": 379000 |
|
}, |
|
{ |
|
"epoch": 157.86, |
|
"learning_rate": 4.2198835274542435e-06, |
|
"loss": 0.0078, |
|
"step": 379500 |
|
}, |
|
{ |
|
"epoch": 158.0, |
|
"eval_cer": 0.243, |
|
"eval_gen_len": 13.7143, |
|
"eval_loss": 0.46363481879234314, |
|
"eval_runtime": 263.4944, |
|
"eval_samples_per_second": 12.167, |
|
"eval_steps_per_second": 3.044, |
|
"step": 379832 |
|
}, |
|
{ |
|
"epoch": 158.07, |
|
"learning_rate": 4.199084858569052e-06, |
|
"loss": 0.0066, |
|
"step": 380000 |
|
}, |
|
{ |
|
"epoch": 158.28, |
|
"learning_rate": 4.178286189683861e-06, |
|
"loss": 0.0073, |
|
"step": 380500 |
|
}, |
|
{ |
|
"epoch": 158.49, |
|
"learning_rate": 4.15752911813644e-06, |
|
"loss": 0.0064, |
|
"step": 381000 |
|
}, |
|
{ |
|
"epoch": 158.69, |
|
"learning_rate": 4.136730449251248e-06, |
|
"loss": 0.008, |
|
"step": 381500 |
|
}, |
|
{ |
|
"epoch": 158.9, |
|
"learning_rate": 4.115931780366057e-06, |
|
"loss": 0.007, |
|
"step": 382000 |
|
}, |
|
{ |
|
"epoch": 159.0, |
|
"eval_cer": 0.2413, |
|
"eval_gen_len": 13.6556, |
|
"eval_loss": 0.4499606192111969, |
|
"eval_runtime": 265.7001, |
|
"eval_samples_per_second": 12.066, |
|
"eval_steps_per_second": 3.018, |
|
"step": 382236 |
|
}, |
|
{ |
|
"epoch": 159.11, |
|
"learning_rate": 4.095133111480866e-06, |
|
"loss": 0.0074, |
|
"step": 382500 |
|
}, |
|
{ |
|
"epoch": 159.32, |
|
"learning_rate": 4.0743344425956745e-06, |
|
"loss": 0.0064, |
|
"step": 383000 |
|
}, |
|
{ |
|
"epoch": 159.53, |
|
"learning_rate": 4.053535773710483e-06, |
|
"loss": 0.0068, |
|
"step": 383500 |
|
}, |
|
{ |
|
"epoch": 159.73, |
|
"learning_rate": 4.032778702163062e-06, |
|
"loss": 0.0068, |
|
"step": 384000 |
|
}, |
|
{ |
|
"epoch": 159.94, |
|
"learning_rate": 4.011980033277871e-06, |
|
"loss": 0.0079, |
|
"step": 384500 |
|
}, |
|
{ |
|
"epoch": 160.0, |
|
"eval_cer": 0.241, |
|
"eval_gen_len": 13.622, |
|
"eval_loss": 0.4524941146373749, |
|
"eval_runtime": 263.8331, |
|
"eval_samples_per_second": 12.152, |
|
"eval_steps_per_second": 3.04, |
|
"step": 384640 |
|
}, |
|
{ |
|
"epoch": 160.15, |
|
"learning_rate": 3.991181364392679e-06, |
|
"loss": 0.0076, |
|
"step": 385000 |
|
}, |
|
{ |
|
"epoch": 160.36, |
|
"learning_rate": 3.9703826955074885e-06, |
|
"loss": 0.0062, |
|
"step": 385500 |
|
}, |
|
{ |
|
"epoch": 160.57, |
|
"learning_rate": 3.949584026622296e-06, |
|
"loss": 0.0066, |
|
"step": 386000 |
|
}, |
|
{ |
|
"epoch": 160.77, |
|
"learning_rate": 3.928785357737105e-06, |
|
"loss": 0.0067, |
|
"step": 386500 |
|
}, |
|
{ |
|
"epoch": 160.98, |
|
"learning_rate": 3.908069883527454e-06, |
|
"loss": 0.0074, |
|
"step": 387000 |
|
}, |
|
{ |
|
"epoch": 161.0, |
|
"eval_cer": 0.2439, |
|
"eval_gen_len": 13.7757, |
|
"eval_loss": 0.4422759711742401, |
|
"eval_runtime": 268.4253, |
|
"eval_samples_per_second": 11.944, |
|
"eval_steps_per_second": 2.988, |
|
"step": 387044 |
|
}, |
|
{ |
|
"epoch": 161.19, |
|
"learning_rate": 3.887271214642263e-06, |
|
"loss": 0.008, |
|
"step": 387500 |
|
}, |
|
{ |
|
"epoch": 161.4, |
|
"learning_rate": 3.866472545757072e-06, |
|
"loss": 0.0068, |
|
"step": 388000 |
|
}, |
|
{ |
|
"epoch": 161.61, |
|
"learning_rate": 3.845673876871881e-06, |
|
"loss": 0.0066, |
|
"step": 388500 |
|
}, |
|
{ |
|
"epoch": 161.81, |
|
"learning_rate": 3.824875207986689e-06, |
|
"loss": 0.0081, |
|
"step": 389000 |
|
}, |
|
{ |
|
"epoch": 162.0, |
|
"eval_cer": 0.2433, |
|
"eval_gen_len": 13.7096, |
|
"eval_loss": 0.4686408042907715, |
|
"eval_runtime": 266.054, |
|
"eval_samples_per_second": 12.05, |
|
"eval_steps_per_second": 3.014, |
|
"step": 389448 |
|
}, |
|
{ |
|
"epoch": 162.02, |
|
"learning_rate": 3.804118136439268e-06, |
|
"loss": 0.0073, |
|
"step": 389500 |
|
}, |
|
{ |
|
"epoch": 162.23, |
|
"learning_rate": 3.7833194675540767e-06, |
|
"loss": 0.0057, |
|
"step": 390000 |
|
}, |
|
{ |
|
"epoch": 162.44, |
|
"learning_rate": 3.7625207986688856e-06, |
|
"loss": 0.007, |
|
"step": 390500 |
|
}, |
|
{ |
|
"epoch": 162.65, |
|
"learning_rate": 3.741722129783694e-06, |
|
"loss": 0.0074, |
|
"step": 391000 |
|
}, |
|
{ |
|
"epoch": 162.85, |
|
"learning_rate": 3.7209234608985027e-06, |
|
"loss": 0.0067, |
|
"step": 391500 |
|
}, |
|
{ |
|
"epoch": 163.0, |
|
"eval_cer": 0.2422, |
|
"eval_gen_len": 13.68, |
|
"eval_loss": 0.4667229950428009, |
|
"eval_runtime": 268.59, |
|
"eval_samples_per_second": 11.936, |
|
"eval_steps_per_second": 2.986, |
|
"step": 391852 |
|
}, |
|
{ |
|
"epoch": 163.06, |
|
"learning_rate": 3.7001247920133116e-06, |
|
"loss": 0.0081, |
|
"step": 392000 |
|
}, |
|
{ |
|
"epoch": 163.27, |
|
"learning_rate": 3.67932612312812e-06, |
|
"loss": 0.0067, |
|
"step": 392500 |
|
}, |
|
{ |
|
"epoch": 163.48, |
|
"learning_rate": 3.6585274542429287e-06, |
|
"loss": 0.0064, |
|
"step": 393000 |
|
}, |
|
{ |
|
"epoch": 163.69, |
|
"learning_rate": 3.6377287853577376e-06, |
|
"loss": 0.0073, |
|
"step": 393500 |
|
}, |
|
{ |
|
"epoch": 163.89, |
|
"learning_rate": 3.6169717138103167e-06, |
|
"loss": 0.0074, |
|
"step": 394000 |
|
}, |
|
{ |
|
"epoch": 164.0, |
|
"eval_cer": 0.2418, |
|
"eval_gen_len": 13.6482, |
|
"eval_loss": 0.4521939754486084, |
|
"eval_runtime": 273.0322, |
|
"eval_samples_per_second": 11.742, |
|
"eval_steps_per_second": 2.937, |
|
"step": 394256 |
|
}, |
|
{ |
|
"epoch": 164.1, |
|
"learning_rate": 3.596173044925125e-06, |
|
"loss": 0.007, |
|
"step": 394500 |
|
}, |
|
{ |
|
"epoch": 164.31, |
|
"learning_rate": 3.5754159733777038e-06, |
|
"loss": 0.0072, |
|
"step": 395000 |
|
}, |
|
{ |
|
"epoch": 164.52, |
|
"learning_rate": 3.5546173044925127e-06, |
|
"loss": 0.0064, |
|
"step": 395500 |
|
}, |
|
{ |
|
"epoch": 164.73, |
|
"learning_rate": 3.5338186356073212e-06, |
|
"loss": 0.0071, |
|
"step": 396000 |
|
}, |
|
{ |
|
"epoch": 164.93, |
|
"learning_rate": 3.5130199667221298e-06, |
|
"loss": 0.0085, |
|
"step": 396500 |
|
}, |
|
{ |
|
"epoch": 165.0, |
|
"eval_cer": 0.2414, |
|
"eval_gen_len": 13.6282, |
|
"eval_loss": 0.45710650086402893, |
|
"eval_runtime": 255.6061, |
|
"eval_samples_per_second": 12.543, |
|
"eval_steps_per_second": 3.138, |
|
"step": 396660 |
|
}, |
|
{ |
|
"epoch": 165.14, |
|
"learning_rate": 3.4922212978369387e-06, |
|
"loss": 0.0063, |
|
"step": 397000 |
|
}, |
|
{ |
|
"epoch": 165.35, |
|
"learning_rate": 3.4714226289517472e-06, |
|
"loss": 0.0068, |
|
"step": 397500 |
|
}, |
|
{ |
|
"epoch": 165.56, |
|
"learning_rate": 3.4506239600665558e-06, |
|
"loss": 0.0072, |
|
"step": 398000 |
|
}, |
|
{ |
|
"epoch": 165.77, |
|
"learning_rate": 3.4298668885191348e-06, |
|
"loss": 0.0065, |
|
"step": 398500 |
|
}, |
|
{ |
|
"epoch": 165.97, |
|
"learning_rate": 3.4090682196339437e-06, |
|
"loss": 0.0074, |
|
"step": 399000 |
|
}, |
|
{ |
|
"epoch": 166.0, |
|
"eval_cer": 0.241, |
|
"eval_gen_len": 13.6073, |
|
"eval_loss": 0.45994168519973755, |
|
"eval_runtime": 256.591, |
|
"eval_samples_per_second": 12.495, |
|
"eval_steps_per_second": 3.126, |
|
"step": 399064 |
|
}, |
|
{ |
|
"epoch": 166.18, |
|
"learning_rate": 3.3882695507487522e-06, |
|
"loss": 0.0072, |
|
"step": 399500 |
|
}, |
|
{ |
|
"epoch": 166.39, |
|
"learning_rate": 3.3674708818635608e-06, |
|
"loss": 0.0061, |
|
"step": 400000 |
|
}, |
|
{ |
|
"epoch": 166.6, |
|
"learning_rate": 3.3466722129783697e-06, |
|
"loss": 0.0062, |
|
"step": 400500 |
|
}, |
|
{ |
|
"epoch": 166.81, |
|
"learning_rate": 3.3258735440931782e-06, |
|
"loss": 0.0071, |
|
"step": 401000 |
|
}, |
|
{ |
|
"epoch": 167.0, |
|
"eval_cer": 0.2427, |
|
"eval_gen_len": 13.5515, |
|
"eval_loss": 0.4614485502243042, |
|
"eval_runtime": 261.146, |
|
"eval_samples_per_second": 12.277, |
|
"eval_steps_per_second": 3.071, |
|
"step": 401468 |
|
}, |
|
{ |
|
"epoch": 167.01, |
|
"learning_rate": 3.305074875207987e-06, |
|
"loss": 0.0076, |
|
"step": 401500 |
|
}, |
|
{ |
|
"epoch": 167.22, |
|
"learning_rate": 3.2842762063227957e-06, |
|
"loss": 0.0068, |
|
"step": 402000 |
|
}, |
|
{ |
|
"epoch": 167.43, |
|
"learning_rate": 3.2634775374376042e-06, |
|
"loss": 0.0075, |
|
"step": 402500 |
|
}, |
|
{ |
|
"epoch": 167.64, |
|
"learning_rate": 3.2427204658901832e-06, |
|
"loss": 0.007, |
|
"step": 403000 |
|
}, |
|
{ |
|
"epoch": 167.85, |
|
"learning_rate": 3.221921797004992e-06, |
|
"loss": 0.0054, |
|
"step": 403500 |
|
}, |
|
{ |
|
"epoch": 168.0, |
|
"eval_cer": 0.2471, |
|
"eval_gen_len": 13.8865, |
|
"eval_loss": 0.4741056263446808, |
|
"eval_runtime": 274.0865, |
|
"eval_samples_per_second": 11.697, |
|
"eval_steps_per_second": 2.926, |
|
"step": 403872 |
|
}, |
|
{ |
|
"epoch": 168.05, |
|
"learning_rate": 3.2011231281198007e-06, |
|
"loss": 0.0066, |
|
"step": 404000 |
|
}, |
|
{ |
|
"epoch": 168.26, |
|
"learning_rate": 3.1803244592346092e-06, |
|
"loss": 0.0058, |
|
"step": 404500 |
|
}, |
|
{ |
|
"epoch": 168.47, |
|
"learning_rate": 3.1595673876871887e-06, |
|
"loss": 0.0074, |
|
"step": 405000 |
|
}, |
|
{ |
|
"epoch": 168.68, |
|
"learning_rate": 3.138768718801997e-06, |
|
"loss": 0.007, |
|
"step": 405500 |
|
}, |
|
{ |
|
"epoch": 168.89, |
|
"learning_rate": 3.1179700499168057e-06, |
|
"loss": 0.0076, |
|
"step": 406000 |
|
}, |
|
{ |
|
"epoch": 169.0, |
|
"eval_cer": 0.2432, |
|
"eval_gen_len": 13.6996, |
|
"eval_loss": 0.46461164951324463, |
|
"eval_runtime": 266.1722, |
|
"eval_samples_per_second": 12.045, |
|
"eval_steps_per_second": 3.013, |
|
"step": 406276 |
|
}, |
|
{ |
|
"epoch": 169.09, |
|
"learning_rate": 3.097171381031614e-06, |
|
"loss": 0.0065, |
|
"step": 406500 |
|
}, |
|
{ |
|
"epoch": 169.3, |
|
"learning_rate": 3.0763727121464228e-06, |
|
"loss": 0.0061, |
|
"step": 407000 |
|
}, |
|
{ |
|
"epoch": 169.51, |
|
"learning_rate": 3.0555740432612313e-06, |
|
"loss": 0.0067, |
|
"step": 407500 |
|
}, |
|
{ |
|
"epoch": 169.72, |
|
"learning_rate": 3.03477537437604e-06, |
|
"loss": 0.0075, |
|
"step": 408000 |
|
}, |
|
{ |
|
"epoch": 169.93, |
|
"learning_rate": 3.0140183028286193e-06, |
|
"loss": 0.0064, |
|
"step": 408500 |
|
}, |
|
{ |
|
"epoch": 170.0, |
|
"eval_cer": 0.2407, |
|
"eval_gen_len": 13.6525, |
|
"eval_loss": 0.45978671312332153, |
|
"eval_runtime": 264.0025, |
|
"eval_samples_per_second": 12.144, |
|
"eval_steps_per_second": 3.038, |
|
"step": 408680 |
|
}, |
|
{ |
|
"epoch": 170.13, |
|
"learning_rate": 2.993219633943428e-06, |
|
"loss": 0.0058, |
|
"step": 409000 |
|
}, |
|
{ |
|
"epoch": 170.34, |
|
"learning_rate": 2.9724209650582363e-06, |
|
"loss": 0.0072, |
|
"step": 409500 |
|
}, |
|
{ |
|
"epoch": 170.55, |
|
"learning_rate": 2.9516222961730453e-06, |
|
"loss": 0.0073, |
|
"step": 410000 |
|
}, |
|
{ |
|
"epoch": 170.76, |
|
"learning_rate": 2.9308236272878538e-06, |
|
"loss": 0.0067, |
|
"step": 410500 |
|
}, |
|
{ |
|
"epoch": 170.97, |
|
"learning_rate": 2.9100249584026623e-06, |
|
"loss": 0.0066, |
|
"step": 411000 |
|
}, |
|
{ |
|
"epoch": 171.0, |
|
"eval_cer": 0.2463, |
|
"eval_gen_len": 13.8612, |
|
"eval_loss": 0.4791421890258789, |
|
"eval_runtime": 266.2729, |
|
"eval_samples_per_second": 12.04, |
|
"eval_steps_per_second": 3.012, |
|
"step": 411084 |
|
}, |
|
{ |
|
"epoch": 171.17, |
|
"learning_rate": 2.8892262895174713e-06, |
|
"loss": 0.0066, |
|
"step": 411500 |
|
}, |
|
{ |
|
"epoch": 171.38, |
|
"learning_rate": 2.8684276206322798e-06, |
|
"loss": 0.0055, |
|
"step": 412000 |
|
}, |
|
{ |
|
"epoch": 171.59, |
|
"learning_rate": 2.847670549084859e-06, |
|
"loss": 0.0072, |
|
"step": 412500 |
|
}, |
|
{ |
|
"epoch": 171.8, |
|
"learning_rate": 2.8268718801996673e-06, |
|
"loss": 0.0067, |
|
"step": 413000 |
|
}, |
|
{ |
|
"epoch": 172.0, |
|
"eval_cer": 0.2408, |
|
"eval_gen_len": 13.6622, |
|
"eval_loss": 0.4588078260421753, |
|
"eval_runtime": 266.7256, |
|
"eval_samples_per_second": 12.02, |
|
"eval_steps_per_second": 3.007, |
|
"step": 413488 |
|
}, |
|
{ |
|
"epoch": 172.0, |
|
"learning_rate": 2.8060732113144763e-06, |
|
"loss": 0.0074, |
|
"step": 413500 |
|
}, |
|
{ |
|
"epoch": 172.21, |
|
"learning_rate": 2.7852745424292848e-06, |
|
"loss": 0.0066, |
|
"step": 414000 |
|
}, |
|
{ |
|
"epoch": 172.42, |
|
"learning_rate": 2.7644758735440937e-06, |
|
"loss": 0.0058, |
|
"step": 414500 |
|
}, |
|
{ |
|
"epoch": 172.63, |
|
"learning_rate": 2.7436772046589023e-06, |
|
"loss": 0.0067, |
|
"step": 415000 |
|
}, |
|
{ |
|
"epoch": 172.84, |
|
"learning_rate": 2.7228785357737108e-06, |
|
"loss": 0.0074, |
|
"step": 415500 |
|
}, |
|
{ |
|
"epoch": 173.0, |
|
"eval_cer": 0.2411, |
|
"eval_gen_len": 13.7199, |
|
"eval_loss": 0.45347917079925537, |
|
"eval_runtime": 266.7945, |
|
"eval_samples_per_second": 12.017, |
|
"eval_steps_per_second": 3.006, |
|
"step": 415892 |
|
}, |
|
{ |
|
"epoch": 173.04, |
|
"learning_rate": 2.70212146422629e-06, |
|
"loss": 0.0065, |
|
"step": 416000 |
|
}, |
|
{ |
|
"epoch": 173.25, |
|
"learning_rate": 2.6813227953410987e-06, |
|
"loss": 0.0066, |
|
"step": 416500 |
|
}, |
|
{ |
|
"epoch": 173.46, |
|
"learning_rate": 2.6605241264559073e-06, |
|
"loss": 0.006, |
|
"step": 417000 |
|
}, |
|
{ |
|
"epoch": 173.67, |
|
"learning_rate": 2.639725457570716e-06, |
|
"loss": 0.0065, |
|
"step": 417500 |
|
}, |
|
{ |
|
"epoch": 173.88, |
|
"learning_rate": 2.618926788685524e-06, |
|
"loss": 0.0073, |
|
"step": 418000 |
|
}, |
|
{ |
|
"epoch": 174.0, |
|
"eval_cer": 0.2413, |
|
"eval_gen_len": 13.7389, |
|
"eval_loss": 0.4470750093460083, |
|
"eval_runtime": 261.7235, |
|
"eval_samples_per_second": 12.25, |
|
"eval_steps_per_second": 3.064, |
|
"step": 418296 |
|
}, |
|
{ |
|
"epoch": 174.08, |
|
"learning_rate": 2.598128119800333e-06, |
|
"loss": 0.007, |
|
"step": 418500 |
|
}, |
|
{ |
|
"epoch": 174.29, |
|
"learning_rate": 2.5773294509151414e-06, |
|
"loss": 0.0068, |
|
"step": 419000 |
|
}, |
|
{ |
|
"epoch": 174.5, |
|
"learning_rate": 2.55653078202995e-06, |
|
"loss": 0.0073, |
|
"step": 419500 |
|
}, |
|
{ |
|
"epoch": 174.71, |
|
"learning_rate": 2.5358153078203e-06, |
|
"loss": 0.0057, |
|
"step": 420000 |
|
}, |
|
{ |
|
"epoch": 174.92, |
|
"learning_rate": 2.5150166389351083e-06, |
|
"loss": 0.0066, |
|
"step": 420500 |
|
}, |
|
{ |
|
"epoch": 175.0, |
|
"eval_cer": 0.2421, |
|
"eval_gen_len": 13.7676, |
|
"eval_loss": 0.46230319142341614, |
|
"eval_runtime": 274.1203, |
|
"eval_samples_per_second": 11.696, |
|
"eval_steps_per_second": 2.926, |
|
"step": 420700 |
|
}, |
|
{ |
|
"epoch": 175.12, |
|
"learning_rate": 2.494217970049917e-06, |
|
"loss": 0.0072, |
|
"step": 421000 |
|
}, |
|
{ |
|
"epoch": 175.33, |
|
"learning_rate": 2.473419301164726e-06, |
|
"loss": 0.006, |
|
"step": 421500 |
|
}, |
|
{ |
|
"epoch": 175.54, |
|
"learning_rate": 2.4526206322795343e-06, |
|
"loss": 0.0068, |
|
"step": 422000 |
|
}, |
|
{ |
|
"epoch": 175.75, |
|
"learning_rate": 2.4318635607321134e-06, |
|
"loss": 0.0068, |
|
"step": 422500 |
|
}, |
|
{ |
|
"epoch": 175.96, |
|
"learning_rate": 2.411064891846922e-06, |
|
"loss": 0.0067, |
|
"step": 423000 |
|
}, |
|
{ |
|
"epoch": 176.0, |
|
"eval_cer": 0.2427, |
|
"eval_gen_len": 13.7302, |
|
"eval_loss": 0.46741247177124023, |
|
"eval_runtime": 271.0852, |
|
"eval_samples_per_second": 11.827, |
|
"eval_steps_per_second": 2.958, |
|
"step": 423104 |
|
}, |
|
{ |
|
"epoch": 176.16, |
|
"learning_rate": 2.390266222961731e-06, |
|
"loss": 0.0065, |
|
"step": 423500 |
|
}, |
|
{ |
|
"epoch": 176.37, |
|
"learning_rate": 2.3694675540765393e-06, |
|
"loss": 0.0062, |
|
"step": 424000 |
|
}, |
|
{ |
|
"epoch": 176.58, |
|
"learning_rate": 2.348668885191348e-06, |
|
"loss": 0.0057, |
|
"step": 424500 |
|
}, |
|
{ |
|
"epoch": 176.79, |
|
"learning_rate": 2.327870216306157e-06, |
|
"loss": 0.007, |
|
"step": 425000 |
|
}, |
|
{ |
|
"epoch": 177.0, |
|
"learning_rate": 2.307071547420965e-06, |
|
"loss": 0.0077, |
|
"step": 425500 |
|
}, |
|
{ |
|
"epoch": 177.0, |
|
"eval_cer": 0.2399, |
|
"eval_gen_len": 13.5359, |
|
"eval_loss": 0.45084038376808167, |
|
"eval_runtime": 262.6698, |
|
"eval_samples_per_second": 12.205, |
|
"eval_steps_per_second": 3.053, |
|
"step": 425508 |
|
}, |
|
{ |
|
"epoch": 177.2, |
|
"learning_rate": 2.286272878535774e-06, |
|
"loss": 0.0059, |
|
"step": 426000 |
|
}, |
|
{ |
|
"epoch": 177.41, |
|
"learning_rate": 2.2654742096505824e-06, |
|
"loss": 0.0067, |
|
"step": 426500 |
|
}, |
|
{ |
|
"epoch": 177.62, |
|
"learning_rate": 2.2447171381031614e-06, |
|
"loss": 0.006, |
|
"step": 427000 |
|
}, |
|
{ |
|
"epoch": 177.83, |
|
"learning_rate": 2.2239184692179704e-06, |
|
"loss": 0.0066, |
|
"step": 427500 |
|
}, |
|
{ |
|
"epoch": 178.0, |
|
"eval_cer": 0.2408, |
|
"eval_gen_len": 13.7302, |
|
"eval_loss": 0.4653932452201843, |
|
"eval_runtime": 268.3858, |
|
"eval_samples_per_second": 11.945, |
|
"eval_steps_per_second": 2.988, |
|
"step": 427912 |
|
}, |
|
{ |
|
"epoch": 178.04, |
|
"learning_rate": 2.203119800332779e-06, |
|
"loss": 0.0064, |
|
"step": 428000 |
|
}, |
|
{ |
|
"epoch": 178.24, |
|
"learning_rate": 2.1823211314475874e-06, |
|
"loss": 0.0055, |
|
"step": 428500 |
|
}, |
|
{ |
|
"epoch": 178.45, |
|
"learning_rate": 2.161564059900167e-06, |
|
"loss": 0.0068, |
|
"step": 429000 |
|
}, |
|
{ |
|
"epoch": 178.66, |
|
"learning_rate": 2.1407653910149754e-06, |
|
"loss": 0.0068, |
|
"step": 429500 |
|
}, |
|
{ |
|
"epoch": 178.87, |
|
"learning_rate": 2.119966722129784e-06, |
|
"loss": 0.0067, |
|
"step": 430000 |
|
}, |
|
{ |
|
"epoch": 179.0, |
|
"eval_cer": 0.2408, |
|
"eval_gen_len": 13.6925, |
|
"eval_loss": 0.462500661611557, |
|
"eval_runtime": 268.4086, |
|
"eval_samples_per_second": 11.944, |
|
"eval_steps_per_second": 2.988, |
|
"step": 430316 |
|
}, |
|
{ |
|
"epoch": 179.08, |
|
"learning_rate": 2.0991680532445924e-06, |
|
"loss": 0.0067, |
|
"step": 430500 |
|
}, |
|
{ |
|
"epoch": 179.28, |
|
"learning_rate": 2.078369384359401e-06, |
|
"loss": 0.0059, |
|
"step": 431000 |
|
}, |
|
{ |
|
"epoch": 179.49, |
|
"learning_rate": 2.05761231281198e-06, |
|
"loss": 0.0064, |
|
"step": 431500 |
|
}, |
|
{ |
|
"epoch": 179.7, |
|
"learning_rate": 2.036813643926789e-06, |
|
"loss": 0.0065, |
|
"step": 432000 |
|
}, |
|
{ |
|
"epoch": 179.91, |
|
"learning_rate": 2.0160149750415974e-06, |
|
"loss": 0.0072, |
|
"step": 432500 |
|
}, |
|
{ |
|
"epoch": 180.0, |
|
"eval_cer": 0.242, |
|
"eval_gen_len": 13.7545, |
|
"eval_loss": 0.46416959166526794, |
|
"eval_runtime": 267.9605, |
|
"eval_samples_per_second": 11.964, |
|
"eval_steps_per_second": 2.993, |
|
"step": 432720 |
|
}, |
|
{ |
|
"epoch": 180.12, |
|
"learning_rate": 1.995216306156406e-06, |
|
"loss": 0.0064, |
|
"step": 433000 |
|
}, |
|
{ |
|
"epoch": 180.32, |
|
"learning_rate": 1.9744592346089854e-06, |
|
"loss": 0.0058, |
|
"step": 433500 |
|
}, |
|
{ |
|
"epoch": 180.53, |
|
"learning_rate": 1.953660565723794e-06, |
|
"loss": 0.0063, |
|
"step": 434000 |
|
}, |
|
{ |
|
"epoch": 180.74, |
|
"learning_rate": 1.9328618968386024e-06, |
|
"loss": 0.0065, |
|
"step": 434500 |
|
}, |
|
{ |
|
"epoch": 180.95, |
|
"learning_rate": 1.9120632279534114e-06, |
|
"loss": 0.0066, |
|
"step": 435000 |
|
}, |
|
{ |
|
"epoch": 181.0, |
|
"eval_cer": 0.2401, |
|
"eval_gen_len": 13.6538, |
|
"eval_loss": 0.4692617356777191, |
|
"eval_runtime": 260.4012, |
|
"eval_samples_per_second": 12.312, |
|
"eval_steps_per_second": 3.08, |
|
"step": 435124 |
|
}, |
|
{ |
|
"epoch": 181.16, |
|
"learning_rate": 1.8912645590682197e-06, |
|
"loss": 0.0076, |
|
"step": 435500 |
|
}, |
|
{ |
|
"epoch": 181.36, |
|
"learning_rate": 1.8705074875207987e-06, |
|
"loss": 0.0059, |
|
"step": 436000 |
|
}, |
|
{ |
|
"epoch": 181.57, |
|
"learning_rate": 1.8497088186356074e-06, |
|
"loss": 0.0069, |
|
"step": 436500 |
|
}, |
|
{ |
|
"epoch": 181.78, |
|
"learning_rate": 1.8289101497504162e-06, |
|
"loss": 0.0061, |
|
"step": 437000 |
|
}, |
|
{ |
|
"epoch": 181.99, |
|
"learning_rate": 1.8081114808652247e-06, |
|
"loss": 0.0064, |
|
"step": 437500 |
|
}, |
|
{ |
|
"epoch": 182.0, |
|
"eval_cer": 0.2413, |
|
"eval_gen_len": 13.7302, |
|
"eval_loss": 0.4686383605003357, |
|
"eval_runtime": 260.1458, |
|
"eval_samples_per_second": 12.324, |
|
"eval_steps_per_second": 3.083, |
|
"step": 437528 |
|
}, |
|
{ |
|
"epoch": 182.2, |
|
"learning_rate": 1.787354409317804e-06, |
|
"loss": 0.0062, |
|
"step": 438000 |
|
}, |
|
{ |
|
"epoch": 182.4, |
|
"learning_rate": 1.7665557404326125e-06, |
|
"loss": 0.0066, |
|
"step": 438500 |
|
}, |
|
{ |
|
"epoch": 182.61, |
|
"learning_rate": 1.7457570715474212e-06, |
|
"loss": 0.0066, |
|
"step": 439000 |
|
}, |
|
{ |
|
"epoch": 182.82, |
|
"learning_rate": 1.725e-06, |
|
"loss": 0.0067, |
|
"step": 439500 |
|
}, |
|
{ |
|
"epoch": 183.0, |
|
"eval_cer": 0.2415, |
|
"eval_gen_len": 13.7371, |
|
"eval_loss": 0.46899163722991943, |
|
"eval_runtime": 270.7897, |
|
"eval_samples_per_second": 11.839, |
|
"eval_steps_per_second": 2.962, |
|
"step": 439932 |
|
}, |
|
{ |
|
"epoch": 183.03, |
|
"learning_rate": 1.7042013311148087e-06, |
|
"loss": 0.0059, |
|
"step": 440000 |
|
}, |
|
{ |
|
"epoch": 183.24, |
|
"learning_rate": 1.6834026622296173e-06, |
|
"loss": 0.0057, |
|
"step": 440500 |
|
}, |
|
{ |
|
"epoch": 183.44, |
|
"learning_rate": 1.662603993344426e-06, |
|
"loss": 0.0062, |
|
"step": 441000 |
|
}, |
|
{ |
|
"epoch": 183.65, |
|
"learning_rate": 1.6418053244592347e-06, |
|
"loss": 0.0061, |
|
"step": 441500 |
|
}, |
|
{ |
|
"epoch": 183.86, |
|
"learning_rate": 1.6210066555740433e-06, |
|
"loss": 0.0067, |
|
"step": 442000 |
|
}, |
|
{ |
|
"epoch": 184.0, |
|
"eval_cer": 0.2392, |
|
"eval_gen_len": 13.7442, |
|
"eval_loss": 0.47148939967155457, |
|
"eval_runtime": 271.9757, |
|
"eval_samples_per_second": 11.788, |
|
"eval_steps_per_second": 2.949, |
|
"step": 442336 |
|
}, |
|
{ |
|
"epoch": 184.07, |
|
"learning_rate": 1.600207986688852e-06, |
|
"loss": 0.0066, |
|
"step": 442500 |
|
}, |
|
{ |
|
"epoch": 184.28, |
|
"learning_rate": 1.5794093178036607e-06, |
|
"loss": 0.0063, |
|
"step": 443000 |
|
}, |
|
{ |
|
"epoch": 184.48, |
|
"learning_rate": 1.5586106489184695e-06, |
|
"loss": 0.0063, |
|
"step": 443500 |
|
}, |
|
{ |
|
"epoch": 184.69, |
|
"learning_rate": 1.5378535773710485e-06, |
|
"loss": 0.0063, |
|
"step": 444000 |
|
}, |
|
{ |
|
"epoch": 184.9, |
|
"learning_rate": 1.5170965058236273e-06, |
|
"loss": 0.0062, |
|
"step": 444500 |
|
}, |
|
{ |
|
"epoch": 185.0, |
|
"eval_cer": 0.2395, |
|
"eval_gen_len": 13.6572, |
|
"eval_loss": 0.46137315034866333, |
|
"eval_runtime": 269.0871, |
|
"eval_samples_per_second": 11.914, |
|
"eval_steps_per_second": 2.98, |
|
"step": 444740 |
|
}, |
|
{ |
|
"epoch": 185.11, |
|
"learning_rate": 1.496297836938436e-06, |
|
"loss": 0.0066, |
|
"step": 445000 |
|
}, |
|
{ |
|
"epoch": 185.32, |
|
"learning_rate": 1.4754991680532445e-06, |
|
"loss": 0.006, |
|
"step": 445500 |
|
}, |
|
{ |
|
"epoch": 185.52, |
|
"learning_rate": 1.4547004991680533e-06, |
|
"loss": 0.0053, |
|
"step": 446000 |
|
}, |
|
{ |
|
"epoch": 185.73, |
|
"learning_rate": 1.433901830282862e-06, |
|
"loss": 0.0075, |
|
"step": 446500 |
|
}, |
|
{ |
|
"epoch": 185.94, |
|
"learning_rate": 1.4131031613976705e-06, |
|
"loss": 0.0068, |
|
"step": 447000 |
|
}, |
|
{ |
|
"epoch": 186.0, |
|
"eval_cer": 0.2396, |
|
"eval_gen_len": 13.6185, |
|
"eval_loss": 0.46076661348342896, |
|
"eval_runtime": 256.715, |
|
"eval_samples_per_second": 12.489, |
|
"eval_steps_per_second": 3.124, |
|
"step": 447144 |
|
}, |
|
{ |
|
"epoch": 186.15, |
|
"learning_rate": 1.3923044925124793e-06, |
|
"loss": 0.0062, |
|
"step": 447500 |
|
}, |
|
{ |
|
"epoch": 186.36, |
|
"learning_rate": 1.371505823627288e-06, |
|
"loss": 0.0058, |
|
"step": 448000 |
|
}, |
|
{ |
|
"epoch": 186.56, |
|
"learning_rate": 1.3507071547420965e-06, |
|
"loss": 0.0065, |
|
"step": 448500 |
|
}, |
|
{ |
|
"epoch": 186.77, |
|
"learning_rate": 1.3299500831946758e-06, |
|
"loss": 0.0065, |
|
"step": 449000 |
|
}, |
|
{ |
|
"epoch": 186.98, |
|
"learning_rate": 1.3091514143094845e-06, |
|
"loss": 0.0064, |
|
"step": 449500 |
|
}, |
|
{ |
|
"epoch": 187.0, |
|
"eval_cer": 0.2391, |
|
"eval_gen_len": 13.7558, |
|
"eval_loss": 0.47285133600234985, |
|
"eval_runtime": 259.8275, |
|
"eval_samples_per_second": 12.339, |
|
"eval_steps_per_second": 3.087, |
|
"step": 449548 |
|
}, |
|
{ |
|
"epoch": 187.19, |
|
"learning_rate": 1.288352745424293e-06, |
|
"loss": 0.0054, |
|
"step": 450000 |
|
}, |
|
{ |
|
"epoch": 187.4, |
|
"learning_rate": 1.2675540765391017e-06, |
|
"loss": 0.0064, |
|
"step": 450500 |
|
}, |
|
{ |
|
"epoch": 187.6, |
|
"learning_rate": 1.2467554076539103e-06, |
|
"loss": 0.0059, |
|
"step": 451000 |
|
}, |
|
{ |
|
"epoch": 187.81, |
|
"learning_rate": 1.225956738768719e-06, |
|
"loss": 0.0059, |
|
"step": 451500 |
|
}, |
|
{ |
|
"epoch": 188.0, |
|
"eval_cer": 0.2397, |
|
"eval_gen_len": 13.6946, |
|
"eval_loss": 0.47259289026260376, |
|
"eval_runtime": 273.3002, |
|
"eval_samples_per_second": 11.731, |
|
"eval_steps_per_second": 2.935, |
|
"step": 451952 |
|
}, |
|
{ |
|
"epoch": 188.02, |
|
"learning_rate": 1.2051996672212978e-06, |
|
"loss": 0.0073, |
|
"step": 452000 |
|
}, |
|
{ |
|
"epoch": 188.23, |
|
"learning_rate": 1.1844009983361065e-06, |
|
"loss": 0.0065, |
|
"step": 452500 |
|
}, |
|
{ |
|
"epoch": 188.44, |
|
"learning_rate": 1.1636439267886856e-06, |
|
"loss": 0.0069, |
|
"step": 453000 |
|
}, |
|
{ |
|
"epoch": 188.64, |
|
"learning_rate": 1.1428452579034943e-06, |
|
"loss": 0.0062, |
|
"step": 453500 |
|
}, |
|
{ |
|
"epoch": 188.85, |
|
"learning_rate": 1.1220881863560733e-06, |
|
"loss": 0.0052, |
|
"step": 454000 |
|
}, |
|
{ |
|
"epoch": 189.0, |
|
"eval_cer": 0.239, |
|
"eval_gen_len": 13.6185, |
|
"eval_loss": 0.4666392505168915, |
|
"eval_runtime": 268.978, |
|
"eval_samples_per_second": 11.919, |
|
"eval_steps_per_second": 2.982, |
|
"step": 454356 |
|
}, |
|
{ |
|
"epoch": 189.06, |
|
"learning_rate": 1.1012895174708818e-06, |
|
"loss": 0.0061, |
|
"step": 454500 |
|
}, |
|
{ |
|
"epoch": 189.27, |
|
"learning_rate": 1.0804908485856906e-06, |
|
"loss": 0.0065, |
|
"step": 455000 |
|
}, |
|
{ |
|
"epoch": 189.48, |
|
"learning_rate": 1.0596921797004993e-06, |
|
"loss": 0.0051, |
|
"step": 455500 |
|
}, |
|
{ |
|
"epoch": 189.68, |
|
"learning_rate": 1.038893510815308e-06, |
|
"loss": 0.0066, |
|
"step": 456000 |
|
}, |
|
{ |
|
"epoch": 189.89, |
|
"learning_rate": 1.0180948419301166e-06, |
|
"loss": 0.0066, |
|
"step": 456500 |
|
}, |
|
{ |
|
"epoch": 190.0, |
|
"eval_cer": 0.2381, |
|
"eval_gen_len": 13.6825, |
|
"eval_loss": 0.4684942066669464, |
|
"eval_runtime": 269.3222, |
|
"eval_samples_per_second": 11.904, |
|
"eval_steps_per_second": 2.978, |
|
"step": 456760 |
|
}, |
|
{ |
|
"epoch": 190.1, |
|
"learning_rate": 9.97296173044925e-07, |
|
"loss": 0.0061, |
|
"step": 457000 |
|
}, |
|
{ |
|
"epoch": 190.31, |
|
"learning_rate": 9.765391014975043e-07, |
|
"loss": 0.0058, |
|
"step": 457500 |
|
}, |
|
{ |
|
"epoch": 190.52, |
|
"learning_rate": 9.557404326123129e-07, |
|
"loss": 0.0058, |
|
"step": 458000 |
|
}, |
|
{ |
|
"epoch": 190.72, |
|
"learning_rate": 9.349417637271216e-07, |
|
"loss": 0.0065, |
|
"step": 458500 |
|
}, |
|
{ |
|
"epoch": 190.93, |
|
"learning_rate": 9.141430948419302e-07, |
|
"loss": 0.0061, |
|
"step": 459000 |
|
}, |
|
{ |
|
"epoch": 191.0, |
|
"eval_cer": 0.238, |
|
"eval_gen_len": 13.6538, |
|
"eval_loss": 0.46481847763061523, |
|
"eval_runtime": 260.0695, |
|
"eval_samples_per_second": 12.327, |
|
"eval_steps_per_second": 3.084, |
|
"step": 459164 |
|
}, |
|
{ |
|
"epoch": 191.14, |
|
"learning_rate": 8.933444259567387e-07, |
|
"loss": 0.006, |
|
"step": 459500 |
|
}, |
|
{ |
|
"epoch": 191.35, |
|
"learning_rate": 8.725457570715475e-07, |
|
"loss": 0.0055, |
|
"step": 460000 |
|
}, |
|
{ |
|
"epoch": 191.56, |
|
"learning_rate": 8.517470881863561e-07, |
|
"loss": 0.0058, |
|
"step": 460500 |
|
}, |
|
{ |
|
"epoch": 191.76, |
|
"learning_rate": 8.309900166389352e-07, |
|
"loss": 0.0066, |
|
"step": 461000 |
|
}, |
|
{ |
|
"epoch": 191.97, |
|
"learning_rate": 8.101913477537439e-07, |
|
"loss": 0.0063, |
|
"step": 461500 |
|
}, |
|
{ |
|
"epoch": 192.0, |
|
"eval_cer": 0.2386, |
|
"eval_gen_len": 13.6301, |
|
"eval_loss": 0.46835771203041077, |
|
"eval_runtime": 258.927, |
|
"eval_samples_per_second": 12.382, |
|
"eval_steps_per_second": 3.097, |
|
"step": 461568 |
|
}, |
|
{ |
|
"epoch": 192.18, |
|
"learning_rate": 7.893926788685524e-07, |
|
"loss": 0.0063, |
|
"step": 462000 |
|
}, |
|
{ |
|
"epoch": 192.39, |
|
"learning_rate": 7.685940099833611e-07, |
|
"loss": 0.0059, |
|
"step": 462500 |
|
}, |
|
{ |
|
"epoch": 192.6, |
|
"learning_rate": 7.477953410981697e-07, |
|
"loss": 0.0058, |
|
"step": 463000 |
|
}, |
|
{ |
|
"epoch": 192.8, |
|
"learning_rate": 7.269966722129785e-07, |
|
"loss": 0.0064, |
|
"step": 463500 |
|
}, |
|
{ |
|
"epoch": 193.0, |
|
"eval_cer": 0.2377, |
|
"eval_gen_len": 13.6687, |
|
"eval_loss": 0.4715929627418518, |
|
"eval_runtime": 259.4667, |
|
"eval_samples_per_second": 12.356, |
|
"eval_steps_per_second": 3.091, |
|
"step": 463972 |
|
}, |
|
{ |
|
"epoch": 193.01, |
|
"learning_rate": 7.061980033277871e-07, |
|
"loss": 0.0051, |
|
"step": 464000 |
|
}, |
|
{ |
|
"epoch": 193.22, |
|
"learning_rate": 6.853993344425957e-07, |
|
"loss": 0.0054, |
|
"step": 464500 |
|
} |
|
], |
|
"max_steps": 480800, |
|
"num_train_epochs": 200, |
|
"total_flos": 2079248374038528.0, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|