|
{ |
|
"best_metric": 1.0, |
|
"best_model_checkpoint": "models/pt-ai-detector/checkpoint-62500", |
|
"epoch": 1.0, |
|
"eval_steps": 500, |
|
"global_step": 62500, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.0016, |
|
"grad_norm": 0.02673221565783024, |
|
"learning_rate": 1.9989333333333335e-05, |
|
"loss": 0.109, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.0032, |
|
"grad_norm": 0.017086902633309364, |
|
"learning_rate": 1.997888e-05, |
|
"loss": 0.0036, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.0048, |
|
"grad_norm": 0.006770299281924963, |
|
"learning_rate": 1.9968213333333335e-05, |
|
"loss": 0.0019, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.0064, |
|
"grad_norm": 0.00477579515427351, |
|
"learning_rate": 1.995754666666667e-05, |
|
"loss": 0.0022, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.008, |
|
"grad_norm": 0.0035180049017071724, |
|
"learning_rate": 1.9946880000000002e-05, |
|
"loss": 0.001, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.0096, |
|
"grad_norm": 0.003077354747802019, |
|
"learning_rate": 1.9936213333333333e-05, |
|
"loss": 0.0003, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.0112, |
|
"grad_norm": 0.0021210976410657167, |
|
"learning_rate": 1.992554666666667e-05, |
|
"loss": 0.0001, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 0.0128, |
|
"grad_norm": 0.001930135884322226, |
|
"learning_rate": 1.991488e-05, |
|
"loss": 0.0001, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 0.0144, |
|
"grad_norm": 0.0014129126211628318, |
|
"learning_rate": 1.9904213333333337e-05, |
|
"loss": 0.0001, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 0.016, |
|
"grad_norm": 0.0013116322224959731, |
|
"learning_rate": 1.9893546666666667e-05, |
|
"loss": 0.0001, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.0176, |
|
"grad_norm": 0.0009962028125301003, |
|
"learning_rate": 1.988288e-05, |
|
"loss": 0.0, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 0.0192, |
|
"grad_norm": 0.0008926771697588265, |
|
"learning_rate": 1.9872213333333335e-05, |
|
"loss": 0.0, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 0.0208, |
|
"grad_norm": 0.0008887408184818923, |
|
"learning_rate": 1.9861546666666668e-05, |
|
"loss": 0.0, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 0.0224, |
|
"grad_norm": 0.000690207933075726, |
|
"learning_rate": 1.9850880000000002e-05, |
|
"loss": 0.0, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 0.024, |
|
"grad_norm": 0.000710649008397013, |
|
"learning_rate": 1.9840213333333336e-05, |
|
"loss": 0.0, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 0.0256, |
|
"grad_norm": 0.0005570728681050241, |
|
"learning_rate": 1.982954666666667e-05, |
|
"loss": 0.0, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 0.0272, |
|
"grad_norm": 0.013599707745015621, |
|
"learning_rate": 1.9818880000000003e-05, |
|
"loss": 0.0032, |
|
"step": 1700 |
|
}, |
|
{ |
|
"epoch": 0.0288, |
|
"grad_norm": 0.0010214447975158691, |
|
"learning_rate": 1.9808213333333333e-05, |
|
"loss": 0.0006, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 0.0304, |
|
"grad_norm": 0.0006235586479306221, |
|
"learning_rate": 1.979754666666667e-05, |
|
"loss": 0.0, |
|
"step": 1900 |
|
}, |
|
{ |
|
"epoch": 0.032, |
|
"grad_norm": 0.0006887756753712893, |
|
"learning_rate": 1.978688e-05, |
|
"loss": 0.002, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 0.0336, |
|
"grad_norm": 0.5710021257400513, |
|
"learning_rate": 1.9776213333333334e-05, |
|
"loss": 0.0086, |
|
"step": 2100 |
|
}, |
|
{ |
|
"epoch": 0.0352, |
|
"grad_norm": 0.0023482097312808037, |
|
"learning_rate": 1.9765546666666668e-05, |
|
"loss": 0.003, |
|
"step": 2200 |
|
}, |
|
{ |
|
"epoch": 0.0368, |
|
"grad_norm": 0.0014186076587066054, |
|
"learning_rate": 1.975488e-05, |
|
"loss": 0.0001, |
|
"step": 2300 |
|
}, |
|
{ |
|
"epoch": 0.0384, |
|
"grad_norm": 0.0014550117775797844, |
|
"learning_rate": 1.9744213333333335e-05, |
|
"loss": 0.0, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"grad_norm": 0.0006836645770817995, |
|
"learning_rate": 1.973354666666667e-05, |
|
"loss": 0.0, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 0.0416, |
|
"grad_norm": 0.0005004777340218425, |
|
"learning_rate": 1.9722880000000003e-05, |
|
"loss": 0.0, |
|
"step": 2600 |
|
}, |
|
{ |
|
"epoch": 0.0432, |
|
"grad_norm": 0.0005360045470297337, |
|
"learning_rate": 1.9712213333333333e-05, |
|
"loss": 0.0005, |
|
"step": 2700 |
|
}, |
|
{ |
|
"epoch": 0.0448, |
|
"grad_norm": 0.0010488297557458282, |
|
"learning_rate": 1.970154666666667e-05, |
|
"loss": 0.0031, |
|
"step": 2800 |
|
}, |
|
{ |
|
"epoch": 0.0464, |
|
"grad_norm": 0.0006056024576537311, |
|
"learning_rate": 1.969088e-05, |
|
"loss": 0.0018, |
|
"step": 2900 |
|
}, |
|
{ |
|
"epoch": 0.048, |
|
"grad_norm": 0.0004962153034284711, |
|
"learning_rate": 1.9680213333333337e-05, |
|
"loss": 0.0, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 0.0496, |
|
"grad_norm": 0.00048244796926155686, |
|
"learning_rate": 1.9669546666666667e-05, |
|
"loss": 0.0, |
|
"step": 3100 |
|
}, |
|
{ |
|
"epoch": 0.0512, |
|
"grad_norm": 0.0003251029411330819, |
|
"learning_rate": 1.965888e-05, |
|
"loss": 0.0, |
|
"step": 3200 |
|
}, |
|
{ |
|
"epoch": 0.0528, |
|
"grad_norm": 0.0003602537326514721, |
|
"learning_rate": 1.9648213333333335e-05, |
|
"loss": 0.0, |
|
"step": 3300 |
|
}, |
|
{ |
|
"epoch": 0.0544, |
|
"grad_norm": 0.00031091648270376027, |
|
"learning_rate": 1.963754666666667e-05, |
|
"loss": 0.0, |
|
"step": 3400 |
|
}, |
|
{ |
|
"epoch": 0.056, |
|
"grad_norm": 0.00021150140673853457, |
|
"learning_rate": 1.9626880000000002e-05, |
|
"loss": 0.0, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 0.0576, |
|
"grad_norm": 0.00020159632549621165, |
|
"learning_rate": 1.9616213333333336e-05, |
|
"loss": 0.0, |
|
"step": 3600 |
|
}, |
|
{ |
|
"epoch": 0.0592, |
|
"grad_norm": 0.00022669663303531706, |
|
"learning_rate": 1.960554666666667e-05, |
|
"loss": 0.0, |
|
"step": 3700 |
|
}, |
|
{ |
|
"epoch": 0.0608, |
|
"grad_norm": 0.00015808363968972117, |
|
"learning_rate": 1.959488e-05, |
|
"loss": 0.0, |
|
"step": 3800 |
|
}, |
|
{ |
|
"epoch": 0.0624, |
|
"grad_norm": 0.00016346627671737224, |
|
"learning_rate": 1.9584213333333337e-05, |
|
"loss": 0.0, |
|
"step": 3900 |
|
}, |
|
{ |
|
"epoch": 0.064, |
|
"grad_norm": 0.00013883988140150905, |
|
"learning_rate": 1.9573546666666667e-05, |
|
"loss": 0.0, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 0.0656, |
|
"grad_norm": 0.0001542122772661969, |
|
"learning_rate": 1.956288e-05, |
|
"loss": 0.0, |
|
"step": 4100 |
|
}, |
|
{ |
|
"epoch": 0.0672, |
|
"grad_norm": 0.00011279522732365876, |
|
"learning_rate": 1.9552213333333334e-05, |
|
"loss": 0.0, |
|
"step": 4200 |
|
}, |
|
{ |
|
"epoch": 0.0688, |
|
"grad_norm": 0.00010847948578884825, |
|
"learning_rate": 1.9541546666666668e-05, |
|
"loss": 0.0002, |
|
"step": 4300 |
|
}, |
|
{ |
|
"epoch": 0.0704, |
|
"grad_norm": 0.00010385631321696565, |
|
"learning_rate": 1.953088e-05, |
|
"loss": 0.0, |
|
"step": 4400 |
|
}, |
|
{ |
|
"epoch": 0.072, |
|
"grad_norm": 0.00011526003072503954, |
|
"learning_rate": 1.9520213333333335e-05, |
|
"loss": 0.0, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 0.0736, |
|
"grad_norm": 9.515963756712154e-05, |
|
"learning_rate": 1.950954666666667e-05, |
|
"loss": 0.0, |
|
"step": 4600 |
|
}, |
|
{ |
|
"epoch": 0.0752, |
|
"grad_norm": 9.874672832665965e-05, |
|
"learning_rate": 1.9498880000000003e-05, |
|
"loss": 0.0, |
|
"step": 4700 |
|
}, |
|
{ |
|
"epoch": 0.0768, |
|
"grad_norm": 7.66971061239019e-05, |
|
"learning_rate": 1.9488213333333333e-05, |
|
"loss": 0.0, |
|
"step": 4800 |
|
}, |
|
{ |
|
"epoch": 0.0784, |
|
"grad_norm": 7.775246922392398e-05, |
|
"learning_rate": 1.947754666666667e-05, |
|
"loss": 0.0, |
|
"step": 4900 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"grad_norm": 7.147744327085093e-05, |
|
"learning_rate": 1.946688e-05, |
|
"loss": 0.0, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 0.0816, |
|
"grad_norm": 7.22740005585365e-05, |
|
"learning_rate": 1.9456213333333337e-05, |
|
"loss": 0.0, |
|
"step": 5100 |
|
}, |
|
{ |
|
"epoch": 0.0832, |
|
"grad_norm": 7.028302206890658e-05, |
|
"learning_rate": 1.9445546666666668e-05, |
|
"loss": 0.0, |
|
"step": 5200 |
|
}, |
|
{ |
|
"epoch": 0.0848, |
|
"grad_norm": 5.9161051467526704e-05, |
|
"learning_rate": 1.943488e-05, |
|
"loss": 0.0, |
|
"step": 5300 |
|
}, |
|
{ |
|
"epoch": 0.0864, |
|
"grad_norm": 6.208459672052413e-05, |
|
"learning_rate": 1.9424213333333335e-05, |
|
"loss": 0.0, |
|
"step": 5400 |
|
}, |
|
{ |
|
"epoch": 0.088, |
|
"grad_norm": 5.193577453610487e-05, |
|
"learning_rate": 1.941354666666667e-05, |
|
"loss": 0.0, |
|
"step": 5500 |
|
}, |
|
{ |
|
"epoch": 0.0896, |
|
"grad_norm": 5.222402614890598e-05, |
|
"learning_rate": 1.9402880000000002e-05, |
|
"loss": 0.0, |
|
"step": 5600 |
|
}, |
|
{ |
|
"epoch": 0.0912, |
|
"grad_norm": 4.8609777877572924e-05, |
|
"learning_rate": 1.9392213333333332e-05, |
|
"loss": 0.0, |
|
"step": 5700 |
|
}, |
|
{ |
|
"epoch": 0.0928, |
|
"grad_norm": 4.5065498852636665e-05, |
|
"learning_rate": 1.938154666666667e-05, |
|
"loss": 0.0, |
|
"step": 5800 |
|
}, |
|
{ |
|
"epoch": 0.0944, |
|
"grad_norm": 5.239112942945212e-05, |
|
"learning_rate": 1.937088e-05, |
|
"loss": 0.0, |
|
"step": 5900 |
|
}, |
|
{ |
|
"epoch": 0.096, |
|
"grad_norm": 3.7742487620562315e-05, |
|
"learning_rate": 1.9360213333333337e-05, |
|
"loss": 0.0, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 0.0976, |
|
"grad_norm": 3.911816020263359e-05, |
|
"learning_rate": 1.9349546666666667e-05, |
|
"loss": 0.0, |
|
"step": 6100 |
|
}, |
|
{ |
|
"epoch": 0.0992, |
|
"grad_norm": 4.1876675823004916e-05, |
|
"learning_rate": 1.933888e-05, |
|
"loss": 0.0, |
|
"step": 6200 |
|
}, |
|
{ |
|
"epoch": 0.1008, |
|
"grad_norm": 3.35037948389072e-05, |
|
"learning_rate": 1.9328213333333334e-05, |
|
"loss": 0.0, |
|
"step": 6300 |
|
}, |
|
{ |
|
"epoch": 0.1024, |
|
"grad_norm": 3.4527558455010876e-05, |
|
"learning_rate": 1.9317546666666668e-05, |
|
"loss": 0.0, |
|
"step": 6400 |
|
}, |
|
{ |
|
"epoch": 0.104, |
|
"grad_norm": 3.605220263125375e-05, |
|
"learning_rate": 1.9306880000000002e-05, |
|
"loss": 0.0, |
|
"step": 6500 |
|
}, |
|
{ |
|
"epoch": 0.1056, |
|
"grad_norm": 2.969952765852213e-05, |
|
"learning_rate": 1.9296213333333335e-05, |
|
"loss": 0.0, |
|
"step": 6600 |
|
}, |
|
{ |
|
"epoch": 0.1072, |
|
"grad_norm": 3.127843956463039e-05, |
|
"learning_rate": 1.928554666666667e-05, |
|
"loss": 0.0, |
|
"step": 6700 |
|
}, |
|
{ |
|
"epoch": 0.1088, |
|
"grad_norm": 2.8357508199405856e-05, |
|
"learning_rate": 1.9274880000000003e-05, |
|
"loss": 0.0, |
|
"step": 6800 |
|
}, |
|
{ |
|
"epoch": 0.1104, |
|
"grad_norm": 2.9021906811976805e-05, |
|
"learning_rate": 1.9264213333333336e-05, |
|
"loss": 0.0, |
|
"step": 6900 |
|
}, |
|
{ |
|
"epoch": 0.112, |
|
"grad_norm": 2.56488910963526e-05, |
|
"learning_rate": 1.925354666666667e-05, |
|
"loss": 0.0, |
|
"step": 7000 |
|
}, |
|
{ |
|
"epoch": 0.1136, |
|
"grad_norm": 2.425051025056746e-05, |
|
"learning_rate": 1.924288e-05, |
|
"loss": 0.0, |
|
"step": 7100 |
|
}, |
|
{ |
|
"epoch": 0.1152, |
|
"grad_norm": 2.3250922822626308e-05, |
|
"learning_rate": 1.9232213333333334e-05, |
|
"loss": 0.0, |
|
"step": 7200 |
|
}, |
|
{ |
|
"epoch": 0.1168, |
|
"grad_norm": 2.17950018850388e-05, |
|
"learning_rate": 1.9221546666666668e-05, |
|
"loss": 0.0, |
|
"step": 7300 |
|
}, |
|
{ |
|
"epoch": 0.1184, |
|
"grad_norm": 1.6155694538610987e-05, |
|
"learning_rate": 1.921088e-05, |
|
"loss": 0.0, |
|
"step": 7400 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"grad_norm": 1.7884429325931706e-05, |
|
"learning_rate": 1.9200213333333335e-05, |
|
"loss": 0.0, |
|
"step": 7500 |
|
}, |
|
{ |
|
"epoch": 0.1216, |
|
"grad_norm": 1.5228806660161354e-05, |
|
"learning_rate": 1.918954666666667e-05, |
|
"loss": 0.0, |
|
"step": 7600 |
|
}, |
|
{ |
|
"epoch": 0.1232, |
|
"grad_norm": 0.0004236107924953103, |
|
"learning_rate": 1.9178880000000002e-05, |
|
"loss": 0.0082, |
|
"step": 7700 |
|
}, |
|
{ |
|
"epoch": 0.1248, |
|
"grad_norm": 0.001014222507365048, |
|
"learning_rate": 1.9168213333333333e-05, |
|
"loss": 0.0057, |
|
"step": 7800 |
|
}, |
|
{ |
|
"epoch": 0.1264, |
|
"grad_norm": 0.0008165242616087198, |
|
"learning_rate": 1.9157653333333336e-05, |
|
"loss": 0.0029, |
|
"step": 7900 |
|
}, |
|
{ |
|
"epoch": 0.128, |
|
"grad_norm": 0.0005670466343872249, |
|
"learning_rate": 1.914698666666667e-05, |
|
"loss": 0.0, |
|
"step": 8000 |
|
}, |
|
{ |
|
"epoch": 0.1296, |
|
"grad_norm": 0.00030610596877522767, |
|
"learning_rate": 1.9136320000000003e-05, |
|
"loss": 0.0, |
|
"step": 8100 |
|
}, |
|
{ |
|
"epoch": 0.1312, |
|
"grad_norm": 0.00035054876934736967, |
|
"learning_rate": 1.9125653333333333e-05, |
|
"loss": 0.0, |
|
"step": 8200 |
|
}, |
|
{ |
|
"epoch": 0.1328, |
|
"grad_norm": 0.00023674893600400537, |
|
"learning_rate": 1.9114986666666667e-05, |
|
"loss": 0.0, |
|
"step": 8300 |
|
}, |
|
{ |
|
"epoch": 0.1344, |
|
"grad_norm": 0.00020271481480449438, |
|
"learning_rate": 1.910432e-05, |
|
"loss": 0.0, |
|
"step": 8400 |
|
}, |
|
{ |
|
"epoch": 0.136, |
|
"grad_norm": 0.00012801923730876297, |
|
"learning_rate": 1.9093653333333334e-05, |
|
"loss": 0.0, |
|
"step": 8500 |
|
}, |
|
{ |
|
"epoch": 0.1376, |
|
"grad_norm": 0.00013235829828772694, |
|
"learning_rate": 1.9082986666666668e-05, |
|
"loss": 0.0, |
|
"step": 8600 |
|
}, |
|
{ |
|
"epoch": 0.1392, |
|
"grad_norm": 0.0001435764424968511, |
|
"learning_rate": 1.9072320000000002e-05, |
|
"loss": 0.0, |
|
"step": 8700 |
|
}, |
|
{ |
|
"epoch": 0.1408, |
|
"grad_norm": 0.00011448346049292013, |
|
"learning_rate": 1.9061653333333335e-05, |
|
"loss": 0.0, |
|
"step": 8800 |
|
}, |
|
{ |
|
"epoch": 0.1424, |
|
"grad_norm": 0.000109691551188007, |
|
"learning_rate": 1.905098666666667e-05, |
|
"loss": 0.0, |
|
"step": 8900 |
|
}, |
|
{ |
|
"epoch": 0.144, |
|
"grad_norm": 0.00011090271436842158, |
|
"learning_rate": 1.904032e-05, |
|
"loss": 0.0, |
|
"step": 9000 |
|
}, |
|
{ |
|
"epoch": 0.1456, |
|
"grad_norm": 9.558543388266116e-05, |
|
"learning_rate": 1.9029653333333336e-05, |
|
"loss": 0.0, |
|
"step": 9100 |
|
}, |
|
{ |
|
"epoch": 0.1472, |
|
"grad_norm": 8.690696267876774e-05, |
|
"learning_rate": 1.9018986666666667e-05, |
|
"loss": 0.0, |
|
"step": 9200 |
|
}, |
|
{ |
|
"epoch": 0.1488, |
|
"grad_norm": 6.725907587679103e-05, |
|
"learning_rate": 1.9008320000000004e-05, |
|
"loss": 0.0, |
|
"step": 9300 |
|
}, |
|
{ |
|
"epoch": 0.1504, |
|
"grad_norm": 6.74678958603181e-05, |
|
"learning_rate": 1.8997653333333334e-05, |
|
"loss": 0.0, |
|
"step": 9400 |
|
}, |
|
{ |
|
"epoch": 0.152, |
|
"grad_norm": 6.147296517156065e-05, |
|
"learning_rate": 1.8986986666666668e-05, |
|
"loss": 0.0, |
|
"step": 9500 |
|
}, |
|
{ |
|
"epoch": 0.1536, |
|
"grad_norm": 5.8914873079629615e-05, |
|
"learning_rate": 1.897632e-05, |
|
"loss": 0.0, |
|
"step": 9600 |
|
}, |
|
{ |
|
"epoch": 0.1552, |
|
"grad_norm": 5.48441348655615e-05, |
|
"learning_rate": 1.8965653333333335e-05, |
|
"loss": 0.0, |
|
"step": 9700 |
|
}, |
|
{ |
|
"epoch": 0.1568, |
|
"grad_norm": 6.385787128238007e-05, |
|
"learning_rate": 1.895498666666667e-05, |
|
"loss": 0.0, |
|
"step": 9800 |
|
}, |
|
{ |
|
"epoch": 0.1584, |
|
"grad_norm": 5.257723751128651e-05, |
|
"learning_rate": 1.894432e-05, |
|
"loss": 0.0, |
|
"step": 9900 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"grad_norm": 5.524126754608005e-05, |
|
"learning_rate": 1.8933653333333336e-05, |
|
"loss": 0.0, |
|
"step": 10000 |
|
}, |
|
{ |
|
"epoch": 0.1616, |
|
"grad_norm": 4.3862233724212274e-05, |
|
"learning_rate": 1.8922986666666666e-05, |
|
"loss": 0.0, |
|
"step": 10100 |
|
}, |
|
{ |
|
"epoch": 0.1632, |
|
"grad_norm": 4.757397255161777e-05, |
|
"learning_rate": 1.8912320000000003e-05, |
|
"loss": 0.0, |
|
"step": 10200 |
|
}, |
|
{ |
|
"epoch": 0.1648, |
|
"grad_norm": 0.0006927695358172059, |
|
"learning_rate": 1.8901653333333334e-05, |
|
"loss": 0.004, |
|
"step": 10300 |
|
}, |
|
{ |
|
"epoch": 0.1664, |
|
"grad_norm": 0.0002426155551802367, |
|
"learning_rate": 1.8890986666666667e-05, |
|
"loss": 0.0, |
|
"step": 10400 |
|
}, |
|
{ |
|
"epoch": 0.168, |
|
"grad_norm": 0.0002200150629505515, |
|
"learning_rate": 1.888032e-05, |
|
"loss": 0.0, |
|
"step": 10500 |
|
}, |
|
{ |
|
"epoch": 0.1696, |
|
"grad_norm": 0.00017707289953250438, |
|
"learning_rate": 1.8869653333333335e-05, |
|
"loss": 0.0, |
|
"step": 10600 |
|
}, |
|
{ |
|
"epoch": 0.1712, |
|
"grad_norm": 0.000118190873763524, |
|
"learning_rate": 1.8858986666666668e-05, |
|
"loss": 0.0, |
|
"step": 10700 |
|
}, |
|
{ |
|
"epoch": 0.1728, |
|
"grad_norm": 0.0001291615335503593, |
|
"learning_rate": 1.8848320000000002e-05, |
|
"loss": 0.0, |
|
"step": 10800 |
|
}, |
|
{ |
|
"epoch": 0.1744, |
|
"grad_norm": 0.00015779025852680206, |
|
"learning_rate": 1.883808e-05, |
|
"loss": 0.0415, |
|
"step": 10900 |
|
}, |
|
{ |
|
"epoch": 0.176, |
|
"grad_norm": 0.0010131917661055923, |
|
"learning_rate": 1.8827413333333334e-05, |
|
"loss": 0.0044, |
|
"step": 11000 |
|
}, |
|
{ |
|
"epoch": 0.1776, |
|
"grad_norm": 0.00028366921469569206, |
|
"learning_rate": 1.8816746666666668e-05, |
|
"loss": 0.0, |
|
"step": 11100 |
|
}, |
|
{ |
|
"epoch": 0.1792, |
|
"grad_norm": 0.00023540180700365454, |
|
"learning_rate": 1.880608e-05, |
|
"loss": 0.0, |
|
"step": 11200 |
|
}, |
|
{ |
|
"epoch": 0.1808, |
|
"grad_norm": 0.00020333370775915682, |
|
"learning_rate": 1.8795413333333335e-05, |
|
"loss": 0.0, |
|
"step": 11300 |
|
}, |
|
{ |
|
"epoch": 0.1824, |
|
"grad_norm": 0.0010605982970446348, |
|
"learning_rate": 1.878474666666667e-05, |
|
"loss": 0.0, |
|
"step": 11400 |
|
}, |
|
{ |
|
"epoch": 0.184, |
|
"grad_norm": 0.0002904959546867758, |
|
"learning_rate": 1.8774080000000002e-05, |
|
"loss": 0.0004, |
|
"step": 11500 |
|
}, |
|
{ |
|
"epoch": 0.1856, |
|
"grad_norm": 0.00024336307251360267, |
|
"learning_rate": 1.8763413333333336e-05, |
|
"loss": 0.0, |
|
"step": 11600 |
|
}, |
|
{ |
|
"epoch": 0.1872, |
|
"grad_norm": 0.0001343002077192068, |
|
"learning_rate": 1.8752746666666666e-05, |
|
"loss": 0.0, |
|
"step": 11700 |
|
}, |
|
{ |
|
"epoch": 0.1888, |
|
"grad_norm": 9.551690163789317e-05, |
|
"learning_rate": 1.8742080000000003e-05, |
|
"loss": 0.0, |
|
"step": 11800 |
|
}, |
|
{ |
|
"epoch": 0.1904, |
|
"grad_norm": 7.702928996877745e-05, |
|
"learning_rate": 1.8731413333333334e-05, |
|
"loss": 0.0, |
|
"step": 11900 |
|
}, |
|
{ |
|
"epoch": 0.192, |
|
"grad_norm": 8.958076796261594e-05, |
|
"learning_rate": 1.872074666666667e-05, |
|
"loss": 0.0, |
|
"step": 12000 |
|
}, |
|
{ |
|
"epoch": 0.1936, |
|
"grad_norm": 7.529326830990613e-05, |
|
"learning_rate": 1.871008e-05, |
|
"loss": 0.0, |
|
"step": 12100 |
|
}, |
|
{ |
|
"epoch": 0.1952, |
|
"grad_norm": 5.7092009228654206e-05, |
|
"learning_rate": 1.8699413333333335e-05, |
|
"loss": 0.0, |
|
"step": 12200 |
|
}, |
|
{ |
|
"epoch": 0.1968, |
|
"grad_norm": 5.631962267216295e-05, |
|
"learning_rate": 1.8688746666666668e-05, |
|
"loss": 0.0, |
|
"step": 12300 |
|
}, |
|
{ |
|
"epoch": 0.1984, |
|
"grad_norm": 5.422734466264956e-05, |
|
"learning_rate": 1.8678080000000002e-05, |
|
"loss": 0.0, |
|
"step": 12400 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"grad_norm": 9.995359869208187e-05, |
|
"learning_rate": 1.8667413333333336e-05, |
|
"loss": 0.0, |
|
"step": 12500 |
|
}, |
|
{ |
|
"epoch": 0.2016, |
|
"grad_norm": 4.6004188334336504e-05, |
|
"learning_rate": 1.8656746666666666e-05, |
|
"loss": 0.0, |
|
"step": 12600 |
|
}, |
|
{ |
|
"epoch": 0.2032, |
|
"grad_norm": 4.585070564644411e-05, |
|
"learning_rate": 1.8646080000000003e-05, |
|
"loss": 0.0, |
|
"step": 12700 |
|
}, |
|
{ |
|
"epoch": 0.2048, |
|
"grad_norm": 4.063411324750632e-05, |
|
"learning_rate": 1.8635413333333333e-05, |
|
"loss": 0.0, |
|
"step": 12800 |
|
}, |
|
{ |
|
"epoch": 0.2064, |
|
"grad_norm": 4.030313721159473e-05, |
|
"learning_rate": 1.862474666666667e-05, |
|
"loss": 0.0, |
|
"step": 12900 |
|
}, |
|
{ |
|
"epoch": 0.208, |
|
"grad_norm": 3.6798068322241306e-05, |
|
"learning_rate": 1.861408e-05, |
|
"loss": 0.0, |
|
"step": 13000 |
|
}, |
|
{ |
|
"epoch": 0.2096, |
|
"grad_norm": 3.58178440365009e-05, |
|
"learning_rate": 1.8603413333333334e-05, |
|
"loss": 0.0, |
|
"step": 13100 |
|
}, |
|
{ |
|
"epoch": 0.2112, |
|
"grad_norm": 3.633175219874829e-05, |
|
"learning_rate": 1.8592746666666668e-05, |
|
"loss": 0.0, |
|
"step": 13200 |
|
}, |
|
{ |
|
"epoch": 0.2128, |
|
"grad_norm": 2.833498001564294e-05, |
|
"learning_rate": 1.858208e-05, |
|
"loss": 0.0, |
|
"step": 13300 |
|
}, |
|
{ |
|
"epoch": 0.2144, |
|
"grad_norm": 3.0395483918255195e-05, |
|
"learning_rate": 1.8571413333333335e-05, |
|
"loss": 0.0, |
|
"step": 13400 |
|
}, |
|
{ |
|
"epoch": 0.216, |
|
"grad_norm": 2.7293002858641557e-05, |
|
"learning_rate": 1.856074666666667e-05, |
|
"loss": 0.0, |
|
"step": 13500 |
|
}, |
|
{ |
|
"epoch": 0.2176, |
|
"grad_norm": 2.7100266379420646e-05, |
|
"learning_rate": 1.8550080000000002e-05, |
|
"loss": 0.0, |
|
"step": 13600 |
|
}, |
|
{ |
|
"epoch": 0.2192, |
|
"grad_norm": 2.9992290365044028e-05, |
|
"learning_rate": 1.8539413333333336e-05, |
|
"loss": 0.0, |
|
"step": 13700 |
|
}, |
|
{ |
|
"epoch": 0.2208, |
|
"grad_norm": 2.895161924243439e-05, |
|
"learning_rate": 1.852874666666667e-05, |
|
"loss": 0.0, |
|
"step": 13800 |
|
}, |
|
{ |
|
"epoch": 0.2224, |
|
"grad_norm": 2.4209704861277714e-05, |
|
"learning_rate": 1.851808e-05, |
|
"loss": 0.0, |
|
"step": 13900 |
|
}, |
|
{ |
|
"epoch": 0.224, |
|
"grad_norm": 2.0423936803126708e-05, |
|
"learning_rate": 1.8507413333333334e-05, |
|
"loss": 0.0, |
|
"step": 14000 |
|
}, |
|
{ |
|
"epoch": 0.2256, |
|
"grad_norm": 1.81021387106739e-05, |
|
"learning_rate": 1.8496746666666667e-05, |
|
"loss": 0.0, |
|
"step": 14100 |
|
}, |
|
{ |
|
"epoch": 0.2272, |
|
"grad_norm": 1.9862713088514283e-05, |
|
"learning_rate": 1.848608e-05, |
|
"loss": 0.0, |
|
"step": 14200 |
|
}, |
|
{ |
|
"epoch": 0.2288, |
|
"grad_norm": 1.785710992407985e-05, |
|
"learning_rate": 1.8475413333333335e-05, |
|
"loss": 0.0, |
|
"step": 14300 |
|
}, |
|
{ |
|
"epoch": 0.2304, |
|
"grad_norm": 1.59865176101448e-05, |
|
"learning_rate": 1.846474666666667e-05, |
|
"loss": 0.0, |
|
"step": 14400 |
|
}, |
|
{ |
|
"epoch": 0.232, |
|
"grad_norm": 1.3941355064162053e-05, |
|
"learning_rate": 1.8454080000000002e-05, |
|
"loss": 0.0, |
|
"step": 14500 |
|
}, |
|
{ |
|
"epoch": 0.2336, |
|
"grad_norm": 1.572091605339665e-05, |
|
"learning_rate": 1.8443413333333336e-05, |
|
"loss": 0.0, |
|
"step": 14600 |
|
}, |
|
{ |
|
"epoch": 0.2352, |
|
"grad_norm": 1.866888123913668e-05, |
|
"learning_rate": 1.843274666666667e-05, |
|
"loss": 0.0, |
|
"step": 14700 |
|
}, |
|
{ |
|
"epoch": 0.2368, |
|
"grad_norm": 1.7219248547917232e-05, |
|
"learning_rate": 1.8422080000000003e-05, |
|
"loss": 0.0, |
|
"step": 14800 |
|
}, |
|
{ |
|
"epoch": 0.2384, |
|
"grad_norm": 1.6731108189560473e-05, |
|
"learning_rate": 1.8411413333333333e-05, |
|
"loss": 0.0, |
|
"step": 14900 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"grad_norm": 1.463459284423152e-05, |
|
"learning_rate": 1.840074666666667e-05, |
|
"loss": 0.0, |
|
"step": 15000 |
|
}, |
|
{ |
|
"epoch": 0.2416, |
|
"grad_norm": 1.5301053281291388e-05, |
|
"learning_rate": 1.839008e-05, |
|
"loss": 0.0, |
|
"step": 15100 |
|
}, |
|
{ |
|
"epoch": 0.2432, |
|
"grad_norm": 1.1437626199040096e-05, |
|
"learning_rate": 1.8379413333333334e-05, |
|
"loss": 0.0, |
|
"step": 15200 |
|
}, |
|
{ |
|
"epoch": 0.2448, |
|
"grad_norm": 1.2994928511034232e-05, |
|
"learning_rate": 1.8368746666666668e-05, |
|
"loss": 0.0, |
|
"step": 15300 |
|
}, |
|
{ |
|
"epoch": 0.2464, |
|
"grad_norm": 1.1984897355432622e-05, |
|
"learning_rate": 1.835808e-05, |
|
"loss": 0.0, |
|
"step": 15400 |
|
}, |
|
{ |
|
"epoch": 0.248, |
|
"grad_norm": 1.0545180884946603e-05, |
|
"learning_rate": 1.8347413333333335e-05, |
|
"loss": 0.0, |
|
"step": 15500 |
|
}, |
|
{ |
|
"epoch": 0.2496, |
|
"grad_norm": 1.0379474588262383e-05, |
|
"learning_rate": 1.8336746666666666e-05, |
|
"loss": 0.0, |
|
"step": 15600 |
|
}, |
|
{ |
|
"epoch": 0.2512, |
|
"grad_norm": 9.962018339138012e-06, |
|
"learning_rate": 1.8326080000000003e-05, |
|
"loss": 0.0, |
|
"step": 15700 |
|
}, |
|
{ |
|
"epoch": 0.2528, |
|
"grad_norm": 1.2507619430834893e-05, |
|
"learning_rate": 1.8315413333333333e-05, |
|
"loss": 0.0, |
|
"step": 15800 |
|
}, |
|
{ |
|
"epoch": 0.2544, |
|
"grad_norm": 1.1630279914243147e-05, |
|
"learning_rate": 1.830474666666667e-05, |
|
"loss": 0.0, |
|
"step": 15900 |
|
}, |
|
{ |
|
"epoch": 0.256, |
|
"grad_norm": 8.16356077848468e-06, |
|
"learning_rate": 1.829408e-05, |
|
"loss": 0.0, |
|
"step": 16000 |
|
}, |
|
{ |
|
"epoch": 0.2576, |
|
"grad_norm": 9.179115295410156e-06, |
|
"learning_rate": 1.8283413333333334e-05, |
|
"loss": 0.0, |
|
"step": 16100 |
|
}, |
|
{ |
|
"epoch": 0.2592, |
|
"grad_norm": 7.899307092884555e-06, |
|
"learning_rate": 1.8272746666666668e-05, |
|
"loss": 0.0, |
|
"step": 16200 |
|
}, |
|
{ |
|
"epoch": 0.2608, |
|
"grad_norm": 8.934203833632637e-06, |
|
"learning_rate": 1.826208e-05, |
|
"loss": 0.0, |
|
"step": 16300 |
|
}, |
|
{ |
|
"epoch": 0.2624, |
|
"grad_norm": 6.883318292238982e-06, |
|
"learning_rate": 1.8251413333333335e-05, |
|
"loss": 0.0, |
|
"step": 16400 |
|
}, |
|
{ |
|
"epoch": 0.264, |
|
"grad_norm": 6.63133641864988e-06, |
|
"learning_rate": 1.824074666666667e-05, |
|
"loss": 0.0, |
|
"step": 16500 |
|
}, |
|
{ |
|
"epoch": 0.2656, |
|
"grad_norm": 7.2985894803423434e-06, |
|
"learning_rate": 1.8230080000000002e-05, |
|
"loss": 0.0, |
|
"step": 16600 |
|
}, |
|
{ |
|
"epoch": 0.2672, |
|
"grad_norm": 6.7308114921615925e-06, |
|
"learning_rate": 1.8219413333333336e-05, |
|
"loss": 0.0, |
|
"step": 16700 |
|
}, |
|
{ |
|
"epoch": 0.2688, |
|
"grad_norm": 7.756932973279618e-06, |
|
"learning_rate": 1.820874666666667e-05, |
|
"loss": 0.0, |
|
"step": 16800 |
|
}, |
|
{ |
|
"epoch": 0.2704, |
|
"grad_norm": 6.190203748701606e-06, |
|
"learning_rate": 1.8198080000000003e-05, |
|
"loss": 0.0, |
|
"step": 16900 |
|
}, |
|
{ |
|
"epoch": 0.272, |
|
"grad_norm": 6.483978722826578e-06, |
|
"learning_rate": 1.8187413333333333e-05, |
|
"loss": 0.0, |
|
"step": 17000 |
|
}, |
|
{ |
|
"epoch": 0.2736, |
|
"grad_norm": 5.759993655374274e-06, |
|
"learning_rate": 1.817674666666667e-05, |
|
"loss": 0.0, |
|
"step": 17100 |
|
}, |
|
{ |
|
"epoch": 0.2752, |
|
"grad_norm": 5.414194674813189e-06, |
|
"learning_rate": 1.816608e-05, |
|
"loss": 0.0, |
|
"step": 17200 |
|
}, |
|
{ |
|
"epoch": 0.2768, |
|
"grad_norm": 6.183955520100426e-06, |
|
"learning_rate": 1.8155413333333334e-05, |
|
"loss": 0.0, |
|
"step": 17300 |
|
}, |
|
{ |
|
"epoch": 0.2784, |
|
"grad_norm": 5.614017936750315e-06, |
|
"learning_rate": 1.8144746666666668e-05, |
|
"loss": 0.0, |
|
"step": 17400 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"grad_norm": 4.546806849248242e-06, |
|
"learning_rate": 1.8134080000000002e-05, |
|
"loss": 0.0, |
|
"step": 17500 |
|
}, |
|
{ |
|
"epoch": 0.2816, |
|
"grad_norm": 5.248873549135169e-06, |
|
"learning_rate": 1.8123413333333335e-05, |
|
"loss": 0.0, |
|
"step": 17600 |
|
}, |
|
{ |
|
"epoch": 0.2832, |
|
"grad_norm": 5.707049695047317e-06, |
|
"learning_rate": 1.811274666666667e-05, |
|
"loss": 0.0, |
|
"step": 17700 |
|
}, |
|
{ |
|
"epoch": 0.2848, |
|
"grad_norm": 4.1388898353034165e-06, |
|
"learning_rate": 1.8102080000000003e-05, |
|
"loss": 0.0, |
|
"step": 17800 |
|
}, |
|
{ |
|
"epoch": 0.2864, |
|
"grad_norm": 4.138165422773454e-06, |
|
"learning_rate": 1.8091413333333333e-05, |
|
"loss": 0.0, |
|
"step": 17900 |
|
}, |
|
{ |
|
"epoch": 0.288, |
|
"grad_norm": 4.807816367247142e-06, |
|
"learning_rate": 1.808074666666667e-05, |
|
"loss": 0.0, |
|
"step": 18000 |
|
}, |
|
{ |
|
"epoch": 0.2896, |
|
"grad_norm": 3.953806299250573e-06, |
|
"learning_rate": 1.807008e-05, |
|
"loss": 0.0, |
|
"step": 18100 |
|
}, |
|
{ |
|
"epoch": 0.2912, |
|
"grad_norm": 4.514291049417807e-06, |
|
"learning_rate": 1.8059413333333334e-05, |
|
"loss": 0.0, |
|
"step": 18200 |
|
}, |
|
{ |
|
"epoch": 0.2928, |
|
"grad_norm": 3.86543206332135e-06, |
|
"learning_rate": 1.8048746666666668e-05, |
|
"loss": 0.0, |
|
"step": 18300 |
|
}, |
|
{ |
|
"epoch": 0.2944, |
|
"grad_norm": 4.020656433567638e-06, |
|
"learning_rate": 1.803808e-05, |
|
"loss": 0.0, |
|
"step": 18400 |
|
}, |
|
{ |
|
"epoch": 0.296, |
|
"grad_norm": 3.020624490090995e-06, |
|
"learning_rate": 1.8027413333333335e-05, |
|
"loss": 0.0, |
|
"step": 18500 |
|
}, |
|
{ |
|
"epoch": 0.2976, |
|
"grad_norm": 3.109249746557907e-06, |
|
"learning_rate": 1.801674666666667e-05, |
|
"loss": 0.0, |
|
"step": 18600 |
|
}, |
|
{ |
|
"epoch": 0.2992, |
|
"grad_norm": 3.1550080166198313e-06, |
|
"learning_rate": 1.8006080000000002e-05, |
|
"loss": 0.0, |
|
"step": 18700 |
|
}, |
|
{ |
|
"epoch": 0.3008, |
|
"grad_norm": 2.926373326772591e-06, |
|
"learning_rate": 1.7995413333333336e-05, |
|
"loss": 0.0, |
|
"step": 18800 |
|
}, |
|
{ |
|
"epoch": 0.3024, |
|
"grad_norm": 2.828928245435236e-06, |
|
"learning_rate": 1.798474666666667e-05, |
|
"loss": 0.0, |
|
"step": 18900 |
|
}, |
|
{ |
|
"epoch": 0.304, |
|
"grad_norm": 2.5713843569974415e-06, |
|
"learning_rate": 1.797408e-05, |
|
"loss": 0.0, |
|
"step": 19000 |
|
}, |
|
{ |
|
"epoch": 0.3056, |
|
"grad_norm": 2.677134943951387e-06, |
|
"learning_rate": 1.7963413333333334e-05, |
|
"loss": 0.0, |
|
"step": 19100 |
|
}, |
|
{ |
|
"epoch": 0.3072, |
|
"grad_norm": 2.3387976852973225e-06, |
|
"learning_rate": 1.7952746666666667e-05, |
|
"loss": 0.0, |
|
"step": 19200 |
|
}, |
|
{ |
|
"epoch": 0.3088, |
|
"grad_norm": 2.2587300918530673e-06, |
|
"learning_rate": 1.794208e-05, |
|
"loss": 0.0, |
|
"step": 19300 |
|
}, |
|
{ |
|
"epoch": 0.3104, |
|
"grad_norm": 2.2797592009737855e-06, |
|
"learning_rate": 1.7931413333333335e-05, |
|
"loss": 0.0, |
|
"step": 19400 |
|
}, |
|
{ |
|
"epoch": 0.312, |
|
"grad_norm": 2.516329004720319e-06, |
|
"learning_rate": 1.7920746666666668e-05, |
|
"loss": 0.0, |
|
"step": 19500 |
|
}, |
|
{ |
|
"epoch": 0.3136, |
|
"grad_norm": 2.1884568468522048e-06, |
|
"learning_rate": 1.7910080000000002e-05, |
|
"loss": 0.0, |
|
"step": 19600 |
|
}, |
|
{ |
|
"epoch": 0.3152, |
|
"grad_norm": 1.7737016833052621e-06, |
|
"learning_rate": 1.7899413333333335e-05, |
|
"loss": 0.0, |
|
"step": 19700 |
|
}, |
|
{ |
|
"epoch": 0.3168, |
|
"grad_norm": 1.8436692243994912e-06, |
|
"learning_rate": 1.788874666666667e-05, |
|
"loss": 0.0, |
|
"step": 19800 |
|
}, |
|
{ |
|
"epoch": 0.3184, |
|
"grad_norm": 2.1666396605724003e-06, |
|
"learning_rate": 1.7878080000000003e-05, |
|
"loss": 0.0, |
|
"step": 19900 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"grad_norm": 1.988582653211779e-06, |
|
"learning_rate": 1.7867413333333333e-05, |
|
"loss": 0.0, |
|
"step": 20000 |
|
}, |
|
{ |
|
"epoch": 0.3216, |
|
"grad_norm": 1.9917110876122024e-06, |
|
"learning_rate": 1.785674666666667e-05, |
|
"loss": 0.0, |
|
"step": 20100 |
|
}, |
|
{ |
|
"epoch": 0.3232, |
|
"grad_norm": 1.605521674719057e-06, |
|
"learning_rate": 1.7846186666666667e-05, |
|
"loss": 0.0013, |
|
"step": 20200 |
|
}, |
|
{ |
|
"epoch": 0.3248, |
|
"grad_norm": 2.1161680706427433e-05, |
|
"learning_rate": 1.7835733333333333e-05, |
|
"loss": 0.0223, |
|
"step": 20300 |
|
}, |
|
{ |
|
"epoch": 0.3264, |
|
"grad_norm": 0.0008167960331775248, |
|
"learning_rate": 1.782506666666667e-05, |
|
"loss": 0.0106, |
|
"step": 20400 |
|
}, |
|
{ |
|
"epoch": 0.328, |
|
"grad_norm": 0.00030075563699938357, |
|
"learning_rate": 1.78144e-05, |
|
"loss": 0.0004, |
|
"step": 20500 |
|
}, |
|
{ |
|
"epoch": 0.3296, |
|
"grad_norm": 0.00021357230434659868, |
|
"learning_rate": 1.7803733333333334e-05, |
|
"loss": 0.0, |
|
"step": 20600 |
|
}, |
|
{ |
|
"epoch": 0.3312, |
|
"grad_norm": 0.0021272392477840185, |
|
"learning_rate": 1.7793066666666667e-05, |
|
"loss": 0.005, |
|
"step": 20700 |
|
}, |
|
{ |
|
"epoch": 0.3328, |
|
"grad_norm": 0.00031995793688111007, |
|
"learning_rate": 1.77824e-05, |
|
"loss": 0.0, |
|
"step": 20800 |
|
}, |
|
{ |
|
"epoch": 0.3344, |
|
"grad_norm": 0.00017301621846854687, |
|
"learning_rate": 1.7771733333333335e-05, |
|
"loss": 0.0001, |
|
"step": 20900 |
|
}, |
|
{ |
|
"epoch": 0.336, |
|
"grad_norm": 0.00015349338355008513, |
|
"learning_rate": 1.776106666666667e-05, |
|
"loss": 0.0, |
|
"step": 21000 |
|
}, |
|
{ |
|
"epoch": 0.3376, |
|
"grad_norm": 0.00011431697930675, |
|
"learning_rate": 1.7750400000000002e-05, |
|
"loss": 0.0, |
|
"step": 21100 |
|
}, |
|
{ |
|
"epoch": 0.3392, |
|
"grad_norm": 0.00010969273716909811, |
|
"learning_rate": 1.7739733333333336e-05, |
|
"loss": 0.0, |
|
"step": 21200 |
|
}, |
|
{ |
|
"epoch": 0.3408, |
|
"grad_norm": 9.16854478418827e-05, |
|
"learning_rate": 1.772906666666667e-05, |
|
"loss": 0.0, |
|
"step": 21300 |
|
}, |
|
{ |
|
"epoch": 0.3424, |
|
"grad_norm": 7.963561074575409e-05, |
|
"learning_rate": 1.7718400000000003e-05, |
|
"loss": 0.0, |
|
"step": 21400 |
|
}, |
|
{ |
|
"epoch": 0.344, |
|
"grad_norm": 7.331543747568503e-05, |
|
"learning_rate": 1.7707733333333333e-05, |
|
"loss": 0.0, |
|
"step": 21500 |
|
}, |
|
{ |
|
"epoch": 0.3456, |
|
"grad_norm": 6.28735069767572e-05, |
|
"learning_rate": 1.769706666666667e-05, |
|
"loss": 0.0, |
|
"step": 21600 |
|
}, |
|
{ |
|
"epoch": 0.3472, |
|
"grad_norm": 5.47724193893373e-05, |
|
"learning_rate": 1.76864e-05, |
|
"loss": 0.0, |
|
"step": 21700 |
|
}, |
|
{ |
|
"epoch": 0.3488, |
|
"grad_norm": 5.209392838878557e-05, |
|
"learning_rate": 1.7675733333333334e-05, |
|
"loss": 0.0, |
|
"step": 21800 |
|
}, |
|
{ |
|
"epoch": 0.3504, |
|
"grad_norm": 5.173907629796304e-05, |
|
"learning_rate": 1.7665066666666668e-05, |
|
"loss": 0.0, |
|
"step": 21900 |
|
}, |
|
{ |
|
"epoch": 0.352, |
|
"grad_norm": 5.027300721849315e-05, |
|
"learning_rate": 1.76544e-05, |
|
"loss": 0.0, |
|
"step": 22000 |
|
}, |
|
{ |
|
"epoch": 0.3536, |
|
"grad_norm": 4.8056979721877724e-05, |
|
"learning_rate": 1.7643733333333335e-05, |
|
"loss": 0.0, |
|
"step": 22100 |
|
}, |
|
{ |
|
"epoch": 0.3552, |
|
"grad_norm": 4.373279443825595e-05, |
|
"learning_rate": 1.7633066666666666e-05, |
|
"loss": 0.0, |
|
"step": 22200 |
|
}, |
|
{ |
|
"epoch": 0.3568, |
|
"grad_norm": 4.0998969780048355e-05, |
|
"learning_rate": 1.7622400000000003e-05, |
|
"loss": 0.0, |
|
"step": 22300 |
|
}, |
|
{ |
|
"epoch": 0.3584, |
|
"grad_norm": 3.9302074583247304e-05, |
|
"learning_rate": 1.7611733333333333e-05, |
|
"loss": 0.0, |
|
"step": 22400 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"grad_norm": 3.604054290917702e-05, |
|
"learning_rate": 1.760106666666667e-05, |
|
"loss": 0.0, |
|
"step": 22500 |
|
}, |
|
{ |
|
"epoch": 0.3616, |
|
"grad_norm": 3.515103890094906e-05, |
|
"learning_rate": 1.75904e-05, |
|
"loss": 0.0, |
|
"step": 22600 |
|
}, |
|
{ |
|
"epoch": 0.3632, |
|
"grad_norm": 3.248384018661454e-05, |
|
"learning_rate": 1.7579733333333334e-05, |
|
"loss": 0.0, |
|
"step": 22700 |
|
}, |
|
{ |
|
"epoch": 0.3648, |
|
"grad_norm": 3.285553611931391e-05, |
|
"learning_rate": 1.7569066666666667e-05, |
|
"loss": 0.0, |
|
"step": 22800 |
|
}, |
|
{ |
|
"epoch": 0.3664, |
|
"grad_norm": 2.9991715564392507e-05, |
|
"learning_rate": 1.75584e-05, |
|
"loss": 0.0, |
|
"step": 22900 |
|
}, |
|
{ |
|
"epoch": 0.368, |
|
"grad_norm": 2.546398900449276e-05, |
|
"learning_rate": 1.7547733333333335e-05, |
|
"loss": 0.0, |
|
"step": 23000 |
|
}, |
|
{ |
|
"epoch": 0.3696, |
|
"grad_norm": 2.696343290153891e-05, |
|
"learning_rate": 1.753706666666667e-05, |
|
"loss": 0.0, |
|
"step": 23100 |
|
}, |
|
{ |
|
"epoch": 0.3712, |
|
"grad_norm": 2.5436993382754736e-05, |
|
"learning_rate": 1.7526400000000002e-05, |
|
"loss": 0.0, |
|
"step": 23200 |
|
}, |
|
{ |
|
"epoch": 0.3728, |
|
"grad_norm": 2.3871341909398325e-05, |
|
"learning_rate": 1.7515733333333336e-05, |
|
"loss": 0.0, |
|
"step": 23300 |
|
}, |
|
{ |
|
"epoch": 0.3744, |
|
"grad_norm": 2.4206261514336802e-05, |
|
"learning_rate": 1.750506666666667e-05, |
|
"loss": 0.0, |
|
"step": 23400 |
|
}, |
|
{ |
|
"epoch": 0.376, |
|
"grad_norm": 2.1603356799460016e-05, |
|
"learning_rate": 1.7494400000000003e-05, |
|
"loss": 0.0, |
|
"step": 23500 |
|
}, |
|
{ |
|
"epoch": 0.3776, |
|
"grad_norm": 2.217761539213825e-05, |
|
"learning_rate": 1.7483733333333333e-05, |
|
"loss": 0.0, |
|
"step": 23600 |
|
}, |
|
{ |
|
"epoch": 0.3792, |
|
"grad_norm": 2.0118233805987984e-05, |
|
"learning_rate": 1.7473066666666667e-05, |
|
"loss": 0.0, |
|
"step": 23700 |
|
}, |
|
{ |
|
"epoch": 0.3808, |
|
"grad_norm": 1.8580089090391994e-05, |
|
"learning_rate": 1.74624e-05, |
|
"loss": 0.0, |
|
"step": 23800 |
|
}, |
|
{ |
|
"epoch": 0.3824, |
|
"grad_norm": 2.0107261661905795e-05, |
|
"learning_rate": 1.7451733333333334e-05, |
|
"loss": 0.0, |
|
"step": 23900 |
|
}, |
|
{ |
|
"epoch": 0.384, |
|
"grad_norm": 1.8376658772467636e-05, |
|
"learning_rate": 1.7441066666666668e-05, |
|
"loss": 0.0, |
|
"step": 24000 |
|
}, |
|
{ |
|
"epoch": 0.3856, |
|
"grad_norm": 1.9588029317674227e-05, |
|
"learning_rate": 1.7430400000000002e-05, |
|
"loss": 0.0, |
|
"step": 24100 |
|
}, |
|
{ |
|
"epoch": 0.3872, |
|
"grad_norm": 1.579835770826321e-05, |
|
"learning_rate": 1.7419733333333335e-05, |
|
"loss": 0.0, |
|
"step": 24200 |
|
}, |
|
{ |
|
"epoch": 0.3888, |
|
"grad_norm": 1.5347810403909534e-05, |
|
"learning_rate": 1.740906666666667e-05, |
|
"loss": 0.0, |
|
"step": 24300 |
|
}, |
|
{ |
|
"epoch": 0.3904, |
|
"grad_norm": 1.552935464133043e-05, |
|
"learning_rate": 1.7398400000000003e-05, |
|
"loss": 0.0, |
|
"step": 24400 |
|
}, |
|
{ |
|
"epoch": 0.392, |
|
"grad_norm": 1.485102347942302e-05, |
|
"learning_rate": 1.7387733333333333e-05, |
|
"loss": 0.0, |
|
"step": 24500 |
|
}, |
|
{ |
|
"epoch": 0.3936, |
|
"grad_norm": 1.4200339137460105e-05, |
|
"learning_rate": 1.737706666666667e-05, |
|
"loss": 0.0, |
|
"step": 24600 |
|
}, |
|
{ |
|
"epoch": 0.3952, |
|
"grad_norm": 1.186304052680498e-05, |
|
"learning_rate": 1.73664e-05, |
|
"loss": 0.0, |
|
"step": 24700 |
|
}, |
|
{ |
|
"epoch": 0.3968, |
|
"grad_norm": 1.2428082300175447e-05, |
|
"learning_rate": 1.7355733333333334e-05, |
|
"loss": 0.0, |
|
"step": 24800 |
|
}, |
|
{ |
|
"epoch": 0.3984, |
|
"grad_norm": 1.168328890344128e-05, |
|
"learning_rate": 1.7345066666666668e-05, |
|
"loss": 0.0, |
|
"step": 24900 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"grad_norm": 1.1338147487549577e-05, |
|
"learning_rate": 1.73344e-05, |
|
"loss": 0.0, |
|
"step": 25000 |
|
}, |
|
{ |
|
"epoch": 0.4016, |
|
"grad_norm": 1.0833624401129782e-05, |
|
"learning_rate": 1.7323733333333335e-05, |
|
"loss": 0.0, |
|
"step": 25100 |
|
}, |
|
{ |
|
"epoch": 0.4032, |
|
"grad_norm": 1.1126129720651079e-05, |
|
"learning_rate": 1.731306666666667e-05, |
|
"loss": 0.0, |
|
"step": 25200 |
|
}, |
|
{ |
|
"epoch": 0.4048, |
|
"grad_norm": 1.071280894393567e-05, |
|
"learning_rate": 1.730250666666667e-05, |
|
"loss": 0.0004, |
|
"step": 25300 |
|
}, |
|
{ |
|
"epoch": 0.4064, |
|
"grad_norm": 8.747599167691078e-06, |
|
"learning_rate": 1.7291840000000002e-05, |
|
"loss": 0.0, |
|
"step": 25400 |
|
}, |
|
{ |
|
"epoch": 0.408, |
|
"grad_norm": 9.405779564986005e-06, |
|
"learning_rate": 1.7281173333333336e-05, |
|
"loss": 0.0, |
|
"step": 25500 |
|
}, |
|
{ |
|
"epoch": 0.4096, |
|
"grad_norm": 9.353114364785142e-06, |
|
"learning_rate": 1.727050666666667e-05, |
|
"loss": 0.0, |
|
"step": 25600 |
|
}, |
|
{ |
|
"epoch": 0.4112, |
|
"grad_norm": 8.207565770135261e-06, |
|
"learning_rate": 1.725984e-05, |
|
"loss": 0.0, |
|
"step": 25700 |
|
}, |
|
{ |
|
"epoch": 0.4128, |
|
"grad_norm": 8.460830940748565e-06, |
|
"learning_rate": 1.7249173333333337e-05, |
|
"loss": 0.0, |
|
"step": 25800 |
|
}, |
|
{ |
|
"epoch": 0.4144, |
|
"grad_norm": 7.834319148969371e-06, |
|
"learning_rate": 1.7238506666666667e-05, |
|
"loss": 0.0, |
|
"step": 25900 |
|
}, |
|
{ |
|
"epoch": 0.416, |
|
"grad_norm": 7.789364644850139e-06, |
|
"learning_rate": 1.722784e-05, |
|
"loss": 0.0, |
|
"step": 26000 |
|
}, |
|
{ |
|
"epoch": 0.4176, |
|
"grad_norm": 7.643193384865299e-06, |
|
"learning_rate": 1.7217173333333334e-05, |
|
"loss": 0.0, |
|
"step": 26100 |
|
}, |
|
{ |
|
"epoch": 0.4192, |
|
"grad_norm": 6.989634584897431e-06, |
|
"learning_rate": 1.7206506666666668e-05, |
|
"loss": 0.0, |
|
"step": 26200 |
|
}, |
|
{ |
|
"epoch": 0.4208, |
|
"grad_norm": 6.504462362499908e-06, |
|
"learning_rate": 1.7195840000000002e-05, |
|
"loss": 0.0, |
|
"step": 26300 |
|
}, |
|
{ |
|
"epoch": 0.4224, |
|
"grad_norm": 6.5619133238215e-06, |
|
"learning_rate": 1.7185173333333335e-05, |
|
"loss": 0.0, |
|
"step": 26400 |
|
}, |
|
{ |
|
"epoch": 0.424, |
|
"grad_norm": 6.538786692544818e-06, |
|
"learning_rate": 1.717450666666667e-05, |
|
"loss": 0.0, |
|
"step": 26500 |
|
}, |
|
{ |
|
"epoch": 0.4256, |
|
"grad_norm": 5.952290393906878e-06, |
|
"learning_rate": 1.716384e-05, |
|
"loss": 0.0, |
|
"step": 26600 |
|
}, |
|
{ |
|
"epoch": 0.4272, |
|
"grad_norm": 5.890806733077625e-06, |
|
"learning_rate": 1.7153173333333336e-05, |
|
"loss": 0.0, |
|
"step": 26700 |
|
}, |
|
{ |
|
"epoch": 0.4288, |
|
"grad_norm": 5.5425175560230855e-06, |
|
"learning_rate": 1.7142506666666667e-05, |
|
"loss": 0.0, |
|
"step": 26800 |
|
}, |
|
{ |
|
"epoch": 0.4304, |
|
"grad_norm": 5.190961019252427e-06, |
|
"learning_rate": 1.713184e-05, |
|
"loss": 0.0, |
|
"step": 26900 |
|
}, |
|
{ |
|
"epoch": 0.432, |
|
"grad_norm": 5.312633220455609e-06, |
|
"learning_rate": 1.7121173333333334e-05, |
|
"loss": 0.0, |
|
"step": 27000 |
|
}, |
|
{ |
|
"epoch": 0.4336, |
|
"grad_norm": 4.84698239233694e-06, |
|
"learning_rate": 1.7110506666666668e-05, |
|
"loss": 0.0, |
|
"step": 27100 |
|
}, |
|
{ |
|
"epoch": 0.4352, |
|
"grad_norm": 4.822264145332156e-06, |
|
"learning_rate": 1.709984e-05, |
|
"loss": 0.0, |
|
"step": 27200 |
|
}, |
|
{ |
|
"epoch": 0.4368, |
|
"grad_norm": 5.0093335630663205e-06, |
|
"learning_rate": 1.7089173333333335e-05, |
|
"loss": 0.0, |
|
"step": 27300 |
|
}, |
|
{ |
|
"epoch": 0.4384, |
|
"grad_norm": 4.560034540190827e-06, |
|
"learning_rate": 1.707850666666667e-05, |
|
"loss": 0.0, |
|
"step": 27400 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"grad_norm": 4.285787326807622e-06, |
|
"learning_rate": 1.7067840000000002e-05, |
|
"loss": 0.0, |
|
"step": 27500 |
|
}, |
|
{ |
|
"epoch": 0.4416, |
|
"grad_norm": 4.18266017732094e-06, |
|
"learning_rate": 1.7057173333333336e-05, |
|
"loss": 0.0, |
|
"step": 27600 |
|
}, |
|
{ |
|
"epoch": 0.4432, |
|
"grad_norm": 3.8072735151217785e-06, |
|
"learning_rate": 1.704650666666667e-05, |
|
"loss": 0.0, |
|
"step": 27700 |
|
}, |
|
{ |
|
"epoch": 0.4448, |
|
"grad_norm": 3.73259058505937e-06, |
|
"learning_rate": 1.703584e-05, |
|
"loss": 0.0, |
|
"step": 27800 |
|
}, |
|
{ |
|
"epoch": 0.4464, |
|
"grad_norm": 3.7710021842940478e-06, |
|
"learning_rate": 1.7025173333333337e-05, |
|
"loss": 0.0, |
|
"step": 27900 |
|
}, |
|
{ |
|
"epoch": 0.448, |
|
"grad_norm": 3.7697877814935055e-06, |
|
"learning_rate": 1.7014506666666667e-05, |
|
"loss": 0.0, |
|
"step": 28000 |
|
}, |
|
{ |
|
"epoch": 0.4496, |
|
"grad_norm": 3.442883553361753e-06, |
|
"learning_rate": 1.700384e-05, |
|
"loss": 0.0, |
|
"step": 28100 |
|
}, |
|
{ |
|
"epoch": 0.4512, |
|
"grad_norm": 3.407572421565419e-06, |
|
"learning_rate": 1.6993173333333334e-05, |
|
"loss": 0.0, |
|
"step": 28200 |
|
}, |
|
{ |
|
"epoch": 0.4528, |
|
"grad_norm": 3.369181740708882e-06, |
|
"learning_rate": 1.6982506666666668e-05, |
|
"loss": 0.0, |
|
"step": 28300 |
|
}, |
|
{ |
|
"epoch": 0.4544, |
|
"grad_norm": 3.2553843993810005e-06, |
|
"learning_rate": 1.6971840000000002e-05, |
|
"loss": 0.0, |
|
"step": 28400 |
|
}, |
|
{ |
|
"epoch": 0.456, |
|
"grad_norm": 2.9325344712560764e-06, |
|
"learning_rate": 1.6961173333333335e-05, |
|
"loss": 0.0, |
|
"step": 28500 |
|
}, |
|
{ |
|
"epoch": 0.4576, |
|
"grad_norm": 2.814329036482377e-06, |
|
"learning_rate": 1.695050666666667e-05, |
|
"loss": 0.0, |
|
"step": 28600 |
|
}, |
|
{ |
|
"epoch": 0.4592, |
|
"grad_norm": 2.932674760813825e-06, |
|
"learning_rate": 1.693984e-05, |
|
"loss": 0.0, |
|
"step": 28700 |
|
}, |
|
{ |
|
"epoch": 0.4608, |
|
"grad_norm": 2.8238118829904124e-06, |
|
"learning_rate": 1.6929173333333336e-05, |
|
"loss": 0.0, |
|
"step": 28800 |
|
}, |
|
{ |
|
"epoch": 0.4624, |
|
"grad_norm": 2.491308805474546e-06, |
|
"learning_rate": 1.6918506666666667e-05, |
|
"loss": 0.0, |
|
"step": 28900 |
|
}, |
|
{ |
|
"epoch": 0.464, |
|
"grad_norm": 2.2496963083540322e-06, |
|
"learning_rate": 1.690784e-05, |
|
"loss": 0.0, |
|
"step": 29000 |
|
}, |
|
{ |
|
"epoch": 0.4656, |
|
"grad_norm": 2.2353810891218018e-06, |
|
"learning_rate": 1.6897173333333334e-05, |
|
"loss": 0.0, |
|
"step": 29100 |
|
}, |
|
{ |
|
"epoch": 0.4672, |
|
"grad_norm": 2.2871329292684095e-06, |
|
"learning_rate": 1.6886506666666668e-05, |
|
"loss": 0.0, |
|
"step": 29200 |
|
}, |
|
{ |
|
"epoch": 0.4688, |
|
"grad_norm": 2.0240684079908533e-06, |
|
"learning_rate": 1.687584e-05, |
|
"loss": 0.0, |
|
"step": 29300 |
|
}, |
|
{ |
|
"epoch": 0.4704, |
|
"grad_norm": 2.1123094029462663e-06, |
|
"learning_rate": 1.6865173333333335e-05, |
|
"loss": 0.0, |
|
"step": 29400 |
|
}, |
|
{ |
|
"epoch": 0.472, |
|
"grad_norm": 1.2771483852702659e-06, |
|
"learning_rate": 1.685450666666667e-05, |
|
"loss": 0.0, |
|
"step": 29500 |
|
}, |
|
{ |
|
"epoch": 0.4736, |
|
"grad_norm": 1.6413683852078975e-06, |
|
"learning_rate": 1.6843840000000002e-05, |
|
"loss": 0.0, |
|
"step": 29600 |
|
}, |
|
{ |
|
"epoch": 0.4752, |
|
"grad_norm": 1.272170607080625e-06, |
|
"learning_rate": 1.6833173333333336e-05, |
|
"loss": 0.0, |
|
"step": 29700 |
|
}, |
|
{ |
|
"epoch": 0.4768, |
|
"grad_norm": 1.5857983726164093e-06, |
|
"learning_rate": 1.6822506666666666e-05, |
|
"loss": 0.0, |
|
"step": 29800 |
|
}, |
|
{ |
|
"epoch": 0.4784, |
|
"grad_norm": 9.906548257276881e-07, |
|
"learning_rate": 1.681184e-05, |
|
"loss": 0.0, |
|
"step": 29900 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"grad_norm": 1.2207430017951992e-06, |
|
"learning_rate": 1.6801173333333334e-05, |
|
"loss": 0.0, |
|
"step": 30000 |
|
}, |
|
{ |
|
"epoch": 0.4816, |
|
"grad_norm": 1.0915376833509072e-06, |
|
"learning_rate": 1.6790506666666667e-05, |
|
"loss": 0.0, |
|
"step": 30100 |
|
}, |
|
{ |
|
"epoch": 0.4832, |
|
"grad_norm": 9.879134950097068e-07, |
|
"learning_rate": 1.677984e-05, |
|
"loss": 0.0, |
|
"step": 30200 |
|
}, |
|
{ |
|
"epoch": 0.4848, |
|
"grad_norm": 9.498847930444754e-07, |
|
"learning_rate": 1.6769173333333335e-05, |
|
"loss": 0.0, |
|
"step": 30300 |
|
}, |
|
{ |
|
"epoch": 0.4864, |
|
"grad_norm": 1.0251242201775312e-06, |
|
"learning_rate": 1.6758506666666668e-05, |
|
"loss": 0.0, |
|
"step": 30400 |
|
}, |
|
{ |
|
"epoch": 0.488, |
|
"grad_norm": 1.0169778761337511e-06, |
|
"learning_rate": 1.6747840000000002e-05, |
|
"loss": 0.0, |
|
"step": 30500 |
|
}, |
|
{ |
|
"epoch": 0.4896, |
|
"grad_norm": 8.809812470644829e-07, |
|
"learning_rate": 1.6737173333333336e-05, |
|
"loss": 0.0, |
|
"step": 30600 |
|
}, |
|
{ |
|
"epoch": 0.4912, |
|
"grad_norm": 5.986374276290007e-07, |
|
"learning_rate": 1.672650666666667e-05, |
|
"loss": 0.0, |
|
"step": 30700 |
|
}, |
|
{ |
|
"epoch": 0.4928, |
|
"grad_norm": 7.004752546890813e-07, |
|
"learning_rate": 1.671584e-05, |
|
"loss": 0.0, |
|
"step": 30800 |
|
}, |
|
{ |
|
"epoch": 0.4944, |
|
"grad_norm": 6.413148412320879e-07, |
|
"learning_rate": 1.6705173333333337e-05, |
|
"loss": 0.0, |
|
"step": 30900 |
|
}, |
|
{ |
|
"epoch": 0.496, |
|
"grad_norm": 5.724415927943483e-07, |
|
"learning_rate": 1.6694506666666667e-05, |
|
"loss": 0.0, |
|
"step": 31000 |
|
}, |
|
{ |
|
"epoch": 0.4976, |
|
"grad_norm": 6.884836238896241e-07, |
|
"learning_rate": 1.6683840000000004e-05, |
|
"loss": 0.0, |
|
"step": 31100 |
|
}, |
|
{ |
|
"epoch": 0.4992, |
|
"grad_norm": 6.260689247028495e-07, |
|
"learning_rate": 1.6673173333333334e-05, |
|
"loss": 0.0, |
|
"step": 31200 |
|
}, |
|
{ |
|
"epoch": 0.5008, |
|
"grad_norm": 5.333803301255102e-07, |
|
"learning_rate": 1.6662506666666668e-05, |
|
"loss": 0.0, |
|
"step": 31300 |
|
}, |
|
{ |
|
"epoch": 0.5024, |
|
"grad_norm": 7.12101609678939e-07, |
|
"learning_rate": 1.665184e-05, |
|
"loss": 0.0, |
|
"step": 31400 |
|
}, |
|
{ |
|
"epoch": 0.504, |
|
"grad_norm": 5.5784983032936e-07, |
|
"learning_rate": 1.6641173333333335e-05, |
|
"loss": 0.0, |
|
"step": 31500 |
|
}, |
|
{ |
|
"epoch": 0.5056, |
|
"grad_norm": 5.744473696722707e-07, |
|
"learning_rate": 1.663050666666667e-05, |
|
"loss": 0.0, |
|
"step": 31600 |
|
}, |
|
{ |
|
"epoch": 0.5072, |
|
"grad_norm": 5.516257601811958e-07, |
|
"learning_rate": 1.661984e-05, |
|
"loss": 0.0, |
|
"step": 31700 |
|
}, |
|
{ |
|
"epoch": 0.5088, |
|
"grad_norm": 5.83314033519855e-07, |
|
"learning_rate": 1.6609173333333336e-05, |
|
"loss": 0.0, |
|
"step": 31800 |
|
}, |
|
{ |
|
"epoch": 0.5104, |
|
"grad_norm": 4.716930277481879e-07, |
|
"learning_rate": 1.6598506666666666e-05, |
|
"loss": 0.0, |
|
"step": 31900 |
|
}, |
|
{ |
|
"epoch": 0.512, |
|
"grad_norm": 4.2959382540175284e-07, |
|
"learning_rate": 1.658784e-05, |
|
"loss": 0.0, |
|
"step": 32000 |
|
}, |
|
{ |
|
"epoch": 0.5136, |
|
"grad_norm": 4.2993829652004933e-07, |
|
"learning_rate": 1.6577173333333334e-05, |
|
"loss": 0.0, |
|
"step": 32100 |
|
}, |
|
{ |
|
"epoch": 0.5152, |
|
"grad_norm": 4.2280382217541046e-07, |
|
"learning_rate": 1.6566506666666667e-05, |
|
"loss": 0.0, |
|
"step": 32200 |
|
}, |
|
{ |
|
"epoch": 0.5168, |
|
"grad_norm": 3.871925855492009e-07, |
|
"learning_rate": 1.655584e-05, |
|
"loss": 0.0, |
|
"step": 32300 |
|
}, |
|
{ |
|
"epoch": 0.5184, |
|
"grad_norm": 4.4411478938855e-07, |
|
"learning_rate": 1.6545173333333335e-05, |
|
"loss": 0.0, |
|
"step": 32400 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"grad_norm": 3.928717831058748e-07, |
|
"learning_rate": 1.653450666666667e-05, |
|
"loss": 0.0, |
|
"step": 32500 |
|
}, |
|
{ |
|
"epoch": 0.5216, |
|
"grad_norm": 4.015236640952935e-07, |
|
"learning_rate": 1.6523840000000002e-05, |
|
"loss": 0.0, |
|
"step": 32600 |
|
}, |
|
{ |
|
"epoch": 0.5232, |
|
"grad_norm": 4.0636822973283415e-07, |
|
"learning_rate": 1.6513173333333336e-05, |
|
"loss": 0.0, |
|
"step": 32700 |
|
}, |
|
{ |
|
"epoch": 0.5248, |
|
"grad_norm": 3.822638632300368e-07, |
|
"learning_rate": 1.650250666666667e-05, |
|
"loss": 0.0, |
|
"step": 32800 |
|
}, |
|
{ |
|
"epoch": 0.5264, |
|
"grad_norm": 3.445758522957476e-07, |
|
"learning_rate": 1.649184e-05, |
|
"loss": 0.0, |
|
"step": 32900 |
|
}, |
|
{ |
|
"epoch": 0.528, |
|
"grad_norm": 3.2311257314177055e-07, |
|
"learning_rate": 1.6481173333333337e-05, |
|
"loss": 0.0, |
|
"step": 33000 |
|
}, |
|
{ |
|
"epoch": 0.5296, |
|
"grad_norm": 3.157684318466636e-07, |
|
"learning_rate": 1.6470506666666667e-05, |
|
"loss": 0.0, |
|
"step": 33100 |
|
}, |
|
{ |
|
"epoch": 0.5312, |
|
"grad_norm": 2.9879865337534284e-07, |
|
"learning_rate": 1.645984e-05, |
|
"loss": 0.0, |
|
"step": 33200 |
|
}, |
|
{ |
|
"epoch": 0.5328, |
|
"grad_norm": 3.35712599053295e-07, |
|
"learning_rate": 1.6449173333333334e-05, |
|
"loss": 0.0, |
|
"step": 33300 |
|
}, |
|
{ |
|
"epoch": 0.5344, |
|
"grad_norm": 2.768248918982863e-07, |
|
"learning_rate": 1.6438506666666668e-05, |
|
"loss": 0.0, |
|
"step": 33400 |
|
}, |
|
{ |
|
"epoch": 0.536, |
|
"grad_norm": 2.875537177260412e-07, |
|
"learning_rate": 1.642784e-05, |
|
"loss": 0.0, |
|
"step": 33500 |
|
}, |
|
{ |
|
"epoch": 0.5376, |
|
"grad_norm": 2.9602924200844427e-07, |
|
"learning_rate": 1.6417173333333335e-05, |
|
"loss": 0.0, |
|
"step": 33600 |
|
}, |
|
{ |
|
"epoch": 0.5392, |
|
"grad_norm": 3.1393423682857247e-07, |
|
"learning_rate": 1.640650666666667e-05, |
|
"loss": 0.0, |
|
"step": 33700 |
|
}, |
|
{ |
|
"epoch": 0.5408, |
|
"grad_norm": 2.491387078862317e-07, |
|
"learning_rate": 1.639584e-05, |
|
"loss": 0.0, |
|
"step": 33800 |
|
}, |
|
{ |
|
"epoch": 0.5424, |
|
"grad_norm": 2.3757078793096298e-07, |
|
"learning_rate": 1.6385173333333336e-05, |
|
"loss": 0.0, |
|
"step": 33900 |
|
}, |
|
{ |
|
"epoch": 0.544, |
|
"grad_norm": 2.5188003860421304e-07, |
|
"learning_rate": 1.6374506666666667e-05, |
|
"loss": 0.0, |
|
"step": 34000 |
|
}, |
|
{ |
|
"epoch": 0.5456, |
|
"grad_norm": 2.4522555008843483e-07, |
|
"learning_rate": 1.6363840000000004e-05, |
|
"loss": 0.0, |
|
"step": 34100 |
|
}, |
|
{ |
|
"epoch": 0.5472, |
|
"grad_norm": 2.2814373323853943e-07, |
|
"learning_rate": 1.6353173333333334e-05, |
|
"loss": 0.0, |
|
"step": 34200 |
|
}, |
|
{ |
|
"epoch": 0.5488, |
|
"grad_norm": 2.2816611533471587e-07, |
|
"learning_rate": 1.6342506666666668e-05, |
|
"loss": 0.0, |
|
"step": 34300 |
|
}, |
|
{ |
|
"epoch": 0.5504, |
|
"grad_norm": 2.4142485699485405e-07, |
|
"learning_rate": 1.633184e-05, |
|
"loss": 0.0, |
|
"step": 34400 |
|
}, |
|
{ |
|
"epoch": 0.552, |
|
"grad_norm": 2.420672728931095e-07, |
|
"learning_rate": 1.6321173333333335e-05, |
|
"loss": 0.0, |
|
"step": 34500 |
|
}, |
|
{ |
|
"epoch": 0.5536, |
|
"grad_norm": 1.964133815590685e-07, |
|
"learning_rate": 1.631050666666667e-05, |
|
"loss": 0.0, |
|
"step": 34600 |
|
}, |
|
{ |
|
"epoch": 0.5552, |
|
"grad_norm": 1.8994630579527438e-07, |
|
"learning_rate": 1.6299840000000002e-05, |
|
"loss": 0.0, |
|
"step": 34700 |
|
}, |
|
{ |
|
"epoch": 0.5568, |
|
"grad_norm": 1.916166212367898e-07, |
|
"learning_rate": 1.6289173333333336e-05, |
|
"loss": 0.0, |
|
"step": 34800 |
|
}, |
|
{ |
|
"epoch": 0.5584, |
|
"grad_norm": 1.9209957713428594e-07, |
|
"learning_rate": 1.6278506666666666e-05, |
|
"loss": 0.0, |
|
"step": 34900 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"grad_norm": 1.820495896254215e-07, |
|
"learning_rate": 1.626784e-05, |
|
"loss": 0.0, |
|
"step": 35000 |
|
}, |
|
{ |
|
"epoch": 0.5616, |
|
"grad_norm": 1.488028118501461e-07, |
|
"learning_rate": 1.6257173333333333e-05, |
|
"loss": 0.0, |
|
"step": 35100 |
|
}, |
|
{ |
|
"epoch": 0.5632, |
|
"grad_norm": 1.6596105467669986e-07, |
|
"learning_rate": 1.6246506666666667e-05, |
|
"loss": 0.0, |
|
"step": 35200 |
|
}, |
|
{ |
|
"epoch": 0.5648, |
|
"grad_norm": 1.5144816245538095e-07, |
|
"learning_rate": 1.623584e-05, |
|
"loss": 0.0, |
|
"step": 35300 |
|
}, |
|
{ |
|
"epoch": 0.5664, |
|
"grad_norm": 1.71941024973421e-07, |
|
"learning_rate": 1.6225173333333334e-05, |
|
"loss": 0.0, |
|
"step": 35400 |
|
}, |
|
{ |
|
"epoch": 0.568, |
|
"grad_norm": 1.6939323188580602e-07, |
|
"learning_rate": 1.6214506666666668e-05, |
|
"loss": 0.0, |
|
"step": 35500 |
|
}, |
|
{ |
|
"epoch": 0.5696, |
|
"grad_norm": 1.3844817203789717e-07, |
|
"learning_rate": 1.6203840000000002e-05, |
|
"loss": 0.0, |
|
"step": 35600 |
|
}, |
|
{ |
|
"epoch": 0.5712, |
|
"grad_norm": 1.4256781355470594e-07, |
|
"learning_rate": 1.6193173333333335e-05, |
|
"loss": 0.0, |
|
"step": 35700 |
|
}, |
|
{ |
|
"epoch": 0.5728, |
|
"grad_norm": 1.3021079325881146e-07, |
|
"learning_rate": 1.618250666666667e-05, |
|
"loss": 0.0, |
|
"step": 35800 |
|
}, |
|
{ |
|
"epoch": 0.5744, |
|
"grad_norm": 1.2057364529027836e-07, |
|
"learning_rate": 1.617184e-05, |
|
"loss": 0.0, |
|
"step": 35900 |
|
}, |
|
{ |
|
"epoch": 0.576, |
|
"grad_norm": 1.2874994581579813e-07, |
|
"learning_rate": 1.6161173333333336e-05, |
|
"loss": 0.0, |
|
"step": 36000 |
|
}, |
|
{ |
|
"epoch": 0.5776, |
|
"grad_norm": 1.333337138476054e-07, |
|
"learning_rate": 1.6150506666666667e-05, |
|
"loss": 0.0, |
|
"step": 36100 |
|
}, |
|
{ |
|
"epoch": 0.5792, |
|
"grad_norm": 1.192641150282725e-07, |
|
"learning_rate": 1.6139840000000004e-05, |
|
"loss": 0.0, |
|
"step": 36200 |
|
}, |
|
{ |
|
"epoch": 0.5808, |
|
"grad_norm": 9.963146396785305e-08, |
|
"learning_rate": 1.6129173333333334e-05, |
|
"loss": 0.0, |
|
"step": 36300 |
|
}, |
|
{ |
|
"epoch": 0.5824, |
|
"grad_norm": 1.1367530561301464e-07, |
|
"learning_rate": 1.6118506666666668e-05, |
|
"loss": 0.0, |
|
"step": 36400 |
|
}, |
|
{ |
|
"epoch": 0.584, |
|
"grad_norm": 9.971628145422073e-08, |
|
"learning_rate": 1.610784e-05, |
|
"loss": 0.0, |
|
"step": 36500 |
|
}, |
|
{ |
|
"epoch": 0.5856, |
|
"grad_norm": 9.849477322632083e-08, |
|
"learning_rate": 1.6097173333333335e-05, |
|
"loss": 0.0, |
|
"step": 36600 |
|
}, |
|
{ |
|
"epoch": 0.5872, |
|
"grad_norm": 8.821568542316527e-08, |
|
"learning_rate": 1.608650666666667e-05, |
|
"loss": 0.0, |
|
"step": 36700 |
|
}, |
|
{ |
|
"epoch": 0.5888, |
|
"grad_norm": 8.860416045308739e-08, |
|
"learning_rate": 1.607584e-05, |
|
"loss": 0.0, |
|
"step": 36800 |
|
}, |
|
{ |
|
"epoch": 0.5904, |
|
"grad_norm": 8.838700438218439e-08, |
|
"learning_rate": 1.6065173333333336e-05, |
|
"loss": 0.0, |
|
"step": 36900 |
|
}, |
|
{ |
|
"epoch": 0.592, |
|
"grad_norm": 9.879371987153718e-08, |
|
"learning_rate": 1.6054506666666666e-05, |
|
"loss": 0.0, |
|
"step": 37000 |
|
}, |
|
{ |
|
"epoch": 0.5936, |
|
"grad_norm": 8.26242469997851e-08, |
|
"learning_rate": 1.6043840000000003e-05, |
|
"loss": 0.0, |
|
"step": 37100 |
|
}, |
|
{ |
|
"epoch": 0.5952, |
|
"grad_norm": 7.888654351972946e-08, |
|
"learning_rate": 1.6033173333333334e-05, |
|
"loss": 0.0, |
|
"step": 37200 |
|
}, |
|
{ |
|
"epoch": 0.5968, |
|
"grad_norm": 7.877645202825079e-08, |
|
"learning_rate": 1.6022613333333333e-05, |
|
"loss": 0.0, |
|
"step": 37300 |
|
}, |
|
{ |
|
"epoch": 0.5984, |
|
"grad_norm": 7.30638873847056e-08, |
|
"learning_rate": 1.601194666666667e-05, |
|
"loss": 0.0, |
|
"step": 37400 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"grad_norm": 7.81067299726601e-08, |
|
"learning_rate": 1.600128e-05, |
|
"loss": 0.0, |
|
"step": 37500 |
|
}, |
|
{ |
|
"epoch": 0.6016, |
|
"grad_norm": 6.583578482377561e-08, |
|
"learning_rate": 1.5990613333333334e-05, |
|
"loss": 0.0, |
|
"step": 37600 |
|
}, |
|
{ |
|
"epoch": 0.6032, |
|
"grad_norm": 6.778850547561888e-08, |
|
"learning_rate": 1.5979946666666668e-05, |
|
"loss": 0.0, |
|
"step": 37700 |
|
}, |
|
{ |
|
"epoch": 0.6048, |
|
"grad_norm": 6.064895785584667e-08, |
|
"learning_rate": 1.5969280000000002e-05, |
|
"loss": 0.0, |
|
"step": 37800 |
|
}, |
|
{ |
|
"epoch": 0.6064, |
|
"grad_norm": 6.343422143118005e-08, |
|
"learning_rate": 1.5958613333333335e-05, |
|
"loss": 0.0, |
|
"step": 37900 |
|
}, |
|
{ |
|
"epoch": 0.608, |
|
"grad_norm": 5.385956569625705e-08, |
|
"learning_rate": 1.5947946666666666e-05, |
|
"loss": 0.0, |
|
"step": 38000 |
|
}, |
|
{ |
|
"epoch": 0.6096, |
|
"grad_norm": 5.910140643550221e-08, |
|
"learning_rate": 1.5937280000000003e-05, |
|
"loss": 0.0, |
|
"step": 38100 |
|
}, |
|
{ |
|
"epoch": 0.6112, |
|
"grad_norm": 5.531205360398417e-08, |
|
"learning_rate": 1.5926613333333333e-05, |
|
"loss": 0.0, |
|
"step": 38200 |
|
}, |
|
{ |
|
"epoch": 0.6128, |
|
"grad_norm": 4.7664677538250544e-08, |
|
"learning_rate": 1.591594666666667e-05, |
|
"loss": 0.0, |
|
"step": 38300 |
|
}, |
|
{ |
|
"epoch": 0.6144, |
|
"grad_norm": 5.105589906406749e-08, |
|
"learning_rate": 1.590528e-05, |
|
"loss": 0.0, |
|
"step": 38400 |
|
}, |
|
{ |
|
"epoch": 0.616, |
|
"grad_norm": 4.647805340596278e-08, |
|
"learning_rate": 1.5894613333333334e-05, |
|
"loss": 0.0, |
|
"step": 38500 |
|
}, |
|
{ |
|
"epoch": 0.6176, |
|
"grad_norm": 4.669184150429828e-08, |
|
"learning_rate": 1.5883946666666668e-05, |
|
"loss": 0.0, |
|
"step": 38600 |
|
}, |
|
{ |
|
"epoch": 0.6192, |
|
"grad_norm": 4.602827630151296e-08, |
|
"learning_rate": 1.587328e-05, |
|
"loss": 0.0, |
|
"step": 38700 |
|
}, |
|
{ |
|
"epoch": 0.6208, |
|
"grad_norm": 4.170912504264379e-08, |
|
"learning_rate": 1.5862613333333335e-05, |
|
"loss": 0.0, |
|
"step": 38800 |
|
}, |
|
{ |
|
"epoch": 0.6224, |
|
"grad_norm": 3.930464487211793e-08, |
|
"learning_rate": 1.585194666666667e-05, |
|
"loss": 0.0, |
|
"step": 38900 |
|
}, |
|
{ |
|
"epoch": 0.624, |
|
"grad_norm": 5.003272818271398e-08, |
|
"learning_rate": 1.5841280000000002e-05, |
|
"loss": 0.0, |
|
"step": 39000 |
|
}, |
|
{ |
|
"epoch": 0.6256, |
|
"grad_norm": 3.810775339729844e-08, |
|
"learning_rate": 1.5830613333333336e-05, |
|
"loss": 0.0, |
|
"step": 39100 |
|
}, |
|
{ |
|
"epoch": 0.6272, |
|
"grad_norm": 3.707847540113107e-08, |
|
"learning_rate": 1.581994666666667e-05, |
|
"loss": 0.0, |
|
"step": 39200 |
|
}, |
|
{ |
|
"epoch": 0.6288, |
|
"grad_norm": 3.4199096887732594e-08, |
|
"learning_rate": 1.580938666666667e-05, |
|
"loss": 0.0, |
|
"step": 39300 |
|
}, |
|
{ |
|
"epoch": 0.6304, |
|
"grad_norm": 3.327440367684176e-08, |
|
"learning_rate": 1.579872e-05, |
|
"loss": 0.0, |
|
"step": 39400 |
|
}, |
|
{ |
|
"epoch": 0.632, |
|
"grad_norm": 3.221984101742237e-08, |
|
"learning_rate": 1.5788053333333337e-05, |
|
"loss": 0.0, |
|
"step": 39500 |
|
}, |
|
{ |
|
"epoch": 0.6336, |
|
"grad_norm": 3.06969596408635e-08, |
|
"learning_rate": 1.5777386666666667e-05, |
|
"loss": 0.0, |
|
"step": 39600 |
|
}, |
|
{ |
|
"epoch": 0.6352, |
|
"grad_norm": 2.9305402549084647e-08, |
|
"learning_rate": 1.576672e-05, |
|
"loss": 0.0, |
|
"step": 39700 |
|
}, |
|
{ |
|
"epoch": 0.6368, |
|
"grad_norm": 3.1664541211284813e-08, |
|
"learning_rate": 1.5756053333333334e-05, |
|
"loss": 0.0, |
|
"step": 39800 |
|
}, |
|
{ |
|
"epoch": 0.6384, |
|
"grad_norm": 3.058496389485299e-08, |
|
"learning_rate": 1.5745386666666668e-05, |
|
"loss": 0.0, |
|
"step": 39900 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"grad_norm": 2.8967381382472013e-08, |
|
"learning_rate": 1.5734720000000002e-05, |
|
"loss": 0.0, |
|
"step": 40000 |
|
}, |
|
{ |
|
"epoch": 0.6416, |
|
"grad_norm": 2.6066624414511352e-08, |
|
"learning_rate": 1.5724053333333335e-05, |
|
"loss": 0.0, |
|
"step": 40100 |
|
}, |
|
{ |
|
"epoch": 0.6432, |
|
"grad_norm": 2.3177223695824978e-08, |
|
"learning_rate": 1.571338666666667e-05, |
|
"loss": 0.0, |
|
"step": 40200 |
|
}, |
|
{ |
|
"epoch": 0.6448, |
|
"grad_norm": 2.419685785071124e-08, |
|
"learning_rate": 1.570272e-05, |
|
"loss": 0.0, |
|
"step": 40300 |
|
}, |
|
{ |
|
"epoch": 0.6464, |
|
"grad_norm": 2.4422618594144296e-08, |
|
"learning_rate": 1.5692053333333336e-05, |
|
"loss": 0.0, |
|
"step": 40400 |
|
}, |
|
{ |
|
"epoch": 0.648, |
|
"grad_norm": 2.526655507040232e-08, |
|
"learning_rate": 1.5681386666666667e-05, |
|
"loss": 0.0, |
|
"step": 40500 |
|
}, |
|
{ |
|
"epoch": 0.6496, |
|
"grad_norm": 2.320962089186196e-08, |
|
"learning_rate": 1.567072e-05, |
|
"loss": 0.0, |
|
"step": 40600 |
|
}, |
|
{ |
|
"epoch": 0.6512, |
|
"grad_norm": 2.0410428902550848e-08, |
|
"learning_rate": 1.5660053333333334e-05, |
|
"loss": 0.0, |
|
"step": 40700 |
|
}, |
|
{ |
|
"epoch": 0.6528, |
|
"grad_norm": 2.2334850413585627e-08, |
|
"learning_rate": 1.5649386666666668e-05, |
|
"loss": 0.0, |
|
"step": 40800 |
|
}, |
|
{ |
|
"epoch": 0.6544, |
|
"grad_norm": 2.4193813175088508e-08, |
|
"learning_rate": 1.563872e-05, |
|
"loss": 0.0, |
|
"step": 40900 |
|
}, |
|
{ |
|
"epoch": 0.656, |
|
"grad_norm": 1.9213212354429743e-08, |
|
"learning_rate": 1.5628053333333335e-05, |
|
"loss": 0.0, |
|
"step": 41000 |
|
}, |
|
{ |
|
"epoch": 0.6576, |
|
"grad_norm": 1.9847044896437183e-08, |
|
"learning_rate": 1.561738666666667e-05, |
|
"loss": 0.0, |
|
"step": 41100 |
|
}, |
|
{ |
|
"epoch": 0.6592, |
|
"grad_norm": 1.8384987754416215e-08, |
|
"learning_rate": 1.5606720000000002e-05, |
|
"loss": 0.0, |
|
"step": 41200 |
|
}, |
|
{ |
|
"epoch": 0.6608, |
|
"grad_norm": 1.830431983762537e-08, |
|
"learning_rate": 1.559616e-05, |
|
"loss": 0.0, |
|
"step": 41300 |
|
}, |
|
{ |
|
"epoch": 0.6624, |
|
"grad_norm": 1.6151531667674135e-08, |
|
"learning_rate": 1.5585493333333336e-05, |
|
"loss": 0.0, |
|
"step": 41400 |
|
}, |
|
{ |
|
"epoch": 0.664, |
|
"grad_norm": 1.520500525487023e-08, |
|
"learning_rate": 1.5574826666666666e-05, |
|
"loss": 0.0, |
|
"step": 41500 |
|
}, |
|
{ |
|
"epoch": 0.6656, |
|
"grad_norm": 1.400090354763961e-08, |
|
"learning_rate": 1.5564160000000003e-05, |
|
"loss": 0.0, |
|
"step": 41600 |
|
}, |
|
{ |
|
"epoch": 0.6672, |
|
"grad_norm": 1.5792524621360826e-08, |
|
"learning_rate": 1.5553493333333333e-05, |
|
"loss": 0.0, |
|
"step": 41700 |
|
}, |
|
{ |
|
"epoch": 0.6688, |
|
"grad_norm": 1.325526088891138e-08, |
|
"learning_rate": 1.5542826666666667e-05, |
|
"loss": 0.0, |
|
"step": 41800 |
|
}, |
|
{ |
|
"epoch": 0.6704, |
|
"grad_norm": 1.4574178308635055e-08, |
|
"learning_rate": 1.553216e-05, |
|
"loss": 0.0, |
|
"step": 41900 |
|
}, |
|
{ |
|
"epoch": 0.672, |
|
"grad_norm": 1.443759423125357e-08, |
|
"learning_rate": 1.5521493333333334e-05, |
|
"loss": 0.0, |
|
"step": 42000 |
|
}, |
|
{ |
|
"epoch": 0.6736, |
|
"grad_norm": 1.2646203195743055e-08, |
|
"learning_rate": 1.5510826666666668e-05, |
|
"loss": 0.0, |
|
"step": 42100 |
|
}, |
|
{ |
|
"epoch": 0.6752, |
|
"grad_norm": 1.357729839668309e-08, |
|
"learning_rate": 1.5500160000000002e-05, |
|
"loss": 0.0, |
|
"step": 42200 |
|
}, |
|
{ |
|
"epoch": 0.6768, |
|
"grad_norm": 1.2999631593402228e-08, |
|
"learning_rate": 1.5489493333333335e-05, |
|
"loss": 0.0, |
|
"step": 42300 |
|
}, |
|
{ |
|
"epoch": 0.6784, |
|
"grad_norm": 1.2067292942674612e-08, |
|
"learning_rate": 1.547882666666667e-05, |
|
"loss": 0.0, |
|
"step": 42400 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"grad_norm": 1.2314631980814283e-08, |
|
"learning_rate": 1.546816e-05, |
|
"loss": 0.0, |
|
"step": 42500 |
|
}, |
|
{ |
|
"epoch": 0.6816, |
|
"grad_norm": 1.0934391170280833e-08, |
|
"learning_rate": 1.5457493333333336e-05, |
|
"loss": 0.0, |
|
"step": 42600 |
|
}, |
|
{ |
|
"epoch": 0.6832, |
|
"grad_norm": 1.2301041074636032e-08, |
|
"learning_rate": 1.5446826666666667e-05, |
|
"loss": 0.0, |
|
"step": 42700 |
|
}, |
|
{ |
|
"epoch": 0.6848, |
|
"grad_norm": 1.130779558877748e-08, |
|
"learning_rate": 1.5436160000000004e-05, |
|
"loss": 0.0, |
|
"step": 42800 |
|
}, |
|
{ |
|
"epoch": 0.6864, |
|
"grad_norm": 9.362263853063268e-09, |
|
"learning_rate": 1.5425493333333334e-05, |
|
"loss": 0.0, |
|
"step": 42900 |
|
}, |
|
{ |
|
"epoch": 0.688, |
|
"grad_norm": 9.903635245223086e-09, |
|
"learning_rate": 1.5414826666666668e-05, |
|
"loss": 0.0, |
|
"step": 43000 |
|
}, |
|
{ |
|
"epoch": 0.6896, |
|
"grad_norm": 1.0525496918489807e-08, |
|
"learning_rate": 1.540416e-05, |
|
"loss": 0.0, |
|
"step": 43100 |
|
}, |
|
{ |
|
"epoch": 0.6912, |
|
"grad_norm": 9.31617361032977e-09, |
|
"learning_rate": 1.5393493333333335e-05, |
|
"loss": 0.0, |
|
"step": 43200 |
|
}, |
|
{ |
|
"epoch": 0.6928, |
|
"grad_norm": 9.237457909705427e-09, |
|
"learning_rate": 1.5382933333333335e-05, |
|
"loss": 0.0, |
|
"step": 43300 |
|
}, |
|
{ |
|
"epoch": 0.6944, |
|
"grad_norm": 8.73692673764026e-09, |
|
"learning_rate": 1.537226666666667e-05, |
|
"loss": 0.0, |
|
"step": 43400 |
|
}, |
|
{ |
|
"epoch": 0.696, |
|
"grad_norm": 7.849454419783797e-09, |
|
"learning_rate": 1.5361600000000002e-05, |
|
"loss": 0.0, |
|
"step": 43500 |
|
}, |
|
{ |
|
"epoch": 0.6976, |
|
"grad_norm": 9.30858945480395e-09, |
|
"learning_rate": 1.5350933333333336e-05, |
|
"loss": 0.0, |
|
"step": 43600 |
|
}, |
|
{ |
|
"epoch": 0.6992, |
|
"grad_norm": 7.977851268492486e-09, |
|
"learning_rate": 1.5340266666666666e-05, |
|
"loss": 0.0, |
|
"step": 43700 |
|
}, |
|
{ |
|
"epoch": 0.7008, |
|
"grad_norm": 7.884906061406127e-09, |
|
"learning_rate": 1.5329600000000003e-05, |
|
"loss": 0.0, |
|
"step": 43800 |
|
}, |
|
{ |
|
"epoch": 0.7024, |
|
"grad_norm": 7.931284073947609e-09, |
|
"learning_rate": 1.5318933333333333e-05, |
|
"loss": 0.0, |
|
"step": 43900 |
|
}, |
|
{ |
|
"epoch": 0.704, |
|
"grad_norm": 8.136783691270466e-09, |
|
"learning_rate": 1.5308266666666667e-05, |
|
"loss": 0.0, |
|
"step": 44000 |
|
}, |
|
{ |
|
"epoch": 0.7056, |
|
"grad_norm": 7.810712077116477e-09, |
|
"learning_rate": 1.52976e-05, |
|
"loss": 0.0, |
|
"step": 44100 |
|
}, |
|
{ |
|
"epoch": 0.7072, |
|
"grad_norm": 7.457011008682457e-09, |
|
"learning_rate": 1.5286933333333334e-05, |
|
"loss": 0.0, |
|
"step": 44200 |
|
}, |
|
{ |
|
"epoch": 0.7088, |
|
"grad_norm": 5.947691228413987e-09, |
|
"learning_rate": 1.5276266666666668e-05, |
|
"loss": 0.0, |
|
"step": 44300 |
|
}, |
|
{ |
|
"epoch": 0.7104, |
|
"grad_norm": 5.5936832943359605e-09, |
|
"learning_rate": 1.5265600000000002e-05, |
|
"loss": 0.0, |
|
"step": 44400 |
|
}, |
|
{ |
|
"epoch": 0.712, |
|
"grad_norm": 6.387094853010922e-09, |
|
"learning_rate": 1.5254933333333335e-05, |
|
"loss": 0.0, |
|
"step": 44500 |
|
}, |
|
{ |
|
"epoch": 0.7136, |
|
"grad_norm": 6.434018207102099e-09, |
|
"learning_rate": 1.5244266666666667e-05, |
|
"loss": 0.0, |
|
"step": 44600 |
|
}, |
|
{ |
|
"epoch": 0.7152, |
|
"grad_norm": 7.640887922377715e-09, |
|
"learning_rate": 1.5233600000000001e-05, |
|
"loss": 0.0, |
|
"step": 44700 |
|
}, |
|
{ |
|
"epoch": 0.7168, |
|
"grad_norm": 6.9532259949767194e-09, |
|
"learning_rate": 1.5222933333333335e-05, |
|
"loss": 0.0, |
|
"step": 44800 |
|
}, |
|
{ |
|
"epoch": 0.7184, |
|
"grad_norm": 5.967400351636343e-09, |
|
"learning_rate": 1.5212266666666668e-05, |
|
"loss": 0.0, |
|
"step": 44900 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"grad_norm": 6.471727154178097e-09, |
|
"learning_rate": 1.52016e-05, |
|
"loss": 0.0, |
|
"step": 45000 |
|
}, |
|
{ |
|
"epoch": 0.7216, |
|
"grad_norm": 6.3445786402382964e-09, |
|
"learning_rate": 1.5190933333333336e-05, |
|
"loss": 0.0, |
|
"step": 45100 |
|
}, |
|
{ |
|
"epoch": 0.7232, |
|
"grad_norm": 5.881137354890598e-09, |
|
"learning_rate": 1.5180266666666668e-05, |
|
"loss": 0.0, |
|
"step": 45200 |
|
}, |
|
{ |
|
"epoch": 0.7248, |
|
"grad_norm": 6.409100361537412e-09, |
|
"learning_rate": 1.5169706666666667e-05, |
|
"loss": 0.0, |
|
"step": 45300 |
|
}, |
|
{ |
|
"epoch": 0.7264, |
|
"grad_norm": 5.837366146010936e-09, |
|
"learning_rate": 1.5159040000000001e-05, |
|
"loss": 0.0, |
|
"step": 45400 |
|
}, |
|
{ |
|
"epoch": 0.728, |
|
"grad_norm": 5.142333669994059e-09, |
|
"learning_rate": 1.5148373333333335e-05, |
|
"loss": 0.0, |
|
"step": 45500 |
|
}, |
|
{ |
|
"epoch": 0.7296, |
|
"grad_norm": 5.3732072125001196e-09, |
|
"learning_rate": 1.5137706666666667e-05, |
|
"loss": 0.0, |
|
"step": 45600 |
|
}, |
|
{ |
|
"epoch": 0.7312, |
|
"grad_norm": 5.1184545490912114e-09, |
|
"learning_rate": 1.5127040000000002e-05, |
|
"loss": 0.0, |
|
"step": 45700 |
|
}, |
|
{ |
|
"epoch": 0.7328, |
|
"grad_norm": 4.9866528684106015e-09, |
|
"learning_rate": 1.5116373333333334e-05, |
|
"loss": 0.0, |
|
"step": 45800 |
|
}, |
|
{ |
|
"epoch": 0.7344, |
|
"grad_norm": 5.368807620698135e-09, |
|
"learning_rate": 1.5105706666666668e-05, |
|
"loss": 0.0, |
|
"step": 45900 |
|
}, |
|
{ |
|
"epoch": 0.736, |
|
"grad_norm": 4.762381600187382e-09, |
|
"learning_rate": 1.5095040000000001e-05, |
|
"loss": 0.0, |
|
"step": 46000 |
|
}, |
|
{ |
|
"epoch": 0.7376, |
|
"grad_norm": 4.221057281483809e-09, |
|
"learning_rate": 1.5084373333333335e-05, |
|
"loss": 0.0, |
|
"step": 46100 |
|
}, |
|
{ |
|
"epoch": 0.7392, |
|
"grad_norm": 5.685349524497951e-09, |
|
"learning_rate": 1.5073706666666667e-05, |
|
"loss": 0.0, |
|
"step": 46200 |
|
}, |
|
{ |
|
"epoch": 0.7408, |
|
"grad_norm": 4.665808184256548e-09, |
|
"learning_rate": 1.5063040000000002e-05, |
|
"loss": 0.0, |
|
"step": 46300 |
|
}, |
|
{ |
|
"epoch": 0.7424, |
|
"grad_norm": 4.947431353485854e-09, |
|
"learning_rate": 1.5052373333333334e-05, |
|
"loss": 0.0, |
|
"step": 46400 |
|
}, |
|
{ |
|
"epoch": 0.744, |
|
"grad_norm": 4.350499516192485e-09, |
|
"learning_rate": 1.504170666666667e-05, |
|
"loss": 0.0, |
|
"step": 46500 |
|
}, |
|
{ |
|
"epoch": 0.7456, |
|
"grad_norm": 4.42672432043878e-09, |
|
"learning_rate": 1.5031040000000002e-05, |
|
"loss": 0.0, |
|
"step": 46600 |
|
}, |
|
{ |
|
"epoch": 0.7472, |
|
"grad_norm": 4.635956507570427e-09, |
|
"learning_rate": 1.5020373333333334e-05, |
|
"loss": 0.0, |
|
"step": 46700 |
|
}, |
|
{ |
|
"epoch": 0.7488, |
|
"grad_norm": 4.093439365249196e-09, |
|
"learning_rate": 1.5009706666666667e-05, |
|
"loss": 0.0, |
|
"step": 46800 |
|
}, |
|
{ |
|
"epoch": 0.7504, |
|
"grad_norm": 4.4049284220193385e-09, |
|
"learning_rate": 1.4999040000000001e-05, |
|
"loss": 0.0, |
|
"step": 46900 |
|
}, |
|
{ |
|
"epoch": 0.752, |
|
"grad_norm": 4.6566723810315125e-09, |
|
"learning_rate": 1.4988373333333335e-05, |
|
"loss": 0.0, |
|
"step": 47000 |
|
}, |
|
{ |
|
"epoch": 0.7536, |
|
"grad_norm": 3.575250540066577e-09, |
|
"learning_rate": 1.4977706666666667e-05, |
|
"loss": 0.0, |
|
"step": 47100 |
|
}, |
|
{ |
|
"epoch": 0.7552, |
|
"grad_norm": 4.647517037881244e-09, |
|
"learning_rate": 1.4967040000000002e-05, |
|
"loss": 0.0, |
|
"step": 47200 |
|
}, |
|
{ |
|
"epoch": 0.7568, |
|
"grad_norm": 3.6755953836120625e-09, |
|
"learning_rate": 1.4956480000000002e-05, |
|
"loss": 0.0, |
|
"step": 47300 |
|
}, |
|
{ |
|
"epoch": 0.7584, |
|
"grad_norm": 3.9804803897425245e-09, |
|
"learning_rate": 1.4945813333333334e-05, |
|
"loss": 0.0, |
|
"step": 47400 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"grad_norm": 3.5315350643827514e-09, |
|
"learning_rate": 1.493514666666667e-05, |
|
"loss": 0.0, |
|
"step": 47500 |
|
}, |
|
{ |
|
"epoch": 0.7616, |
|
"grad_norm": 3.650338031846445e-09, |
|
"learning_rate": 1.4924480000000001e-05, |
|
"loss": 0.0, |
|
"step": 47600 |
|
}, |
|
{ |
|
"epoch": 0.7632, |
|
"grad_norm": 3.916106550150289e-09, |
|
"learning_rate": 1.4913813333333333e-05, |
|
"loss": 0.0, |
|
"step": 47700 |
|
}, |
|
{ |
|
"epoch": 0.7648, |
|
"grad_norm": 3.62517726948397e-09, |
|
"learning_rate": 1.4903146666666668e-05, |
|
"loss": 0.0, |
|
"step": 47800 |
|
}, |
|
{ |
|
"epoch": 0.7664, |
|
"grad_norm": 3.878641408050498e-09, |
|
"learning_rate": 1.489248e-05, |
|
"loss": 0.0, |
|
"step": 47900 |
|
}, |
|
{ |
|
"epoch": 0.768, |
|
"grad_norm": 4.047121304751045e-09, |
|
"learning_rate": 1.4881813333333334e-05, |
|
"loss": 0.0, |
|
"step": 48000 |
|
}, |
|
{ |
|
"epoch": 0.7696, |
|
"grad_norm": 3.005124149524363e-09, |
|
"learning_rate": 1.4871146666666668e-05, |
|
"loss": 0.0, |
|
"step": 48100 |
|
}, |
|
{ |
|
"epoch": 0.7712, |
|
"grad_norm": 3.664456960095208e-09, |
|
"learning_rate": 1.4860480000000001e-05, |
|
"loss": 0.0, |
|
"step": 48200 |
|
}, |
|
{ |
|
"epoch": 0.7728, |
|
"grad_norm": 3.27337112970838e-09, |
|
"learning_rate": 1.4849813333333333e-05, |
|
"loss": 0.0, |
|
"step": 48300 |
|
}, |
|
{ |
|
"epoch": 0.7744, |
|
"grad_norm": 3.1061264671450317e-09, |
|
"learning_rate": 1.4839146666666669e-05, |
|
"loss": 0.0, |
|
"step": 48400 |
|
}, |
|
{ |
|
"epoch": 0.776, |
|
"grad_norm": 2.910289342850092e-09, |
|
"learning_rate": 1.482848e-05, |
|
"loss": 0.0, |
|
"step": 48500 |
|
}, |
|
{ |
|
"epoch": 0.7776, |
|
"grad_norm": 3.726303710038792e-09, |
|
"learning_rate": 1.4817813333333334e-05, |
|
"loss": 0.0, |
|
"step": 48600 |
|
}, |
|
{ |
|
"epoch": 0.7792, |
|
"grad_norm": 3.082652577646172e-09, |
|
"learning_rate": 1.4807146666666668e-05, |
|
"loss": 0.0, |
|
"step": 48700 |
|
}, |
|
{ |
|
"epoch": 0.7808, |
|
"grad_norm": 2.9859092975925705e-09, |
|
"learning_rate": 1.4796480000000002e-05, |
|
"loss": 0.0, |
|
"step": 48800 |
|
}, |
|
{ |
|
"epoch": 0.7824, |
|
"grad_norm": 3.238060486410177e-09, |
|
"learning_rate": 1.4785813333333334e-05, |
|
"loss": 0.0, |
|
"step": 48900 |
|
}, |
|
{ |
|
"epoch": 0.784, |
|
"grad_norm": 2.965995005155264e-09, |
|
"learning_rate": 1.4775146666666669e-05, |
|
"loss": 0.0, |
|
"step": 49000 |
|
}, |
|
{ |
|
"epoch": 0.7856, |
|
"grad_norm": 2.5330839648063375e-09, |
|
"learning_rate": 1.4764480000000001e-05, |
|
"loss": 0.0, |
|
"step": 49100 |
|
}, |
|
{ |
|
"epoch": 0.7872, |
|
"grad_norm": 2.8175941579888786e-09, |
|
"learning_rate": 1.4753813333333336e-05, |
|
"loss": 0.0, |
|
"step": 49200 |
|
}, |
|
{ |
|
"epoch": 0.7888, |
|
"grad_norm": 2.6512554374136243e-09, |
|
"learning_rate": 1.4743253333333335e-05, |
|
"loss": 0.0, |
|
"step": 49300 |
|
}, |
|
{ |
|
"epoch": 0.7904, |
|
"grad_norm": 3.005082183094032e-09, |
|
"learning_rate": 1.4732586666666668e-05, |
|
"loss": 0.0, |
|
"step": 49400 |
|
}, |
|
{ |
|
"epoch": 0.792, |
|
"grad_norm": 2.7331856777834673e-09, |
|
"learning_rate": 1.472192e-05, |
|
"loss": 0.0, |
|
"step": 49500 |
|
}, |
|
{ |
|
"epoch": 0.7936, |
|
"grad_norm": 2.535325061003846e-09, |
|
"learning_rate": 1.4711253333333336e-05, |
|
"loss": 0.0, |
|
"step": 49600 |
|
}, |
|
{ |
|
"epoch": 0.7952, |
|
"grad_norm": 2.5567281625171745e-09, |
|
"learning_rate": 1.4700586666666667e-05, |
|
"loss": 0.0, |
|
"step": 49700 |
|
}, |
|
{ |
|
"epoch": 0.7968, |
|
"grad_norm": 2.561580725313206e-09, |
|
"learning_rate": 1.4689920000000001e-05, |
|
"loss": 0.0, |
|
"step": 49800 |
|
}, |
|
{ |
|
"epoch": 0.7984, |
|
"grad_norm": 2.534361831507681e-09, |
|
"learning_rate": 1.4679253333333335e-05, |
|
"loss": 0.0, |
|
"step": 49900 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"grad_norm": 2.616443062208873e-09, |
|
"learning_rate": 1.4668586666666668e-05, |
|
"loss": 0.0, |
|
"step": 50000 |
|
}, |
|
{ |
|
"epoch": 0.8016, |
|
"grad_norm": 3.016526806121078e-09, |
|
"learning_rate": 1.465792e-05, |
|
"loss": 0.0, |
|
"step": 50100 |
|
}, |
|
{ |
|
"epoch": 0.8032, |
|
"grad_norm": 2.5457300711906328e-09, |
|
"learning_rate": 1.4647253333333336e-05, |
|
"loss": 0.0, |
|
"step": 50200 |
|
}, |
|
{ |
|
"epoch": 0.8048, |
|
"grad_norm": 2.6220687843192536e-09, |
|
"learning_rate": 1.4636586666666668e-05, |
|
"loss": 0.0, |
|
"step": 50300 |
|
}, |
|
{ |
|
"epoch": 0.8064, |
|
"grad_norm": 2.8558380105181413e-09, |
|
"learning_rate": 1.462592e-05, |
|
"loss": 0.0, |
|
"step": 50400 |
|
}, |
|
{ |
|
"epoch": 0.808, |
|
"grad_norm": 2.5608188902737083e-09, |
|
"learning_rate": 1.4615253333333335e-05, |
|
"loss": 0.0, |
|
"step": 50500 |
|
}, |
|
{ |
|
"epoch": 0.8096, |
|
"grad_norm": 2.820969235983739e-09, |
|
"learning_rate": 1.4604586666666667e-05, |
|
"loss": 0.0, |
|
"step": 50600 |
|
}, |
|
{ |
|
"epoch": 0.8112, |
|
"grad_norm": 2.7018629555897178e-09, |
|
"learning_rate": 1.459392e-05, |
|
"loss": 0.0, |
|
"step": 50700 |
|
}, |
|
{ |
|
"epoch": 0.8128, |
|
"grad_norm": 2.4657107466907746e-09, |
|
"learning_rate": 1.4583253333333334e-05, |
|
"loss": 0.0, |
|
"step": 50800 |
|
}, |
|
{ |
|
"epoch": 0.8144, |
|
"grad_norm": 2.240560625921262e-09, |
|
"learning_rate": 1.4572586666666668e-05, |
|
"loss": 0.0, |
|
"step": 50900 |
|
}, |
|
{ |
|
"epoch": 0.816, |
|
"grad_norm": 2.1840855790600244e-09, |
|
"learning_rate": 1.456192e-05, |
|
"loss": 0.0, |
|
"step": 51000 |
|
}, |
|
{ |
|
"epoch": 0.8176, |
|
"grad_norm": 2.396337350774047e-09, |
|
"learning_rate": 1.4551253333333335e-05, |
|
"loss": 0.0, |
|
"step": 51100 |
|
}, |
|
{ |
|
"epoch": 0.8192, |
|
"grad_norm": 2.2762063345282968e-09, |
|
"learning_rate": 1.4540586666666667e-05, |
|
"loss": 0.0, |
|
"step": 51200 |
|
}, |
|
{ |
|
"epoch": 0.8208, |
|
"grad_norm": 2.1621027190832365e-09, |
|
"learning_rate": 1.4530026666666667e-05, |
|
"loss": 0.0, |
|
"step": 51300 |
|
}, |
|
{ |
|
"epoch": 0.8224, |
|
"grad_norm": 2.2733732674140583e-09, |
|
"learning_rate": 1.4519360000000001e-05, |
|
"loss": 0.0, |
|
"step": 51400 |
|
}, |
|
{ |
|
"epoch": 0.824, |
|
"grad_norm": 2.062521931023298e-09, |
|
"learning_rate": 1.4508693333333335e-05, |
|
"loss": 0.0, |
|
"step": 51500 |
|
}, |
|
{ |
|
"epoch": 0.8256, |
|
"grad_norm": 2.5127442349059947e-09, |
|
"learning_rate": 1.4498026666666666e-05, |
|
"loss": 0.0, |
|
"step": 51600 |
|
}, |
|
{ |
|
"epoch": 0.8272, |
|
"grad_norm": 2.24668172954523e-09, |
|
"learning_rate": 1.4487360000000002e-05, |
|
"loss": 0.0, |
|
"step": 51700 |
|
}, |
|
{ |
|
"epoch": 0.8288, |
|
"grad_norm": 2.0432591174568415e-09, |
|
"learning_rate": 1.4476693333333334e-05, |
|
"loss": 0.0, |
|
"step": 51800 |
|
}, |
|
{ |
|
"epoch": 0.8304, |
|
"grad_norm": 2.4273549836806296e-09, |
|
"learning_rate": 1.4466026666666667e-05, |
|
"loss": 0.0, |
|
"step": 51900 |
|
}, |
|
{ |
|
"epoch": 0.832, |
|
"grad_norm": 2.0034554015779804e-09, |
|
"learning_rate": 1.4455360000000001e-05, |
|
"loss": 0.0, |
|
"step": 52000 |
|
}, |
|
{ |
|
"epoch": 0.8336, |
|
"grad_norm": 1.9104446913331685e-09, |
|
"learning_rate": 1.4444693333333335e-05, |
|
"loss": 0.0, |
|
"step": 52100 |
|
}, |
|
{ |
|
"epoch": 0.8352, |
|
"grad_norm": 1.9703714215779655e-09, |
|
"learning_rate": 1.4434026666666667e-05, |
|
"loss": 0.0, |
|
"step": 52200 |
|
}, |
|
{ |
|
"epoch": 0.8368, |
|
"grad_norm": 1.8734007678489206e-09, |
|
"learning_rate": 1.4423360000000002e-05, |
|
"loss": 0.0, |
|
"step": 52300 |
|
}, |
|
{ |
|
"epoch": 0.8384, |
|
"grad_norm": 2.433496959497461e-09, |
|
"learning_rate": 1.4412693333333334e-05, |
|
"loss": 0.0, |
|
"step": 52400 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"grad_norm": 1.915407166208638e-09, |
|
"learning_rate": 1.440202666666667e-05, |
|
"loss": 0.0, |
|
"step": 52500 |
|
}, |
|
{ |
|
"epoch": 0.8416, |
|
"grad_norm": 2.026956380518641e-09, |
|
"learning_rate": 1.4391360000000001e-05, |
|
"loss": 0.0, |
|
"step": 52600 |
|
}, |
|
{ |
|
"epoch": 0.8432, |
|
"grad_norm": 1.8809005464248685e-09, |
|
"learning_rate": 1.4380693333333335e-05, |
|
"loss": 0.0, |
|
"step": 52700 |
|
}, |
|
{ |
|
"epoch": 0.8448, |
|
"grad_norm": 1.7579672162426618e-09, |
|
"learning_rate": 1.4370026666666667e-05, |
|
"loss": 0.0, |
|
"step": 52800 |
|
}, |
|
{ |
|
"epoch": 0.8464, |
|
"grad_norm": 1.972140895034613e-09, |
|
"learning_rate": 1.4359360000000002e-05, |
|
"loss": 0.0, |
|
"step": 52900 |
|
}, |
|
{ |
|
"epoch": 0.848, |
|
"grad_norm": 2.309159086166801e-09, |
|
"learning_rate": 1.4348693333333334e-05, |
|
"loss": 0.0, |
|
"step": 53000 |
|
}, |
|
{ |
|
"epoch": 0.8496, |
|
"grad_norm": 2.2458994664020793e-09, |
|
"learning_rate": 1.4338026666666666e-05, |
|
"loss": 0.0, |
|
"step": 53100 |
|
}, |
|
{ |
|
"epoch": 0.8512, |
|
"grad_norm": 2.3398747384106855e-09, |
|
"learning_rate": 1.4327360000000002e-05, |
|
"loss": 0.0, |
|
"step": 53200 |
|
}, |
|
{ |
|
"epoch": 0.8528, |
|
"grad_norm": 1.8828840708806638e-09, |
|
"learning_rate": 1.4316800000000002e-05, |
|
"loss": 0.0, |
|
"step": 53300 |
|
}, |
|
{ |
|
"epoch": 0.8544, |
|
"grad_norm": 1.7677028729679023e-09, |
|
"learning_rate": 1.4306133333333334e-05, |
|
"loss": 0.0, |
|
"step": 53400 |
|
}, |
|
{ |
|
"epoch": 0.856, |
|
"grad_norm": 1.8229453502272008e-09, |
|
"learning_rate": 1.4295466666666669e-05, |
|
"loss": 0.0, |
|
"step": 53500 |
|
}, |
|
{ |
|
"epoch": 0.8576, |
|
"grad_norm": 1.7983363687079645e-09, |
|
"learning_rate": 1.4284800000000001e-05, |
|
"loss": 0.0, |
|
"step": 53600 |
|
}, |
|
{ |
|
"epoch": 0.8592, |
|
"grad_norm": 1.6915907563586075e-09, |
|
"learning_rate": 1.4274133333333336e-05, |
|
"loss": 0.0, |
|
"step": 53700 |
|
}, |
|
{ |
|
"epoch": 0.8608, |
|
"grad_norm": 2.047505054392218e-09, |
|
"learning_rate": 1.4263466666666668e-05, |
|
"loss": 0.0, |
|
"step": 53800 |
|
}, |
|
{ |
|
"epoch": 0.8624, |
|
"grad_norm": 1.645070635269974e-09, |
|
"learning_rate": 1.4252800000000002e-05, |
|
"loss": 0.0, |
|
"step": 53900 |
|
}, |
|
{ |
|
"epoch": 0.864, |
|
"grad_norm": 1.6591610307870042e-09, |
|
"learning_rate": 1.4242133333333334e-05, |
|
"loss": 0.0, |
|
"step": 54000 |
|
}, |
|
{ |
|
"epoch": 0.8656, |
|
"grad_norm": 1.6760498544599045e-09, |
|
"learning_rate": 1.4231466666666667e-05, |
|
"loss": 0.0, |
|
"step": 54100 |
|
}, |
|
{ |
|
"epoch": 0.8672, |
|
"grad_norm": 1.8105128507528434e-09, |
|
"learning_rate": 1.4220800000000001e-05, |
|
"loss": 0.0, |
|
"step": 54200 |
|
}, |
|
{ |
|
"epoch": 0.8688, |
|
"grad_norm": 1.724317466589298e-09, |
|
"learning_rate": 1.4210133333333333e-05, |
|
"loss": 0.0, |
|
"step": 54300 |
|
}, |
|
{ |
|
"epoch": 0.8704, |
|
"grad_norm": 1.5064500757944188e-09, |
|
"learning_rate": 1.4199466666666668e-05, |
|
"loss": 0.0, |
|
"step": 54400 |
|
}, |
|
{ |
|
"epoch": 0.872, |
|
"grad_norm": 2.0849286741508877e-09, |
|
"learning_rate": 1.41888e-05, |
|
"loss": 0.0, |
|
"step": 54500 |
|
}, |
|
{ |
|
"epoch": 0.8736, |
|
"grad_norm": 1.595770737772284e-09, |
|
"learning_rate": 1.4178133333333334e-05, |
|
"loss": 0.0, |
|
"step": 54600 |
|
}, |
|
{ |
|
"epoch": 0.8752, |
|
"grad_norm": 1.4647668633571698e-09, |
|
"learning_rate": 1.4167466666666668e-05, |
|
"loss": 0.0, |
|
"step": 54700 |
|
}, |
|
{ |
|
"epoch": 0.8768, |
|
"grad_norm": 1.5293233346369561e-09, |
|
"learning_rate": 1.4156800000000001e-05, |
|
"loss": 0.0, |
|
"step": 54800 |
|
}, |
|
{ |
|
"epoch": 0.8784, |
|
"grad_norm": 1.5121590646316463e-09, |
|
"learning_rate": 1.4146133333333333e-05, |
|
"loss": 0.0, |
|
"step": 54900 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"grad_norm": 1.7063028767694277e-09, |
|
"learning_rate": 1.4135466666666669e-05, |
|
"loss": 0.0, |
|
"step": 55000 |
|
}, |
|
{ |
|
"epoch": 0.8816, |
|
"grad_norm": 1.6600216756756936e-09, |
|
"learning_rate": 1.41248e-05, |
|
"loss": 0.0, |
|
"step": 55100 |
|
}, |
|
{ |
|
"epoch": 0.8832, |
|
"grad_norm": 1.6948151770890263e-09, |
|
"learning_rate": 1.4114133333333336e-05, |
|
"loss": 0.0, |
|
"step": 55200 |
|
}, |
|
{ |
|
"epoch": 0.8848, |
|
"grad_norm": 1.6159773519319742e-09, |
|
"learning_rate": 1.4103573333333334e-05, |
|
"loss": 0.0, |
|
"step": 55300 |
|
}, |
|
{ |
|
"epoch": 0.8864, |
|
"grad_norm": 1.5366856676024554e-09, |
|
"learning_rate": 1.4092906666666668e-05, |
|
"loss": 0.0, |
|
"step": 55400 |
|
}, |
|
{ |
|
"epoch": 0.888, |
|
"grad_norm": 1.4232574008232746e-09, |
|
"learning_rate": 1.408224e-05, |
|
"loss": 0.0, |
|
"step": 55500 |
|
}, |
|
{ |
|
"epoch": 0.8896, |
|
"grad_norm": 1.7679161468109328e-09, |
|
"learning_rate": 1.4071573333333335e-05, |
|
"loss": 0.0, |
|
"step": 55600 |
|
}, |
|
{ |
|
"epoch": 0.8912, |
|
"grad_norm": 1.4493197753040477e-09, |
|
"learning_rate": 1.4060906666666667e-05, |
|
"loss": 0.0, |
|
"step": 55700 |
|
}, |
|
{ |
|
"epoch": 0.8928, |
|
"grad_norm": 1.4467651521243852e-09, |
|
"learning_rate": 1.4050240000000001e-05, |
|
"loss": 0.0, |
|
"step": 55800 |
|
}, |
|
{ |
|
"epoch": 0.8944, |
|
"grad_norm": 1.46508472020912e-09, |
|
"learning_rate": 1.4039573333333335e-05, |
|
"loss": 0.0, |
|
"step": 55900 |
|
}, |
|
{ |
|
"epoch": 0.896, |
|
"grad_norm": 1.4291234862184865e-09, |
|
"learning_rate": 1.4028906666666668e-05, |
|
"loss": 0.0, |
|
"step": 56000 |
|
}, |
|
{ |
|
"epoch": 0.8976, |
|
"grad_norm": 1.8533777845775035e-09, |
|
"learning_rate": 1.401824e-05, |
|
"loss": 0.0, |
|
"step": 56100 |
|
}, |
|
{ |
|
"epoch": 0.8992, |
|
"grad_norm": 1.6173311578882021e-09, |
|
"learning_rate": 1.4007573333333335e-05, |
|
"loss": 0.0, |
|
"step": 56200 |
|
}, |
|
{ |
|
"epoch": 0.9008, |
|
"grad_norm": 1.6096453059333271e-09, |
|
"learning_rate": 1.3996906666666667e-05, |
|
"loss": 0.0, |
|
"step": 56300 |
|
}, |
|
{ |
|
"epoch": 0.9024, |
|
"grad_norm": 1.5629680882867092e-09, |
|
"learning_rate": 1.3986240000000003e-05, |
|
"loss": 0.0, |
|
"step": 56400 |
|
}, |
|
{ |
|
"epoch": 0.904, |
|
"grad_norm": 1.6487086140770657e-09, |
|
"learning_rate": 1.3975573333333335e-05, |
|
"loss": 0.0, |
|
"step": 56500 |
|
}, |
|
{ |
|
"epoch": 0.9056, |
|
"grad_norm": 1.561828555374234e-09, |
|
"learning_rate": 1.3964906666666668e-05, |
|
"loss": 0.0, |
|
"step": 56600 |
|
}, |
|
{ |
|
"epoch": 0.9072, |
|
"grad_norm": 1.6236398892033321e-09, |
|
"learning_rate": 1.395424e-05, |
|
"loss": 0.0, |
|
"step": 56700 |
|
}, |
|
{ |
|
"epoch": 0.9088, |
|
"grad_norm": 1.3952866639854733e-09, |
|
"learning_rate": 1.3943573333333334e-05, |
|
"loss": 0.0, |
|
"step": 56800 |
|
}, |
|
{ |
|
"epoch": 0.9104, |
|
"grad_norm": 1.4216349208950874e-09, |
|
"learning_rate": 1.3932906666666668e-05, |
|
"loss": 0.0, |
|
"step": 56900 |
|
}, |
|
{ |
|
"epoch": 0.912, |
|
"grad_norm": 1.4121263047783827e-09, |
|
"learning_rate": 1.392224e-05, |
|
"loss": 0.0, |
|
"step": 57000 |
|
}, |
|
{ |
|
"epoch": 0.9136, |
|
"grad_norm": 1.7500071392007044e-09, |
|
"learning_rate": 1.3911573333333335e-05, |
|
"loss": 0.0, |
|
"step": 57100 |
|
}, |
|
{ |
|
"epoch": 0.9152, |
|
"grad_norm": 1.6262031721225867e-09, |
|
"learning_rate": 1.3900906666666667e-05, |
|
"loss": 0.0, |
|
"step": 57200 |
|
}, |
|
{ |
|
"epoch": 0.9168, |
|
"grad_norm": 1.3557924782858777e-09, |
|
"learning_rate": 1.3890346666666667e-05, |
|
"loss": 0.0, |
|
"step": 57300 |
|
}, |
|
{ |
|
"epoch": 0.9184, |
|
"grad_norm": 1.4752008503648995e-09, |
|
"learning_rate": 1.3879680000000002e-05, |
|
"loss": 0.0, |
|
"step": 57400 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"grad_norm": 1.3589706027161697e-09, |
|
"learning_rate": 1.3869013333333334e-05, |
|
"loss": 0.0, |
|
"step": 57500 |
|
}, |
|
{ |
|
"epoch": 0.9216, |
|
"grad_norm": 1.4188084040966942e-09, |
|
"learning_rate": 1.385834666666667e-05, |
|
"loss": 0.0, |
|
"step": 57600 |
|
}, |
|
{ |
|
"epoch": 0.9232, |
|
"grad_norm": 1.5505075001698287e-09, |
|
"learning_rate": 1.3847680000000002e-05, |
|
"loss": 0.0, |
|
"step": 57700 |
|
}, |
|
{ |
|
"epoch": 0.9248, |
|
"grad_norm": 1.5491343763329724e-09, |
|
"learning_rate": 1.3837013333333334e-05, |
|
"loss": 0.0, |
|
"step": 57800 |
|
}, |
|
{ |
|
"epoch": 0.9264, |
|
"grad_norm": 1.2985361674822116e-09, |
|
"learning_rate": 1.3826346666666667e-05, |
|
"loss": 0.0, |
|
"step": 57900 |
|
}, |
|
{ |
|
"epoch": 0.928, |
|
"grad_norm": 1.167143826030781e-09, |
|
"learning_rate": 1.3815680000000001e-05, |
|
"loss": 0.0, |
|
"step": 58000 |
|
}, |
|
{ |
|
"epoch": 0.9296, |
|
"grad_norm": 1.291968310113134e-09, |
|
"learning_rate": 1.3805013333333335e-05, |
|
"loss": 0.0, |
|
"step": 58100 |
|
}, |
|
{ |
|
"epoch": 0.9312, |
|
"grad_norm": 1.3477001736816874e-09, |
|
"learning_rate": 1.3794346666666666e-05, |
|
"loss": 0.0, |
|
"step": 58200 |
|
}, |
|
{ |
|
"epoch": 0.9328, |
|
"grad_norm": 1.2875156496505724e-09, |
|
"learning_rate": 1.3783680000000002e-05, |
|
"loss": 0.0, |
|
"step": 58300 |
|
}, |
|
{ |
|
"epoch": 0.9344, |
|
"grad_norm": 1.3489318551052065e-09, |
|
"learning_rate": 1.3773013333333334e-05, |
|
"loss": 0.0, |
|
"step": 58400 |
|
}, |
|
{ |
|
"epoch": 0.936, |
|
"grad_norm": 1.3920068431261257e-09, |
|
"learning_rate": 1.3762346666666667e-05, |
|
"loss": 0.0, |
|
"step": 58500 |
|
}, |
|
{ |
|
"epoch": 0.9376, |
|
"grad_norm": 1.8122923162167126e-09, |
|
"learning_rate": 1.3751680000000001e-05, |
|
"loss": 0.0, |
|
"step": 58600 |
|
}, |
|
{ |
|
"epoch": 0.9392, |
|
"grad_norm": 1.2962692030882295e-09, |
|
"learning_rate": 1.3741013333333335e-05, |
|
"loss": 0.0, |
|
"step": 58700 |
|
}, |
|
{ |
|
"epoch": 0.9408, |
|
"grad_norm": 1.5180040557893903e-09, |
|
"learning_rate": 1.3730346666666667e-05, |
|
"loss": 0.0, |
|
"step": 58800 |
|
}, |
|
{ |
|
"epoch": 0.9424, |
|
"grad_norm": 1.237118185670738e-09, |
|
"learning_rate": 1.3719680000000002e-05, |
|
"loss": 0.0, |
|
"step": 58900 |
|
}, |
|
{ |
|
"epoch": 0.944, |
|
"grad_norm": 1.2209625532833002e-09, |
|
"learning_rate": 1.3709013333333334e-05, |
|
"loss": 0.0, |
|
"step": 59000 |
|
}, |
|
{ |
|
"epoch": 0.9456, |
|
"grad_norm": 1.1343942452057831e-09, |
|
"learning_rate": 1.369834666666667e-05, |
|
"loss": 0.0, |
|
"step": 59100 |
|
}, |
|
{ |
|
"epoch": 0.9472, |
|
"grad_norm": 1.5663039754088004e-09, |
|
"learning_rate": 1.3687680000000001e-05, |
|
"loss": 0.0, |
|
"step": 59200 |
|
}, |
|
{ |
|
"epoch": 0.9488, |
|
"grad_norm": 1.328584242621389e-09, |
|
"learning_rate": 1.3677120000000001e-05, |
|
"loss": 0.0, |
|
"step": 59300 |
|
}, |
|
{ |
|
"epoch": 0.9504, |
|
"grad_norm": 1.2603350585393969e-09, |
|
"learning_rate": 1.3666453333333333e-05, |
|
"loss": 0.0, |
|
"step": 59400 |
|
}, |
|
{ |
|
"epoch": 0.952, |
|
"grad_norm": 1.3242087426590388e-09, |
|
"learning_rate": 1.3655786666666669e-05, |
|
"loss": 0.0, |
|
"step": 59500 |
|
}, |
|
{ |
|
"epoch": 0.9536, |
|
"grad_norm": 1.200673116485973e-09, |
|
"learning_rate": 1.364512e-05, |
|
"loss": 0.0, |
|
"step": 59600 |
|
}, |
|
{ |
|
"epoch": 0.9552, |
|
"grad_norm": 1.431109231120331e-09, |
|
"learning_rate": 1.3634453333333336e-05, |
|
"loss": 0.0, |
|
"step": 59700 |
|
}, |
|
{ |
|
"epoch": 0.9568, |
|
"grad_norm": 1.4143181070735977e-09, |
|
"learning_rate": 1.3623786666666668e-05, |
|
"loss": 0.0, |
|
"step": 59800 |
|
}, |
|
{ |
|
"epoch": 0.9584, |
|
"grad_norm": 1.1156711110515971e-09, |
|
"learning_rate": 1.3613120000000002e-05, |
|
"loss": 0.0, |
|
"step": 59900 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"grad_norm": 1.215820111255539e-09, |
|
"learning_rate": 1.3602453333333334e-05, |
|
"loss": 0.0, |
|
"step": 60000 |
|
}, |
|
{ |
|
"epoch": 0.9616, |
|
"grad_norm": 1.0717124965253788e-09, |
|
"learning_rate": 1.3591786666666669e-05, |
|
"loss": 0.0, |
|
"step": 60100 |
|
}, |
|
{ |
|
"epoch": 0.9632, |
|
"grad_norm": 1.3140745158679579e-09, |
|
"learning_rate": 1.358112e-05, |
|
"loss": 0.0, |
|
"step": 60200 |
|
}, |
|
{ |
|
"epoch": 0.9648, |
|
"grad_norm": 1.3158559797332714e-09, |
|
"learning_rate": 1.3570453333333336e-05, |
|
"loss": 0.0, |
|
"step": 60300 |
|
}, |
|
{ |
|
"epoch": 0.9664, |
|
"grad_norm": 1.1407147448849742e-09, |
|
"learning_rate": 1.3559786666666668e-05, |
|
"loss": 0.0, |
|
"step": 60400 |
|
}, |
|
{ |
|
"epoch": 0.968, |
|
"grad_norm": 1.2518017733498255e-09, |
|
"learning_rate": 1.354912e-05, |
|
"loss": 0.0, |
|
"step": 60500 |
|
}, |
|
{ |
|
"epoch": 0.9696, |
|
"grad_norm": 1.0706601161203366e-09, |
|
"learning_rate": 1.3538453333333334e-05, |
|
"loss": 0.0, |
|
"step": 60600 |
|
}, |
|
{ |
|
"epoch": 0.9712, |
|
"grad_norm": 1.2506353730401543e-09, |
|
"learning_rate": 1.3527786666666667e-05, |
|
"loss": 0.0, |
|
"step": 60700 |
|
}, |
|
{ |
|
"epoch": 0.9728, |
|
"grad_norm": 1.1973613212035161e-09, |
|
"learning_rate": 1.3517120000000001e-05, |
|
"loss": 0.0, |
|
"step": 60800 |
|
}, |
|
{ |
|
"epoch": 0.9744, |
|
"grad_norm": 1.105602609463574e-09, |
|
"learning_rate": 1.3506453333333333e-05, |
|
"loss": 0.0, |
|
"step": 60900 |
|
}, |
|
{ |
|
"epoch": 0.976, |
|
"grad_norm": 1.1296757973511262e-09, |
|
"learning_rate": 1.3495786666666668e-05, |
|
"loss": 0.0, |
|
"step": 61000 |
|
}, |
|
{ |
|
"epoch": 0.9776, |
|
"grad_norm": 1.1829981438893356e-09, |
|
"learning_rate": 1.348512e-05, |
|
"loss": 0.0, |
|
"step": 61100 |
|
}, |
|
{ |
|
"epoch": 0.9792, |
|
"grad_norm": 1.1169806191091425e-09, |
|
"learning_rate": 1.3474453333333336e-05, |
|
"loss": 0.0, |
|
"step": 61200 |
|
}, |
|
{ |
|
"epoch": 0.9808, |
|
"grad_norm": 1.0116082416189442e-09, |
|
"learning_rate": 1.3463893333333336e-05, |
|
"loss": 0.0, |
|
"step": 61300 |
|
}, |
|
{ |
|
"epoch": 0.9824, |
|
"grad_norm": 1.1309302383466502e-09, |
|
"learning_rate": 1.3453226666666668e-05, |
|
"loss": 0.0, |
|
"step": 61400 |
|
}, |
|
{ |
|
"epoch": 0.984, |
|
"grad_norm": 1.0152529927864862e-09, |
|
"learning_rate": 1.344256e-05, |
|
"loss": 0.0, |
|
"step": 61500 |
|
}, |
|
{ |
|
"epoch": 0.9856, |
|
"grad_norm": 1.1926426513042543e-09, |
|
"learning_rate": 1.3431893333333335e-05, |
|
"loss": 0.0, |
|
"step": 61600 |
|
}, |
|
{ |
|
"epoch": 0.9872, |
|
"grad_norm": 1.0562004604253161e-09, |
|
"learning_rate": 1.3421226666666667e-05, |
|
"loss": 0.0, |
|
"step": 61700 |
|
}, |
|
{ |
|
"epoch": 0.9888, |
|
"grad_norm": 1.2290218842636591e-09, |
|
"learning_rate": 1.341056e-05, |
|
"loss": 0.0, |
|
"step": 61800 |
|
}, |
|
{ |
|
"epoch": 0.9904, |
|
"grad_norm": 1.3118912622900325e-09, |
|
"learning_rate": 1.3399893333333334e-05, |
|
"loss": 0.0, |
|
"step": 61900 |
|
}, |
|
{ |
|
"epoch": 0.992, |
|
"grad_norm": 1.089616175065089e-09, |
|
"learning_rate": 1.3389226666666668e-05, |
|
"loss": 0.0, |
|
"step": 62000 |
|
}, |
|
{ |
|
"epoch": 0.9936, |
|
"grad_norm": 1.0423790719471526e-09, |
|
"learning_rate": 1.337856e-05, |
|
"loss": 0.0, |
|
"step": 62100 |
|
}, |
|
{ |
|
"epoch": 0.9952, |
|
"grad_norm": 1.2500789292602121e-09, |
|
"learning_rate": 1.3367893333333335e-05, |
|
"loss": 0.0, |
|
"step": 62200 |
|
}, |
|
{ |
|
"epoch": 0.9968, |
|
"grad_norm": 1.1556530177259106e-09, |
|
"learning_rate": 1.3357226666666667e-05, |
|
"loss": 0.0, |
|
"step": 62300 |
|
}, |
|
{ |
|
"epoch": 0.9984, |
|
"grad_norm": 9.741868423063238e-10, |
|
"learning_rate": 1.3346560000000003e-05, |
|
"loss": 0.0, |
|
"step": 62400 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"grad_norm": 1.197379306816515e-09, |
|
"learning_rate": 1.3335893333333335e-05, |
|
"loss": 0.0, |
|
"step": 62500 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"eval_accuracy": 1.0, |
|
"eval_f1": 1.0, |
|
"eval_loss": 0.0, |
|
"eval_precision": 1.0, |
|
"eval_recall": 1.0, |
|
"eval_runtime": 8.1185, |
|
"eval_samples_per_second": 615.878, |
|
"eval_steps_per_second": 19.339, |
|
"step": 62500 |
|
} |
|
], |
|
"logging_steps": 100, |
|
"max_steps": 187500, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 3, |
|
"save_steps": 500, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": false |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 5.2619537205899904e+17, |
|
"train_batch_size": 32, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|