|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 5.0, |
|
"eval_steps": 500, |
|
"global_step": 515, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.009708737864077669, |
|
"grad_norm": 6.002007543902212, |
|
"learning_rate": 1.5384615384615387e-06, |
|
"loss": 1.0598, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.019417475728155338, |
|
"grad_norm": 5.991740133700607, |
|
"learning_rate": 3.0769230769230774e-06, |
|
"loss": 1.0579, |
|
"step": 2 |
|
}, |
|
{ |
|
"epoch": 0.02912621359223301, |
|
"grad_norm": 5.825979646199088, |
|
"learning_rate": 4.615384615384616e-06, |
|
"loss": 1.0544, |
|
"step": 3 |
|
}, |
|
{ |
|
"epoch": 0.038834951456310676, |
|
"grad_norm": 4.364941447544986, |
|
"learning_rate": 6.153846153846155e-06, |
|
"loss": 1.0106, |
|
"step": 4 |
|
}, |
|
{ |
|
"epoch": 0.04854368932038835, |
|
"grad_norm": 2.7678687055700784, |
|
"learning_rate": 7.692307692307694e-06, |
|
"loss": 0.9739, |
|
"step": 5 |
|
}, |
|
{ |
|
"epoch": 0.05825242718446602, |
|
"grad_norm": 2.5422026912598827, |
|
"learning_rate": 9.230769230769232e-06, |
|
"loss": 0.948, |
|
"step": 6 |
|
}, |
|
{ |
|
"epoch": 0.06796116504854369, |
|
"grad_norm": 3.961951944224408, |
|
"learning_rate": 1.076923076923077e-05, |
|
"loss": 0.9572, |
|
"step": 7 |
|
}, |
|
{ |
|
"epoch": 0.07766990291262135, |
|
"grad_norm": 3.5883110693606577, |
|
"learning_rate": 1.230769230769231e-05, |
|
"loss": 0.9112, |
|
"step": 8 |
|
}, |
|
{ |
|
"epoch": 0.08737864077669903, |
|
"grad_norm": 3.860767785745949, |
|
"learning_rate": 1.3846153846153847e-05, |
|
"loss": 0.9007, |
|
"step": 9 |
|
}, |
|
{ |
|
"epoch": 0.0970873786407767, |
|
"grad_norm": 2.7156118144668633, |
|
"learning_rate": 1.5384615384615387e-05, |
|
"loss": 0.8734, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.10679611650485436, |
|
"grad_norm": 1.9289111077886563, |
|
"learning_rate": 1.6923076923076924e-05, |
|
"loss": 0.8391, |
|
"step": 11 |
|
}, |
|
{ |
|
"epoch": 0.11650485436893204, |
|
"grad_norm": 1.8123247995331513, |
|
"learning_rate": 1.8461538461538465e-05, |
|
"loss": 0.8171, |
|
"step": 12 |
|
}, |
|
{ |
|
"epoch": 0.1262135922330097, |
|
"grad_norm": 1.428428989967321, |
|
"learning_rate": 2e-05, |
|
"loss": 0.7956, |
|
"step": 13 |
|
}, |
|
{ |
|
"epoch": 0.13592233009708737, |
|
"grad_norm": 1.3680143343907647, |
|
"learning_rate": 2.153846153846154e-05, |
|
"loss": 0.7813, |
|
"step": 14 |
|
}, |
|
{ |
|
"epoch": 0.14563106796116504, |
|
"grad_norm": 1.3879441764342295, |
|
"learning_rate": 2.3076923076923076e-05, |
|
"loss": 0.7681, |
|
"step": 15 |
|
}, |
|
{ |
|
"epoch": 0.1553398058252427, |
|
"grad_norm": 1.0796522703025953, |
|
"learning_rate": 2.461538461538462e-05, |
|
"loss": 0.7574, |
|
"step": 16 |
|
}, |
|
{ |
|
"epoch": 0.1650485436893204, |
|
"grad_norm": 1.371942931930126, |
|
"learning_rate": 2.6153846153846157e-05, |
|
"loss": 0.748, |
|
"step": 17 |
|
}, |
|
{ |
|
"epoch": 0.17475728155339806, |
|
"grad_norm": 1.410433870930946, |
|
"learning_rate": 2.7692307692307694e-05, |
|
"loss": 0.7432, |
|
"step": 18 |
|
}, |
|
{ |
|
"epoch": 0.18446601941747573, |
|
"grad_norm": 1.0096190041268163, |
|
"learning_rate": 2.923076923076923e-05, |
|
"loss": 0.7329, |
|
"step": 19 |
|
}, |
|
{ |
|
"epoch": 0.1941747572815534, |
|
"grad_norm": 1.7265065580167445, |
|
"learning_rate": 3.0769230769230774e-05, |
|
"loss": 0.7295, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.20388349514563106, |
|
"grad_norm": 1.1032179992161404, |
|
"learning_rate": 3.230769230769231e-05, |
|
"loss": 0.7245, |
|
"step": 21 |
|
}, |
|
{ |
|
"epoch": 0.21359223300970873, |
|
"grad_norm": 1.0901184558146035, |
|
"learning_rate": 3.384615384615385e-05, |
|
"loss": 0.7183, |
|
"step": 22 |
|
}, |
|
{ |
|
"epoch": 0.22330097087378642, |
|
"grad_norm": 1.7732487418217808, |
|
"learning_rate": 3.538461538461539e-05, |
|
"loss": 0.7136, |
|
"step": 23 |
|
}, |
|
{ |
|
"epoch": 0.23300970873786409, |
|
"grad_norm": 1.430366935608713, |
|
"learning_rate": 3.692307692307693e-05, |
|
"loss": 0.7105, |
|
"step": 24 |
|
}, |
|
{ |
|
"epoch": 0.24271844660194175, |
|
"grad_norm": 1.682581513255261, |
|
"learning_rate": 3.846153846153846e-05, |
|
"loss": 0.6988, |
|
"step": 25 |
|
}, |
|
{ |
|
"epoch": 0.2524271844660194, |
|
"grad_norm": 1.706823703289166, |
|
"learning_rate": 4e-05, |
|
"loss": 0.6991, |
|
"step": 26 |
|
}, |
|
{ |
|
"epoch": 0.2621359223300971, |
|
"grad_norm": 2.014798562695819, |
|
"learning_rate": 4.1538461538461544e-05, |
|
"loss": 0.6955, |
|
"step": 27 |
|
}, |
|
{ |
|
"epoch": 0.27184466019417475, |
|
"grad_norm": 1.4584117807771175, |
|
"learning_rate": 4.307692307692308e-05, |
|
"loss": 0.6901, |
|
"step": 28 |
|
}, |
|
{ |
|
"epoch": 0.2815533980582524, |
|
"grad_norm": 2.5240991865834883, |
|
"learning_rate": 4.461538461538462e-05, |
|
"loss": 0.6901, |
|
"step": 29 |
|
}, |
|
{ |
|
"epoch": 0.2912621359223301, |
|
"grad_norm": 1.7349683843216974, |
|
"learning_rate": 4.615384615384615e-05, |
|
"loss": 0.6912, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.30097087378640774, |
|
"grad_norm": 2.453207741393019, |
|
"learning_rate": 4.76923076923077e-05, |
|
"loss": 0.6837, |
|
"step": 31 |
|
}, |
|
{ |
|
"epoch": 0.3106796116504854, |
|
"grad_norm": 2.1891597642473517, |
|
"learning_rate": 4.923076923076924e-05, |
|
"loss": 0.6834, |
|
"step": 32 |
|
}, |
|
{ |
|
"epoch": 0.32038834951456313, |
|
"grad_norm": 2.0789856106085867, |
|
"learning_rate": 5.076923076923077e-05, |
|
"loss": 0.6854, |
|
"step": 33 |
|
}, |
|
{ |
|
"epoch": 0.3300970873786408, |
|
"grad_norm": 1.7347914028228881, |
|
"learning_rate": 5.230769230769231e-05, |
|
"loss": 0.6734, |
|
"step": 34 |
|
}, |
|
{ |
|
"epoch": 0.33980582524271846, |
|
"grad_norm": 2.1284302056196744, |
|
"learning_rate": 5.3846153846153853e-05, |
|
"loss": 0.6741, |
|
"step": 35 |
|
}, |
|
{ |
|
"epoch": 0.34951456310679613, |
|
"grad_norm": 1.9299657090664841, |
|
"learning_rate": 5.538461538461539e-05, |
|
"loss": 0.6737, |
|
"step": 36 |
|
}, |
|
{ |
|
"epoch": 0.3592233009708738, |
|
"grad_norm": 2.1216828159935135, |
|
"learning_rate": 5.692307692307693e-05, |
|
"loss": 0.6701, |
|
"step": 37 |
|
}, |
|
{ |
|
"epoch": 0.36893203883495146, |
|
"grad_norm": 1.2362157926807915, |
|
"learning_rate": 5.846153846153846e-05, |
|
"loss": 0.6639, |
|
"step": 38 |
|
}, |
|
{ |
|
"epoch": 0.3786407766990291, |
|
"grad_norm": 2.2344300721881525, |
|
"learning_rate": 6.000000000000001e-05, |
|
"loss": 0.666, |
|
"step": 39 |
|
}, |
|
{ |
|
"epoch": 0.3883495145631068, |
|
"grad_norm": 1.544248418606736, |
|
"learning_rate": 6.153846153846155e-05, |
|
"loss": 0.6656, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.39805825242718446, |
|
"grad_norm": 2.774120239479565, |
|
"learning_rate": 6.307692307692308e-05, |
|
"loss": 0.6683, |
|
"step": 41 |
|
}, |
|
{ |
|
"epoch": 0.4077669902912621, |
|
"grad_norm": 2.1993216366589663, |
|
"learning_rate": 6.461538461538462e-05, |
|
"loss": 0.6649, |
|
"step": 42 |
|
}, |
|
{ |
|
"epoch": 0.4174757281553398, |
|
"grad_norm": 1.8997195417294006, |
|
"learning_rate": 6.615384615384616e-05, |
|
"loss": 0.6501, |
|
"step": 43 |
|
}, |
|
{ |
|
"epoch": 0.42718446601941745, |
|
"grad_norm": 1.9017422216012896, |
|
"learning_rate": 6.76923076923077e-05, |
|
"loss": 0.6556, |
|
"step": 44 |
|
}, |
|
{ |
|
"epoch": 0.4368932038834951, |
|
"grad_norm": 2.2115485405167603, |
|
"learning_rate": 6.923076923076924e-05, |
|
"loss": 0.6582, |
|
"step": 45 |
|
}, |
|
{ |
|
"epoch": 0.44660194174757284, |
|
"grad_norm": 2.2558473718778282, |
|
"learning_rate": 7.076923076923078e-05, |
|
"loss": 0.6599, |
|
"step": 46 |
|
}, |
|
{ |
|
"epoch": 0.4563106796116505, |
|
"grad_norm": 1.8236441414000624, |
|
"learning_rate": 7.230769230769232e-05, |
|
"loss": 0.6502, |
|
"step": 47 |
|
}, |
|
{ |
|
"epoch": 0.46601941747572817, |
|
"grad_norm": 1.668233835252892, |
|
"learning_rate": 7.384615384615386e-05, |
|
"loss": 0.6525, |
|
"step": 48 |
|
}, |
|
{ |
|
"epoch": 0.47572815533980584, |
|
"grad_norm": 3.1753217004320975, |
|
"learning_rate": 7.538461538461539e-05, |
|
"loss": 0.6627, |
|
"step": 49 |
|
}, |
|
{ |
|
"epoch": 0.4854368932038835, |
|
"grad_norm": 1.621218570570416, |
|
"learning_rate": 7.692307692307693e-05, |
|
"loss": 0.6484, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.49514563106796117, |
|
"grad_norm": 4.339926226583821, |
|
"learning_rate": 7.846153846153847e-05, |
|
"loss": 0.6752, |
|
"step": 51 |
|
}, |
|
{ |
|
"epoch": 0.5048543689320388, |
|
"grad_norm": 3.113087384499717, |
|
"learning_rate": 8e-05, |
|
"loss": 0.6722, |
|
"step": 52 |
|
}, |
|
{ |
|
"epoch": 0.5145631067961165, |
|
"grad_norm": 2.714482442820161, |
|
"learning_rate": 7.999907919834168e-05, |
|
"loss": 0.6628, |
|
"step": 53 |
|
}, |
|
{ |
|
"epoch": 0.5242718446601942, |
|
"grad_norm": 2.914244429607087, |
|
"learning_rate": 7.999631683576055e-05, |
|
"loss": 0.6581, |
|
"step": 54 |
|
}, |
|
{ |
|
"epoch": 0.5339805825242718, |
|
"grad_norm": 1.884493343669805, |
|
"learning_rate": 7.999171303943594e-05, |
|
"loss": 0.6497, |
|
"step": 55 |
|
}, |
|
{ |
|
"epoch": 0.5436893203883495, |
|
"grad_norm": 3.1324813256148873, |
|
"learning_rate": 7.998526802132707e-05, |
|
"loss": 0.6539, |
|
"step": 56 |
|
}, |
|
{ |
|
"epoch": 0.5533980582524272, |
|
"grad_norm": 1.9123959562537083, |
|
"learning_rate": 7.997698207816309e-05, |
|
"loss": 0.6491, |
|
"step": 57 |
|
}, |
|
{ |
|
"epoch": 0.5631067961165048, |
|
"grad_norm": 2.223063366245145, |
|
"learning_rate": 7.99668555914295e-05, |
|
"loss": 0.6515, |
|
"step": 58 |
|
}, |
|
{ |
|
"epoch": 0.5728155339805825, |
|
"grad_norm": 2.268558088911201, |
|
"learning_rate": 7.995488902735063e-05, |
|
"loss": 0.6573, |
|
"step": 59 |
|
}, |
|
{ |
|
"epoch": 0.5825242718446602, |
|
"grad_norm": 1.5722495721856204, |
|
"learning_rate": 7.994108293686804e-05, |
|
"loss": 0.6441, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.5922330097087378, |
|
"grad_norm": 1.666894724940502, |
|
"learning_rate": 7.992543795561527e-05, |
|
"loss": 0.6408, |
|
"step": 61 |
|
}, |
|
{ |
|
"epoch": 0.6019417475728155, |
|
"grad_norm": 1.0031568832099003, |
|
"learning_rate": 7.990795480388861e-05, |
|
"loss": 0.6342, |
|
"step": 62 |
|
}, |
|
{ |
|
"epoch": 0.6116504854368932, |
|
"grad_norm": 2.2162360491704067, |
|
"learning_rate": 7.988863428661377e-05, |
|
"loss": 0.6589, |
|
"step": 63 |
|
}, |
|
{ |
|
"epoch": 0.6213592233009708, |
|
"grad_norm": 1.7496212177283834, |
|
"learning_rate": 7.9867477293309e-05, |
|
"loss": 0.6484, |
|
"step": 64 |
|
}, |
|
{ |
|
"epoch": 0.6310679611650486, |
|
"grad_norm": 1.5169651851287749, |
|
"learning_rate": 7.984448479804398e-05, |
|
"loss": 0.6394, |
|
"step": 65 |
|
}, |
|
{ |
|
"epoch": 0.6407766990291263, |
|
"grad_norm": 1.9120046573933336, |
|
"learning_rate": 7.981965785939515e-05, |
|
"loss": 0.635, |
|
"step": 66 |
|
}, |
|
{ |
|
"epoch": 0.6504854368932039, |
|
"grad_norm": 1.7557422251639143, |
|
"learning_rate": 7.97929976203968e-05, |
|
"loss": 0.647, |
|
"step": 67 |
|
}, |
|
{ |
|
"epoch": 0.6601941747572816, |
|
"grad_norm": 1.3556049828157026, |
|
"learning_rate": 7.976450530848851e-05, |
|
"loss": 0.6374, |
|
"step": 68 |
|
}, |
|
{ |
|
"epoch": 0.6699029126213593, |
|
"grad_norm": 2.244626845581171, |
|
"learning_rate": 7.973418223545874e-05, |
|
"loss": 0.6334, |
|
"step": 69 |
|
}, |
|
{ |
|
"epoch": 0.6796116504854369, |
|
"grad_norm": 1.3591820052552963, |
|
"learning_rate": 7.970202979738426e-05, |
|
"loss": 0.6376, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.6893203883495146, |
|
"grad_norm": 1.5805931826326196, |
|
"learning_rate": 7.966804947456599e-05, |
|
"loss": 0.632, |
|
"step": 71 |
|
}, |
|
{ |
|
"epoch": 0.6990291262135923, |
|
"grad_norm": 1.334661947485613, |
|
"learning_rate": 7.96322428314608e-05, |
|
"loss": 0.6269, |
|
"step": 72 |
|
}, |
|
{ |
|
"epoch": 0.7087378640776699, |
|
"grad_norm": 1.8903308561526113, |
|
"learning_rate": 7.959461151660952e-05, |
|
"loss": 0.6342, |
|
"step": 73 |
|
}, |
|
{ |
|
"epoch": 0.7184466019417476, |
|
"grad_norm": 1.1038829068258371, |
|
"learning_rate": 7.955515726256101e-05, |
|
"loss": 0.6275, |
|
"step": 74 |
|
}, |
|
{ |
|
"epoch": 0.7281553398058253, |
|
"grad_norm": 1.5525916361050445, |
|
"learning_rate": 7.951388188579237e-05, |
|
"loss": 0.6343, |
|
"step": 75 |
|
}, |
|
{ |
|
"epoch": 0.7378640776699029, |
|
"grad_norm": 1.5390664333547033, |
|
"learning_rate": 7.94707872866254e-05, |
|
"loss": 0.6315, |
|
"step": 76 |
|
}, |
|
{ |
|
"epoch": 0.7475728155339806, |
|
"grad_norm": 1.769589829850563, |
|
"learning_rate": 7.942587544913901e-05, |
|
"loss": 0.6329, |
|
"step": 77 |
|
}, |
|
{ |
|
"epoch": 0.7572815533980582, |
|
"grad_norm": 1.4148162374660378, |
|
"learning_rate": 7.937914844107791e-05, |
|
"loss": 0.6297, |
|
"step": 78 |
|
}, |
|
{ |
|
"epoch": 0.7669902912621359, |
|
"grad_norm": 1.7973463567105696, |
|
"learning_rate": 7.933060841375745e-05, |
|
"loss": 0.627, |
|
"step": 79 |
|
}, |
|
{ |
|
"epoch": 0.7766990291262136, |
|
"grad_norm": 1.184326171503996, |
|
"learning_rate": 7.928025760196447e-05, |
|
"loss": 0.6234, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.7864077669902912, |
|
"grad_norm": 1.6486867376699348, |
|
"learning_rate": 7.922809832385456e-05, |
|
"loss": 0.6224, |
|
"step": 81 |
|
}, |
|
{ |
|
"epoch": 0.7961165048543689, |
|
"grad_norm": 1.2312763534767475, |
|
"learning_rate": 7.917413298084519e-05, |
|
"loss": 0.6207, |
|
"step": 82 |
|
}, |
|
{ |
|
"epoch": 0.8058252427184466, |
|
"grad_norm": 1.319948768871293, |
|
"learning_rate": 7.911836405750525e-05, |
|
"loss": 0.618, |
|
"step": 83 |
|
}, |
|
{ |
|
"epoch": 0.8155339805825242, |
|
"grad_norm": 1.2459374631380746, |
|
"learning_rate": 7.906079412144055e-05, |
|
"loss": 0.6215, |
|
"step": 84 |
|
}, |
|
{ |
|
"epoch": 0.8252427184466019, |
|
"grad_norm": 1.2798874980653692, |
|
"learning_rate": 7.900142582317576e-05, |
|
"loss": 0.6172, |
|
"step": 85 |
|
}, |
|
{ |
|
"epoch": 0.8349514563106796, |
|
"grad_norm": 2.093389549775017, |
|
"learning_rate": 7.894026189603225e-05, |
|
"loss": 0.618, |
|
"step": 86 |
|
}, |
|
{ |
|
"epoch": 0.8446601941747572, |
|
"grad_norm": 1.1300093559740532, |
|
"learning_rate": 7.887730515600227e-05, |
|
"loss": 0.6173, |
|
"step": 87 |
|
}, |
|
{ |
|
"epoch": 0.8543689320388349, |
|
"grad_norm": 2.3541947389099094, |
|
"learning_rate": 7.881255850161939e-05, |
|
"loss": 0.6243, |
|
"step": 88 |
|
}, |
|
{ |
|
"epoch": 0.8640776699029126, |
|
"grad_norm": 1.5613220102186438, |
|
"learning_rate": 7.87460249138249e-05, |
|
"loss": 0.623, |
|
"step": 89 |
|
}, |
|
{ |
|
"epoch": 0.8737864077669902, |
|
"grad_norm": 1.9104766159006328, |
|
"learning_rate": 7.867770745583074e-05, |
|
"loss": 0.6241, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.883495145631068, |
|
"grad_norm": 1.419392875826999, |
|
"learning_rate": 7.860760927297833e-05, |
|
"loss": 0.62, |
|
"step": 91 |
|
}, |
|
{ |
|
"epoch": 0.8932038834951457, |
|
"grad_norm": 1.3282640676407322, |
|
"learning_rate": 7.853573359259381e-05, |
|
"loss": 0.6166, |
|
"step": 92 |
|
}, |
|
{ |
|
"epoch": 0.9029126213592233, |
|
"grad_norm": 1.3250464130522686, |
|
"learning_rate": 7.846208372383947e-05, |
|
"loss": 0.6179, |
|
"step": 93 |
|
}, |
|
{ |
|
"epoch": 0.912621359223301, |
|
"grad_norm": 1.1377495249178353, |
|
"learning_rate": 7.838666305756138e-05, |
|
"loss": 0.6122, |
|
"step": 94 |
|
}, |
|
{ |
|
"epoch": 0.9223300970873787, |
|
"grad_norm": 1.4646244528960923, |
|
"learning_rate": 7.830947506613324e-05, |
|
"loss": 0.6105, |
|
"step": 95 |
|
}, |
|
{ |
|
"epoch": 0.9320388349514563, |
|
"grad_norm": 1.142509028160319, |
|
"learning_rate": 7.823052330329663e-05, |
|
"loss": 0.611, |
|
"step": 96 |
|
}, |
|
{ |
|
"epoch": 0.941747572815534, |
|
"grad_norm": 2.0274152032009494, |
|
"learning_rate": 7.81498114039972e-05, |
|
"loss": 0.616, |
|
"step": 97 |
|
}, |
|
{ |
|
"epoch": 0.9514563106796117, |
|
"grad_norm": 1.0686288368525192, |
|
"learning_rate": 7.806734308421753e-05, |
|
"loss": 0.6101, |
|
"step": 98 |
|
}, |
|
{ |
|
"epoch": 0.9611650485436893, |
|
"grad_norm": 2.0849531670929626, |
|
"learning_rate": 7.798312214080588e-05, |
|
"loss": 0.6128, |
|
"step": 99 |
|
}, |
|
{ |
|
"epoch": 0.970873786407767, |
|
"grad_norm": 1.723568607059033, |
|
"learning_rate": 7.789715245130148e-05, |
|
"loss": 0.6156, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.9805825242718447, |
|
"grad_norm": 1.0991520002736015, |
|
"learning_rate": 7.780943797375594e-05, |
|
"loss": 0.6028, |
|
"step": 101 |
|
}, |
|
{ |
|
"epoch": 0.9902912621359223, |
|
"grad_norm": 1.81475046862089, |
|
"learning_rate": 7.77199827465511e-05, |
|
"loss": 0.6176, |
|
"step": 102 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"grad_norm": 1.0995121126222103, |
|
"learning_rate": 7.762879088821302e-05, |
|
"loss": 0.612, |
|
"step": 103 |
|
}, |
|
{ |
|
"epoch": 1.0097087378640777, |
|
"grad_norm": 1.1739346884276352, |
|
"learning_rate": 7.753586659722243e-05, |
|
"loss": 0.5928, |
|
"step": 104 |
|
}, |
|
{ |
|
"epoch": 1.0194174757281553, |
|
"grad_norm": 1.475480407854494, |
|
"learning_rate": 7.74412141518214e-05, |
|
"loss": 0.5989, |
|
"step": 105 |
|
}, |
|
{ |
|
"epoch": 1.029126213592233, |
|
"grad_norm": 1.3719226999649305, |
|
"learning_rate": 7.734483790981636e-05, |
|
"loss": 0.5969, |
|
"step": 106 |
|
}, |
|
{ |
|
"epoch": 1.0388349514563107, |
|
"grad_norm": 1.0726737236370019, |
|
"learning_rate": 7.724674230837747e-05, |
|
"loss": 0.5825, |
|
"step": 107 |
|
}, |
|
{ |
|
"epoch": 1.0485436893203883, |
|
"grad_norm": 1.3362590355075374, |
|
"learning_rate": 7.714693186383437e-05, |
|
"loss": 0.594, |
|
"step": 108 |
|
}, |
|
{ |
|
"epoch": 1.058252427184466, |
|
"grad_norm": 0.820555469002108, |
|
"learning_rate": 7.704541117146819e-05, |
|
"loss": 0.5874, |
|
"step": 109 |
|
}, |
|
{ |
|
"epoch": 1.0679611650485437, |
|
"grad_norm": 1.3323408029024517, |
|
"learning_rate": 7.694218490530004e-05, |
|
"loss": 0.5886, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 1.0776699029126213, |
|
"grad_norm": 1.066797450314409, |
|
"learning_rate": 7.683725781787574e-05, |
|
"loss": 0.5876, |
|
"step": 111 |
|
}, |
|
{ |
|
"epoch": 1.087378640776699, |
|
"grad_norm": 1.245072674676843, |
|
"learning_rate": 7.673063474004715e-05, |
|
"loss": 0.5879, |
|
"step": 112 |
|
}, |
|
{ |
|
"epoch": 1.0970873786407767, |
|
"grad_norm": 1.2819802358790717, |
|
"learning_rate": 7.662232058074957e-05, |
|
"loss": 0.5864, |
|
"step": 113 |
|
}, |
|
{ |
|
"epoch": 1.1067961165048543, |
|
"grad_norm": 1.436129834544921, |
|
"learning_rate": 7.651232032677588e-05, |
|
"loss": 0.5919, |
|
"step": 114 |
|
}, |
|
{ |
|
"epoch": 1.116504854368932, |
|
"grad_norm": 0.8175594211587869, |
|
"learning_rate": 7.640063904254691e-05, |
|
"loss": 0.5817, |
|
"step": 115 |
|
}, |
|
{ |
|
"epoch": 1.1262135922330097, |
|
"grad_norm": 1.687072132305568, |
|
"learning_rate": 7.628728186987824e-05, |
|
"loss": 0.5848, |
|
"step": 116 |
|
}, |
|
{ |
|
"epoch": 1.1359223300970873, |
|
"grad_norm": 1.1326446741047016, |
|
"learning_rate": 7.617225402774348e-05, |
|
"loss": 0.5858, |
|
"step": 117 |
|
}, |
|
{ |
|
"epoch": 1.145631067961165, |
|
"grad_norm": 0.82496802522063, |
|
"learning_rate": 7.605556081203405e-05, |
|
"loss": 0.5812, |
|
"step": 118 |
|
}, |
|
{ |
|
"epoch": 1.1553398058252426, |
|
"grad_norm": 1.250641283128446, |
|
"learning_rate": 7.593720759531526e-05, |
|
"loss": 0.5876, |
|
"step": 119 |
|
}, |
|
{ |
|
"epoch": 1.1650485436893203, |
|
"grad_norm": 1.1921582278913663, |
|
"learning_rate": 7.581719982657903e-05, |
|
"loss": 0.5842, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 1.174757281553398, |
|
"grad_norm": 1.2734225443412055, |
|
"learning_rate": 7.569554303099296e-05, |
|
"loss": 0.579, |
|
"step": 121 |
|
}, |
|
{ |
|
"epoch": 1.1844660194174756, |
|
"grad_norm": 1.1853318091968652, |
|
"learning_rate": 7.557224280964603e-05, |
|
"loss": 0.582, |
|
"step": 122 |
|
}, |
|
{ |
|
"epoch": 1.1941747572815533, |
|
"grad_norm": 1.152376249051554, |
|
"learning_rate": 7.544730483929065e-05, |
|
"loss": 0.5836, |
|
"step": 123 |
|
}, |
|
{ |
|
"epoch": 1.203883495145631, |
|
"grad_norm": 1.3041878863702414, |
|
"learning_rate": 7.532073487208132e-05, |
|
"loss": 0.5761, |
|
"step": 124 |
|
}, |
|
{ |
|
"epoch": 1.2135922330097086, |
|
"grad_norm": 1.3112880078778733, |
|
"learning_rate": 7.519253873530986e-05, |
|
"loss": 0.5836, |
|
"step": 125 |
|
}, |
|
{ |
|
"epoch": 1.2233009708737863, |
|
"grad_norm": 1.3474986040772934, |
|
"learning_rate": 7.5062722331137e-05, |
|
"loss": 0.5801, |
|
"step": 126 |
|
}, |
|
{ |
|
"epoch": 1.233009708737864, |
|
"grad_norm": 0.8129513286807843, |
|
"learning_rate": 7.493129163632076e-05, |
|
"loss": 0.5748, |
|
"step": 127 |
|
}, |
|
{ |
|
"epoch": 1.2427184466019416, |
|
"grad_norm": 1.1010576487307613, |
|
"learning_rate": 7.479825270194124e-05, |
|
"loss": 0.5842, |
|
"step": 128 |
|
}, |
|
{ |
|
"epoch": 1.2524271844660193, |
|
"grad_norm": 1.398672981702769, |
|
"learning_rate": 7.466361165312199e-05, |
|
"loss": 0.5877, |
|
"step": 129 |
|
}, |
|
{ |
|
"epoch": 1.262135922330097, |
|
"grad_norm": 1.2164255288325172, |
|
"learning_rate": 7.452737468874809e-05, |
|
"loss": 0.5826, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 1.2718446601941746, |
|
"grad_norm": 0.7866460429939296, |
|
"learning_rate": 7.438954808118064e-05, |
|
"loss": 0.5778, |
|
"step": 131 |
|
}, |
|
{ |
|
"epoch": 1.2815533980582523, |
|
"grad_norm": 1.2501923242149588, |
|
"learning_rate": 7.425013817596812e-05, |
|
"loss": 0.5797, |
|
"step": 132 |
|
}, |
|
{ |
|
"epoch": 1.29126213592233, |
|
"grad_norm": 1.3225455051065684, |
|
"learning_rate": 7.41091513915541e-05, |
|
"loss": 0.578, |
|
"step": 133 |
|
}, |
|
{ |
|
"epoch": 1.3009708737864076, |
|
"grad_norm": 1.0105911545481185, |
|
"learning_rate": 7.396659421898183e-05, |
|
"loss": 0.5754, |
|
"step": 134 |
|
}, |
|
{ |
|
"epoch": 1.3106796116504853, |
|
"grad_norm": 1.2271731398121772, |
|
"learning_rate": 7.382247322159534e-05, |
|
"loss": 0.5808, |
|
"step": 135 |
|
}, |
|
{ |
|
"epoch": 1.3203883495145632, |
|
"grad_norm": 1.1215866709600937, |
|
"learning_rate": 7.367679503473732e-05, |
|
"loss": 0.5738, |
|
"step": 136 |
|
}, |
|
{ |
|
"epoch": 1.3300970873786409, |
|
"grad_norm": 1.0715712124069652, |
|
"learning_rate": 7.352956636544358e-05, |
|
"loss": 0.5754, |
|
"step": 137 |
|
}, |
|
{ |
|
"epoch": 1.3398058252427185, |
|
"grad_norm": 0.9077358541837017, |
|
"learning_rate": 7.338079399213424e-05, |
|
"loss": 0.5711, |
|
"step": 138 |
|
}, |
|
{ |
|
"epoch": 1.3495145631067962, |
|
"grad_norm": 1.064355488383383, |
|
"learning_rate": 7.32304847643017e-05, |
|
"loss": 0.5698, |
|
"step": 139 |
|
}, |
|
{ |
|
"epoch": 1.3592233009708738, |
|
"grad_norm": 0.9384952345701946, |
|
"learning_rate": 7.30786456021953e-05, |
|
"loss": 0.5739, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 1.3689320388349515, |
|
"grad_norm": 1.1821144606047003, |
|
"learning_rate": 7.292528349650262e-05, |
|
"loss": 0.5701, |
|
"step": 141 |
|
}, |
|
{ |
|
"epoch": 1.3786407766990292, |
|
"grad_norm": 0.97575327357092, |
|
"learning_rate": 7.277040550802776e-05, |
|
"loss": 0.573, |
|
"step": 142 |
|
}, |
|
{ |
|
"epoch": 1.3883495145631068, |
|
"grad_norm": 1.1580935526594842, |
|
"learning_rate": 7.261401876736611e-05, |
|
"loss": 0.5751, |
|
"step": 143 |
|
}, |
|
{ |
|
"epoch": 1.3980582524271845, |
|
"grad_norm": 0.9292779139246362, |
|
"learning_rate": 7.245613047457621e-05, |
|
"loss": 0.5718, |
|
"step": 144 |
|
}, |
|
{ |
|
"epoch": 1.4077669902912622, |
|
"grad_norm": 1.2215840715546447, |
|
"learning_rate": 7.229674789884813e-05, |
|
"loss": 0.5749, |
|
"step": 145 |
|
}, |
|
{ |
|
"epoch": 1.4174757281553398, |
|
"grad_norm": 0.9138289618139581, |
|
"learning_rate": 7.213587837816889e-05, |
|
"loss": 0.5696, |
|
"step": 146 |
|
}, |
|
{ |
|
"epoch": 1.4271844660194175, |
|
"grad_norm": 1.0533170144837214, |
|
"learning_rate": 7.197352931898454e-05, |
|
"loss": 0.5726, |
|
"step": 147 |
|
}, |
|
{ |
|
"epoch": 1.4368932038834952, |
|
"grad_norm": 1.1454322229077762, |
|
"learning_rate": 7.180970819585923e-05, |
|
"loss": 0.5712, |
|
"step": 148 |
|
}, |
|
{ |
|
"epoch": 1.4466019417475728, |
|
"grad_norm": 1.0300828181718542, |
|
"learning_rate": 7.164442255113107e-05, |
|
"loss": 0.5717, |
|
"step": 149 |
|
}, |
|
{ |
|
"epoch": 1.4563106796116505, |
|
"grad_norm": 1.077162592235673, |
|
"learning_rate": 7.147767999456484e-05, |
|
"loss": 0.5785, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 1.4660194174757282, |
|
"grad_norm": 1.2410959167210567, |
|
"learning_rate": 7.130948820300166e-05, |
|
"loss": 0.5771, |
|
"step": 151 |
|
}, |
|
{ |
|
"epoch": 1.4757281553398058, |
|
"grad_norm": 1.2329275408989433, |
|
"learning_rate": 7.113985492000558e-05, |
|
"loss": 0.5683, |
|
"step": 152 |
|
}, |
|
{ |
|
"epoch": 1.4854368932038835, |
|
"grad_norm": 1.0776714897294795, |
|
"learning_rate": 7.0968787955507e-05, |
|
"loss": 0.5716, |
|
"step": 153 |
|
}, |
|
{ |
|
"epoch": 1.4951456310679612, |
|
"grad_norm": 0.8984266405483924, |
|
"learning_rate": 7.079629518544312e-05, |
|
"loss": 0.5726, |
|
"step": 154 |
|
}, |
|
{ |
|
"epoch": 1.5048543689320388, |
|
"grad_norm": 1.1047143317546186, |
|
"learning_rate": 7.062238455139544e-05, |
|
"loss": 0.5754, |
|
"step": 155 |
|
}, |
|
{ |
|
"epoch": 1.5145631067961165, |
|
"grad_norm": 1.3200995574966732, |
|
"learning_rate": 7.044706406022393e-05, |
|
"loss": 0.565, |
|
"step": 156 |
|
}, |
|
{ |
|
"epoch": 1.5242718446601942, |
|
"grad_norm": 0.795738070159204, |
|
"learning_rate": 7.027034178369853e-05, |
|
"loss": 0.57, |
|
"step": 157 |
|
}, |
|
{ |
|
"epoch": 1.5339805825242718, |
|
"grad_norm": 0.9946136430312941, |
|
"learning_rate": 7.009222585812755e-05, |
|
"loss": 0.5733, |
|
"step": 158 |
|
}, |
|
{ |
|
"epoch": 1.5436893203883495, |
|
"grad_norm": 1.376070028063213, |
|
"learning_rate": 6.991272448398291e-05, |
|
"loss": 0.5756, |
|
"step": 159 |
|
}, |
|
{ |
|
"epoch": 1.5533980582524272, |
|
"grad_norm": 0.7388882390316432, |
|
"learning_rate": 6.973184592552283e-05, |
|
"loss": 0.5654, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 1.5631067961165048, |
|
"grad_norm": 1.1905180684222885, |
|
"learning_rate": 6.95495985104111e-05, |
|
"loss": 0.5711, |
|
"step": 161 |
|
}, |
|
{ |
|
"epoch": 1.5728155339805825, |
|
"grad_norm": 0.8020799108353651, |
|
"learning_rate": 6.93659906293338e-05, |
|
"loss": 0.5623, |
|
"step": 162 |
|
}, |
|
{ |
|
"epoch": 1.5825242718446602, |
|
"grad_norm": 1.0231881194650578, |
|
"learning_rate": 6.918103073561304e-05, |
|
"loss": 0.57, |
|
"step": 163 |
|
}, |
|
{ |
|
"epoch": 1.5922330097087378, |
|
"grad_norm": 0.7935997277801292, |
|
"learning_rate": 6.899472734481765e-05, |
|
"loss": 0.5697, |
|
"step": 164 |
|
}, |
|
{ |
|
"epoch": 1.6019417475728155, |
|
"grad_norm": 1.0863918666939771, |
|
"learning_rate": 6.880708903437116e-05, |
|
"loss": 0.5649, |
|
"step": 165 |
|
}, |
|
{ |
|
"epoch": 1.6116504854368932, |
|
"grad_norm": 0.7946977064700003, |
|
"learning_rate": 6.8618124443157e-05, |
|
"loss": 0.5629, |
|
"step": 166 |
|
}, |
|
{ |
|
"epoch": 1.6213592233009708, |
|
"grad_norm": 0.7600587207085567, |
|
"learning_rate": 6.842784227112057e-05, |
|
"loss": 0.5652, |
|
"step": 167 |
|
}, |
|
{ |
|
"epoch": 1.6310679611650487, |
|
"grad_norm": 0.9222151118870435, |
|
"learning_rate": 6.823625127886888e-05, |
|
"loss": 0.5602, |
|
"step": 168 |
|
}, |
|
{ |
|
"epoch": 1.6407766990291264, |
|
"grad_norm": 0.8500423865673382, |
|
"learning_rate": 6.804336028726706e-05, |
|
"loss": 0.5638, |
|
"step": 169 |
|
}, |
|
{ |
|
"epoch": 1.650485436893204, |
|
"grad_norm": 1.009548584428779, |
|
"learning_rate": 6.78491781770324e-05, |
|
"loss": 0.5609, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 1.6601941747572817, |
|
"grad_norm": 0.8596245784399692, |
|
"learning_rate": 6.765371388832531e-05, |
|
"loss": 0.5606, |
|
"step": 171 |
|
}, |
|
{ |
|
"epoch": 1.6699029126213594, |
|
"grad_norm": 1.0991682854870917, |
|
"learning_rate": 6.745697642033791e-05, |
|
"loss": 0.5645, |
|
"step": 172 |
|
}, |
|
{ |
|
"epoch": 1.679611650485437, |
|
"grad_norm": 0.9305228321364903, |
|
"learning_rate": 6.725897483087948e-05, |
|
"loss": 0.5572, |
|
"step": 173 |
|
}, |
|
{ |
|
"epoch": 1.6893203883495147, |
|
"grad_norm": 0.9614358976051515, |
|
"learning_rate": 6.705971823595964e-05, |
|
"loss": 0.5616, |
|
"step": 174 |
|
}, |
|
{ |
|
"epoch": 1.6990291262135924, |
|
"grad_norm": 1.0715923701630468, |
|
"learning_rate": 6.685921580936855e-05, |
|
"loss": 0.5629, |
|
"step": 175 |
|
}, |
|
{ |
|
"epoch": 1.70873786407767, |
|
"grad_norm": 1.1630310321701869, |
|
"learning_rate": 6.665747678225454e-05, |
|
"loss": 0.565, |
|
"step": 176 |
|
}, |
|
{ |
|
"epoch": 1.7184466019417477, |
|
"grad_norm": 0.8590469522925508, |
|
"learning_rate": 6.645451044269916e-05, |
|
"loss": 0.5585, |
|
"step": 177 |
|
}, |
|
{ |
|
"epoch": 1.7281553398058254, |
|
"grad_norm": 0.7340387980261763, |
|
"learning_rate": 6.62503261352895e-05, |
|
"loss": 0.5613, |
|
"step": 178 |
|
}, |
|
{ |
|
"epoch": 1.737864077669903, |
|
"grad_norm": 0.710845727118427, |
|
"learning_rate": 6.6044933260688e-05, |
|
"loss": 0.5562, |
|
"step": 179 |
|
}, |
|
{ |
|
"epoch": 1.7475728155339807, |
|
"grad_norm": 0.8027866048243354, |
|
"learning_rate": 6.583834127519966e-05, |
|
"loss": 0.5616, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 1.7572815533980584, |
|
"grad_norm": 1.0941839867674634, |
|
"learning_rate": 6.563055969033659e-05, |
|
"loss": 0.5591, |
|
"step": 181 |
|
}, |
|
{ |
|
"epoch": 1.766990291262136, |
|
"grad_norm": 1.066378946036087, |
|
"learning_rate": 6.54215980723802e-05, |
|
"loss": 0.5612, |
|
"step": 182 |
|
}, |
|
{ |
|
"epoch": 1.7766990291262137, |
|
"grad_norm": 0.7626588888095535, |
|
"learning_rate": 6.521146604194073e-05, |
|
"loss": 0.5528, |
|
"step": 183 |
|
}, |
|
{ |
|
"epoch": 1.7864077669902914, |
|
"grad_norm": 0.8152238302302431, |
|
"learning_rate": 6.500017327351425e-05, |
|
"loss": 0.559, |
|
"step": 184 |
|
}, |
|
{ |
|
"epoch": 1.796116504854369, |
|
"grad_norm": 1.0106535990003993, |
|
"learning_rate": 6.478772949503735e-05, |
|
"loss": 0.5586, |
|
"step": 185 |
|
}, |
|
{ |
|
"epoch": 1.8058252427184467, |
|
"grad_norm": 1.3373530495717711, |
|
"learning_rate": 6.457414448743922e-05, |
|
"loss": 0.5602, |
|
"step": 186 |
|
}, |
|
{ |
|
"epoch": 1.8155339805825244, |
|
"grad_norm": 0.6183996673247697, |
|
"learning_rate": 6.435942808419129e-05, |
|
"loss": 0.5563, |
|
"step": 187 |
|
}, |
|
{ |
|
"epoch": 1.825242718446602, |
|
"grad_norm": 0.4513128772050125, |
|
"learning_rate": 6.41435901708546e-05, |
|
"loss": 0.5558, |
|
"step": 188 |
|
}, |
|
{ |
|
"epoch": 1.8349514563106797, |
|
"grad_norm": 0.8068976523956712, |
|
"learning_rate": 6.392664068462455e-05, |
|
"loss": 0.5535, |
|
"step": 189 |
|
}, |
|
{ |
|
"epoch": 1.8446601941747574, |
|
"grad_norm": 1.1435206608085113, |
|
"learning_rate": 6.370858961387348e-05, |
|
"loss": 0.5589, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 1.854368932038835, |
|
"grad_norm": 0.9238122145618127, |
|
"learning_rate": 6.348944699769078e-05, |
|
"loss": 0.5521, |
|
"step": 191 |
|
}, |
|
{ |
|
"epoch": 1.8640776699029127, |
|
"grad_norm": 0.7049348477803952, |
|
"learning_rate": 6.326922292542067e-05, |
|
"loss": 0.5533, |
|
"step": 192 |
|
}, |
|
{ |
|
"epoch": 1.8737864077669903, |
|
"grad_norm": 0.4705716067710833, |
|
"learning_rate": 6.304792753619768e-05, |
|
"loss": 0.555, |
|
"step": 193 |
|
}, |
|
{ |
|
"epoch": 1.883495145631068, |
|
"grad_norm": 0.5562958157730256, |
|
"learning_rate": 6.282557101847989e-05, |
|
"loss": 0.554, |
|
"step": 194 |
|
}, |
|
{ |
|
"epoch": 1.8932038834951457, |
|
"grad_norm": 0.7776677254914264, |
|
"learning_rate": 6.260216360957982e-05, |
|
"loss": 0.5554, |
|
"step": 195 |
|
}, |
|
{ |
|
"epoch": 1.9029126213592233, |
|
"grad_norm": 0.9626928256285646, |
|
"learning_rate": 6.237771559519309e-05, |
|
"loss": 0.557, |
|
"step": 196 |
|
}, |
|
{ |
|
"epoch": 1.912621359223301, |
|
"grad_norm": 1.0125011253597305, |
|
"learning_rate": 6.215223730892488e-05, |
|
"loss": 0.5576, |
|
"step": 197 |
|
}, |
|
{ |
|
"epoch": 1.9223300970873787, |
|
"grad_norm": 0.8791857222410507, |
|
"learning_rate": 6.192573913181423e-05, |
|
"loss": 0.5601, |
|
"step": 198 |
|
}, |
|
{ |
|
"epoch": 1.9320388349514563, |
|
"grad_norm": 0.7995737202735174, |
|
"learning_rate": 6.169823149185594e-05, |
|
"loss": 0.558, |
|
"step": 199 |
|
}, |
|
{ |
|
"epoch": 1.941747572815534, |
|
"grad_norm": 1.1194920590127924, |
|
"learning_rate": 6.146972486352062e-05, |
|
"loss": 0.5547, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 1.9514563106796117, |
|
"grad_norm": 0.8287113779997599, |
|
"learning_rate": 6.124022976727246e-05, |
|
"loss": 0.5562, |
|
"step": 201 |
|
}, |
|
{ |
|
"epoch": 1.9611650485436893, |
|
"grad_norm": 0.5720384230610551, |
|
"learning_rate": 6.1009756769084625e-05, |
|
"loss": 0.5549, |
|
"step": 202 |
|
}, |
|
{ |
|
"epoch": 1.970873786407767, |
|
"grad_norm": 0.9412252549729389, |
|
"learning_rate": 6.077831647995312e-05, |
|
"loss": 0.5561, |
|
"step": 203 |
|
}, |
|
{ |
|
"epoch": 1.9805825242718447, |
|
"grad_norm": 1.1651868503900755, |
|
"learning_rate": 6.0545919555408026e-05, |
|
"loss": 0.5549, |
|
"step": 204 |
|
}, |
|
{ |
|
"epoch": 1.9902912621359223, |
|
"grad_norm": 0.6059833274896135, |
|
"learning_rate": 6.0312576695023015e-05, |
|
"loss": 0.5532, |
|
"step": 205 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"grad_norm": 0.7944875296787923, |
|
"learning_rate": 6.007829864192274e-05, |
|
"loss": 0.5471, |
|
"step": 206 |
|
}, |
|
{ |
|
"epoch": 2.0097087378640777, |
|
"grad_norm": 0.9861185591611118, |
|
"learning_rate": 5.9843096182288184e-05, |
|
"loss": 0.5254, |
|
"step": 207 |
|
}, |
|
{ |
|
"epoch": 2.0194174757281553, |
|
"grad_norm": 0.9193682360720784, |
|
"learning_rate": 5.960698014486009e-05, |
|
"loss": 0.5228, |
|
"step": 208 |
|
}, |
|
{ |
|
"epoch": 2.029126213592233, |
|
"grad_norm": 0.8942942482878242, |
|
"learning_rate": 5.936996140044041e-05, |
|
"loss": 0.5213, |
|
"step": 209 |
|
}, |
|
{ |
|
"epoch": 2.0388349514563107, |
|
"grad_norm": 0.9820673732498754, |
|
"learning_rate": 5.9132050861391774e-05, |
|
"loss": 0.5281, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 2.0485436893203883, |
|
"grad_norm": 1.124245573357021, |
|
"learning_rate": 5.889325948113513e-05, |
|
"loss": 0.5252, |
|
"step": 211 |
|
}, |
|
{ |
|
"epoch": 2.058252427184466, |
|
"grad_norm": 0.9569857392646174, |
|
"learning_rate": 5.865359825364543e-05, |
|
"loss": 0.5295, |
|
"step": 212 |
|
}, |
|
{ |
|
"epoch": 2.0679611650485437, |
|
"grad_norm": 0.6979418510578903, |
|
"learning_rate": 5.841307821294546e-05, |
|
"loss": 0.5212, |
|
"step": 213 |
|
}, |
|
{ |
|
"epoch": 2.0776699029126213, |
|
"grad_norm": 0.8030912411204089, |
|
"learning_rate": 5.8171710432597824e-05, |
|
"loss": 0.525, |
|
"step": 214 |
|
}, |
|
{ |
|
"epoch": 2.087378640776699, |
|
"grad_norm": 1.0921718953136728, |
|
"learning_rate": 5.792950602519516e-05, |
|
"loss": 0.5249, |
|
"step": 215 |
|
}, |
|
{ |
|
"epoch": 2.0970873786407767, |
|
"grad_norm": 0.6815361141499702, |
|
"learning_rate": 5.768647614184846e-05, |
|
"loss": 0.5256, |
|
"step": 216 |
|
}, |
|
{ |
|
"epoch": 2.1067961165048543, |
|
"grad_norm": 0.5765577764382596, |
|
"learning_rate": 5.744263197167369e-05, |
|
"loss": 0.5196, |
|
"step": 217 |
|
}, |
|
{ |
|
"epoch": 2.116504854368932, |
|
"grad_norm": 0.8497374889157474, |
|
"learning_rate": 5.719798474127668e-05, |
|
"loss": 0.5245, |
|
"step": 218 |
|
}, |
|
{ |
|
"epoch": 2.1262135922330097, |
|
"grad_norm": 0.8473330942862705, |
|
"learning_rate": 5.69525457142362e-05, |
|
"loss": 0.5266, |
|
"step": 219 |
|
}, |
|
{ |
|
"epoch": 2.1359223300970873, |
|
"grad_norm": 0.7383851235625609, |
|
"learning_rate": 5.6706326190585416e-05, |
|
"loss": 0.5241, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 2.145631067961165, |
|
"grad_norm": 0.731625229205727, |
|
"learning_rate": 5.6459337506291594e-05, |
|
"loss": 0.5214, |
|
"step": 221 |
|
}, |
|
{ |
|
"epoch": 2.1553398058252426, |
|
"grad_norm": 0.6257327662890664, |
|
"learning_rate": 5.621159103273424e-05, |
|
"loss": 0.521, |
|
"step": 222 |
|
}, |
|
{ |
|
"epoch": 2.1650485436893203, |
|
"grad_norm": 0.5394134102082522, |
|
"learning_rate": 5.596309817618156e-05, |
|
"loss": 0.5202, |
|
"step": 223 |
|
}, |
|
{ |
|
"epoch": 2.174757281553398, |
|
"grad_norm": 0.5108441929525305, |
|
"learning_rate": 5.571387037726524e-05, |
|
"loss": 0.5243, |
|
"step": 224 |
|
}, |
|
{ |
|
"epoch": 2.1844660194174756, |
|
"grad_norm": 0.6078187548666663, |
|
"learning_rate": 5.5463919110453836e-05, |
|
"loss": 0.5196, |
|
"step": 225 |
|
}, |
|
{ |
|
"epoch": 2.1941747572815533, |
|
"grad_norm": 0.663473200149725, |
|
"learning_rate": 5.521325588352437e-05, |
|
"loss": 0.5194, |
|
"step": 226 |
|
}, |
|
{ |
|
"epoch": 2.203883495145631, |
|
"grad_norm": 0.6429831831672274, |
|
"learning_rate": 5.496189223703262e-05, |
|
"loss": 0.5173, |
|
"step": 227 |
|
}, |
|
{ |
|
"epoch": 2.2135922330097086, |
|
"grad_norm": 0.6220750410182839, |
|
"learning_rate": 5.47098397437817e-05, |
|
"loss": 0.5209, |
|
"step": 228 |
|
}, |
|
{ |
|
"epoch": 2.2233009708737863, |
|
"grad_norm": 0.5886484774766658, |
|
"learning_rate": 5.4457110008289306e-05, |
|
"loss": 0.5155, |
|
"step": 229 |
|
}, |
|
{ |
|
"epoch": 2.233009708737864, |
|
"grad_norm": 0.4437653008171185, |
|
"learning_rate": 5.420371466625339e-05, |
|
"loss": 0.5185, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 2.2427184466019416, |
|
"grad_norm": 0.39870906238532716, |
|
"learning_rate": 5.3949665384016556e-05, |
|
"loss": 0.5217, |
|
"step": 231 |
|
}, |
|
{ |
|
"epoch": 2.2524271844660193, |
|
"grad_norm": 0.5717388200227635, |
|
"learning_rate": 5.369497385802877e-05, |
|
"loss": 0.5196, |
|
"step": 232 |
|
}, |
|
{ |
|
"epoch": 2.262135922330097, |
|
"grad_norm": 0.6577624762016366, |
|
"learning_rate": 5.3439651814309044e-05, |
|
"loss": 0.5182, |
|
"step": 233 |
|
}, |
|
{ |
|
"epoch": 2.2718446601941746, |
|
"grad_norm": 0.6946628514135419, |
|
"learning_rate": 5.3183711007905434e-05, |
|
"loss": 0.5188, |
|
"step": 234 |
|
}, |
|
{ |
|
"epoch": 2.2815533980582523, |
|
"grad_norm": 0.6759552411316988, |
|
"learning_rate": 5.2927163222353876e-05, |
|
"loss": 0.5171, |
|
"step": 235 |
|
}, |
|
{ |
|
"epoch": 2.29126213592233, |
|
"grad_norm": 0.6300902669161529, |
|
"learning_rate": 5.2670020269135703e-05, |
|
"loss": 0.5224, |
|
"step": 236 |
|
}, |
|
{ |
|
"epoch": 2.3009708737864076, |
|
"grad_norm": 0.5185045096628276, |
|
"learning_rate": 5.241229398713379e-05, |
|
"loss": 0.5268, |
|
"step": 237 |
|
}, |
|
{ |
|
"epoch": 2.3106796116504853, |
|
"grad_norm": 0.4449400609212431, |
|
"learning_rate": 5.2153996242087544e-05, |
|
"loss": 0.5207, |
|
"step": 238 |
|
}, |
|
{ |
|
"epoch": 2.320388349514563, |
|
"grad_norm": 0.4056127362971999, |
|
"learning_rate": 5.1895138926046553e-05, |
|
"loss": 0.5207, |
|
"step": 239 |
|
}, |
|
{ |
|
"epoch": 2.3300970873786406, |
|
"grad_norm": 0.31876595932444957, |
|
"learning_rate": 5.16357339568231e-05, |
|
"loss": 0.5233, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 2.3398058252427183, |
|
"grad_norm": 0.3826214088490694, |
|
"learning_rate": 5.13757932774435e-05, |
|
"loss": 0.5229, |
|
"step": 241 |
|
}, |
|
{ |
|
"epoch": 2.349514563106796, |
|
"grad_norm": 0.39293509694649387, |
|
"learning_rate": 5.111532885559816e-05, |
|
"loss": 0.517, |
|
"step": 242 |
|
}, |
|
{ |
|
"epoch": 2.3592233009708736, |
|
"grad_norm": 0.3322451238923787, |
|
"learning_rate": 5.08543526830907e-05, |
|
"loss": 0.5183, |
|
"step": 243 |
|
}, |
|
{ |
|
"epoch": 2.3689320388349513, |
|
"grad_norm": 0.35774381456905935, |
|
"learning_rate": 5.05928767752857e-05, |
|
"loss": 0.5207, |
|
"step": 244 |
|
}, |
|
{ |
|
"epoch": 2.378640776699029, |
|
"grad_norm": 0.3490143073371343, |
|
"learning_rate": 5.033091317055565e-05, |
|
"loss": 0.5185, |
|
"step": 245 |
|
}, |
|
{ |
|
"epoch": 2.3883495145631066, |
|
"grad_norm": 0.3488449914720745, |
|
"learning_rate": 5.006847392972664e-05, |
|
"loss": 0.5233, |
|
"step": 246 |
|
}, |
|
{ |
|
"epoch": 2.3980582524271843, |
|
"grad_norm": 0.4080875571104722, |
|
"learning_rate": 4.9805571135523066e-05, |
|
"loss": 0.5196, |
|
"step": 247 |
|
}, |
|
{ |
|
"epoch": 2.407766990291262, |
|
"grad_norm": 0.3959917551246165, |
|
"learning_rate": 4.954221689201138e-05, |
|
"loss": 0.5194, |
|
"step": 248 |
|
}, |
|
{ |
|
"epoch": 2.4174757281553396, |
|
"grad_norm": 0.331330529722134, |
|
"learning_rate": 4.9278423324042776e-05, |
|
"loss": 0.5175, |
|
"step": 249 |
|
}, |
|
{ |
|
"epoch": 2.4271844660194173, |
|
"grad_norm": 0.3809362865456529, |
|
"learning_rate": 4.901420257669501e-05, |
|
"loss": 0.5195, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 2.436893203883495, |
|
"grad_norm": 0.3961884838888781, |
|
"learning_rate": 4.8749566814713204e-05, |
|
"loss": 0.5203, |
|
"step": 251 |
|
}, |
|
{ |
|
"epoch": 2.4466019417475726, |
|
"grad_norm": 0.28927135799547965, |
|
"learning_rate": 4.848452822194977e-05, |
|
"loss": 0.522, |
|
"step": 252 |
|
}, |
|
{ |
|
"epoch": 2.4563106796116507, |
|
"grad_norm": 0.32321223873962246, |
|
"learning_rate": 4.821909900080348e-05, |
|
"loss": 0.5192, |
|
"step": 253 |
|
}, |
|
{ |
|
"epoch": 2.466019417475728, |
|
"grad_norm": 0.3034931174843224, |
|
"learning_rate": 4.7953291371657724e-05, |
|
"loss": 0.5214, |
|
"step": 254 |
|
}, |
|
{ |
|
"epoch": 2.475728155339806, |
|
"grad_norm": 0.2955075395432886, |
|
"learning_rate": 4.768711757231775e-05, |
|
"loss": 0.5197, |
|
"step": 255 |
|
}, |
|
{ |
|
"epoch": 2.4854368932038833, |
|
"grad_norm": 0.33319864095909213, |
|
"learning_rate": 4.742058985744738e-05, |
|
"loss": 0.5199, |
|
"step": 256 |
|
}, |
|
{ |
|
"epoch": 2.4951456310679614, |
|
"grad_norm": 0.3018636044546277, |
|
"learning_rate": 4.715372049800467e-05, |
|
"loss": 0.5234, |
|
"step": 257 |
|
}, |
|
{ |
|
"epoch": 2.5048543689320386, |
|
"grad_norm": 0.28068625121120355, |
|
"learning_rate": 4.688652178067708e-05, |
|
"loss": 0.5223, |
|
"step": 258 |
|
}, |
|
{ |
|
"epoch": 2.5145631067961167, |
|
"grad_norm": 0.3229351457016303, |
|
"learning_rate": 4.661900600731571e-05, |
|
"loss": 0.5233, |
|
"step": 259 |
|
}, |
|
{ |
|
"epoch": 2.524271844660194, |
|
"grad_norm": 0.3762838763816941, |
|
"learning_rate": 4.635118549436895e-05, |
|
"loss": 0.5238, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 2.533980582524272, |
|
"grad_norm": 0.36897391891079456, |
|
"learning_rate": 4.608307257231541e-05, |
|
"loss": 0.5184, |
|
"step": 261 |
|
}, |
|
{ |
|
"epoch": 2.5436893203883493, |
|
"grad_norm": 0.3195793897829173, |
|
"learning_rate": 4.5814679585096265e-05, |
|
"loss": 0.5169, |
|
"step": 262 |
|
}, |
|
{ |
|
"epoch": 2.5533980582524274, |
|
"grad_norm": 0.2773810615279961, |
|
"learning_rate": 4.5546018889546876e-05, |
|
"loss": 0.5168, |
|
"step": 263 |
|
}, |
|
{ |
|
"epoch": 2.5631067961165046, |
|
"grad_norm": 0.3441559401937381, |
|
"learning_rate": 4.527710285482799e-05, |
|
"loss": 0.517, |
|
"step": 264 |
|
}, |
|
{ |
|
"epoch": 2.5728155339805827, |
|
"grad_norm": 0.37273218540100866, |
|
"learning_rate": 4.500794386185609e-05, |
|
"loss": 0.5185, |
|
"step": 265 |
|
}, |
|
{ |
|
"epoch": 2.58252427184466, |
|
"grad_norm": 0.26503820958744123, |
|
"learning_rate": 4.473855430273355e-05, |
|
"loss": 0.5164, |
|
"step": 266 |
|
}, |
|
{ |
|
"epoch": 2.592233009708738, |
|
"grad_norm": 0.2934088823294493, |
|
"learning_rate": 4.4468946580178026e-05, |
|
"loss": 0.5127, |
|
"step": 267 |
|
}, |
|
{ |
|
"epoch": 2.6019417475728153, |
|
"grad_norm": 0.3186295434544236, |
|
"learning_rate": 4.4199133106951407e-05, |
|
"loss": 0.5173, |
|
"step": 268 |
|
}, |
|
{ |
|
"epoch": 2.6116504854368934, |
|
"grad_norm": 0.3309244613515348, |
|
"learning_rate": 4.3929126305288364e-05, |
|
"loss": 0.5229, |
|
"step": 269 |
|
}, |
|
{ |
|
"epoch": 2.6213592233009706, |
|
"grad_norm": 0.26814510063287106, |
|
"learning_rate": 4.365893860632444e-05, |
|
"loss": 0.5167, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 2.6310679611650487, |
|
"grad_norm": 0.3074091286659034, |
|
"learning_rate": 4.338858244952369e-05, |
|
"loss": 0.5156, |
|
"step": 271 |
|
}, |
|
{ |
|
"epoch": 2.6407766990291264, |
|
"grad_norm": 0.3823340679989687, |
|
"learning_rate": 4.3118070282106e-05, |
|
"loss": 0.5168, |
|
"step": 272 |
|
}, |
|
{ |
|
"epoch": 2.650485436893204, |
|
"grad_norm": 0.47904147679754805, |
|
"learning_rate": 4.2847414558473987e-05, |
|
"loss": 0.5184, |
|
"step": 273 |
|
}, |
|
{ |
|
"epoch": 2.6601941747572817, |
|
"grad_norm": 0.4269268816899063, |
|
"learning_rate": 4.257662773963961e-05, |
|
"loss": 0.5173, |
|
"step": 274 |
|
}, |
|
{ |
|
"epoch": 2.6699029126213594, |
|
"grad_norm": 0.3094464875254195, |
|
"learning_rate": 4.230572229265045e-05, |
|
"loss": 0.5142, |
|
"step": 275 |
|
}, |
|
{ |
|
"epoch": 2.679611650485437, |
|
"grad_norm": 0.31791889845655724, |
|
"learning_rate": 4.2034710690015766e-05, |
|
"loss": 0.517, |
|
"step": 276 |
|
}, |
|
{ |
|
"epoch": 2.6893203883495147, |
|
"grad_norm": 0.4064644387432894, |
|
"learning_rate": 4.17636054091322e-05, |
|
"loss": 0.516, |
|
"step": 277 |
|
}, |
|
{ |
|
"epoch": 2.6990291262135924, |
|
"grad_norm": 0.3053310406953197, |
|
"learning_rate": 4.1492418931709366e-05, |
|
"loss": 0.5175, |
|
"step": 278 |
|
}, |
|
{ |
|
"epoch": 2.70873786407767, |
|
"grad_norm": 0.2765928946593284, |
|
"learning_rate": 4.1221163743195175e-05, |
|
"loss": 0.5185, |
|
"step": 279 |
|
}, |
|
{ |
|
"epoch": 2.7184466019417477, |
|
"grad_norm": 0.3378318482897848, |
|
"learning_rate": 4.094985233220098e-05, |
|
"loss": 0.5211, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 2.7281553398058254, |
|
"grad_norm": 0.3143734127353884, |
|
"learning_rate": 4.067849718992665e-05, |
|
"loss": 0.5197, |
|
"step": 281 |
|
}, |
|
{ |
|
"epoch": 2.737864077669903, |
|
"grad_norm": 0.259820524121846, |
|
"learning_rate": 4.040711080958547e-05, |
|
"loss": 0.5259, |
|
"step": 282 |
|
}, |
|
{ |
|
"epoch": 2.7475728155339807, |
|
"grad_norm": 0.32611605738060934, |
|
"learning_rate": 4.013570568582883e-05, |
|
"loss": 0.5174, |
|
"step": 283 |
|
}, |
|
{ |
|
"epoch": 2.7572815533980584, |
|
"grad_norm": 0.29760137014246574, |
|
"learning_rate": 3.986429431417118e-05, |
|
"loss": 0.5124, |
|
"step": 284 |
|
}, |
|
{ |
|
"epoch": 2.766990291262136, |
|
"grad_norm": 0.26849482707048117, |
|
"learning_rate": 3.959288919041455e-05, |
|
"loss": 0.5116, |
|
"step": 285 |
|
}, |
|
{ |
|
"epoch": 2.7766990291262137, |
|
"grad_norm": 0.28358089368587186, |
|
"learning_rate": 3.9321502810073354e-05, |
|
"loss": 0.5179, |
|
"step": 286 |
|
}, |
|
{ |
|
"epoch": 2.7864077669902914, |
|
"grad_norm": 0.3098687865760963, |
|
"learning_rate": 3.905014766779904e-05, |
|
"loss": 0.5148, |
|
"step": 287 |
|
}, |
|
{ |
|
"epoch": 2.796116504854369, |
|
"grad_norm": 0.3018433729775123, |
|
"learning_rate": 3.8778836256804845e-05, |
|
"loss": 0.5165, |
|
"step": 288 |
|
}, |
|
{ |
|
"epoch": 2.8058252427184467, |
|
"grad_norm": 0.23893414660568268, |
|
"learning_rate": 3.850758106829065e-05, |
|
"loss": 0.5161, |
|
"step": 289 |
|
}, |
|
{ |
|
"epoch": 2.8155339805825244, |
|
"grad_norm": 0.2524448827230724, |
|
"learning_rate": 3.823639459086781e-05, |
|
"loss": 0.5152, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 2.825242718446602, |
|
"grad_norm": 0.2770033187618851, |
|
"learning_rate": 3.796528930998425e-05, |
|
"loss": 0.5145, |
|
"step": 291 |
|
}, |
|
{ |
|
"epoch": 2.8349514563106797, |
|
"grad_norm": 0.25224488768181713, |
|
"learning_rate": 3.769427770734955e-05, |
|
"loss": 0.5195, |
|
"step": 292 |
|
}, |
|
{ |
|
"epoch": 2.8446601941747574, |
|
"grad_norm": 0.27486298363736394, |
|
"learning_rate": 3.742337226036041e-05, |
|
"loss": 0.5157, |
|
"step": 293 |
|
}, |
|
{ |
|
"epoch": 2.854368932038835, |
|
"grad_norm": 0.23960082819762807, |
|
"learning_rate": 3.715258544152603e-05, |
|
"loss": 0.5154, |
|
"step": 294 |
|
}, |
|
{ |
|
"epoch": 2.8640776699029127, |
|
"grad_norm": 0.2710779329439077, |
|
"learning_rate": 3.688192971789401e-05, |
|
"loss": 0.5199, |
|
"step": 295 |
|
}, |
|
{ |
|
"epoch": 2.8737864077669903, |
|
"grad_norm": 0.256469645800299, |
|
"learning_rate": 3.6611417550476324e-05, |
|
"loss": 0.5149, |
|
"step": 296 |
|
}, |
|
{ |
|
"epoch": 2.883495145631068, |
|
"grad_norm": 0.3010842522468919, |
|
"learning_rate": 3.6341061393675574e-05, |
|
"loss": 0.5188, |
|
"step": 297 |
|
}, |
|
{ |
|
"epoch": 2.8932038834951457, |
|
"grad_norm": 0.24615248139015927, |
|
"learning_rate": 3.607087369471164e-05, |
|
"loss": 0.5142, |
|
"step": 298 |
|
}, |
|
{ |
|
"epoch": 2.9029126213592233, |
|
"grad_norm": 0.3025647213003904, |
|
"learning_rate": 3.580086689304861e-05, |
|
"loss": 0.5188, |
|
"step": 299 |
|
}, |
|
{ |
|
"epoch": 2.912621359223301, |
|
"grad_norm": 0.2506413395712758, |
|
"learning_rate": 3.553105341982198e-05, |
|
"loss": 0.5127, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 2.9223300970873787, |
|
"grad_norm": 0.26488744144075266, |
|
"learning_rate": 3.526144569726647e-05, |
|
"loss": 0.5148, |
|
"step": 301 |
|
}, |
|
{ |
|
"epoch": 2.9320388349514563, |
|
"grad_norm": 0.23180961021643326, |
|
"learning_rate": 3.499205613814393e-05, |
|
"loss": 0.518, |
|
"step": 302 |
|
}, |
|
{ |
|
"epoch": 2.941747572815534, |
|
"grad_norm": 0.24614944764274665, |
|
"learning_rate": 3.472289714517203e-05, |
|
"loss": 0.5119, |
|
"step": 303 |
|
}, |
|
{ |
|
"epoch": 2.9514563106796117, |
|
"grad_norm": 0.22977263011082621, |
|
"learning_rate": 3.445398111045313e-05, |
|
"loss": 0.5184, |
|
"step": 304 |
|
}, |
|
{ |
|
"epoch": 2.9611650485436893, |
|
"grad_norm": 0.21286238167178875, |
|
"learning_rate": 3.418532041490375e-05, |
|
"loss": 0.517, |
|
"step": 305 |
|
}, |
|
{ |
|
"epoch": 2.970873786407767, |
|
"grad_norm": 0.2548130433665424, |
|
"learning_rate": 3.3916927427684595e-05, |
|
"loss": 0.5186, |
|
"step": 306 |
|
}, |
|
{ |
|
"epoch": 2.9805825242718447, |
|
"grad_norm": 0.31989149824666413, |
|
"learning_rate": 3.364881450563106e-05, |
|
"loss": 0.5153, |
|
"step": 307 |
|
}, |
|
{ |
|
"epoch": 2.9902912621359223, |
|
"grad_norm": 0.2762238599236643, |
|
"learning_rate": 3.338099399268429e-05, |
|
"loss": 0.5167, |
|
"step": 308 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"grad_norm": 0.20481446931191682, |
|
"learning_rate": 3.311347821932292e-05, |
|
"loss": 0.5092, |
|
"step": 309 |
|
}, |
|
{ |
|
"epoch": 3.0097087378640777, |
|
"grad_norm": 0.22631177358316332, |
|
"learning_rate": 3.284627950199535e-05, |
|
"loss": 0.4909, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 3.0194174757281553, |
|
"grad_norm": 0.21905855651481868, |
|
"learning_rate": 3.2579410142552646e-05, |
|
"loss": 0.4889, |
|
"step": 311 |
|
}, |
|
{ |
|
"epoch": 3.029126213592233, |
|
"grad_norm": 0.268856232171971, |
|
"learning_rate": 3.231288242768226e-05, |
|
"loss": 0.4882, |
|
"step": 312 |
|
}, |
|
{ |
|
"epoch": 3.0388349514563107, |
|
"grad_norm": 0.2631480748788351, |
|
"learning_rate": 3.204670862834228e-05, |
|
"loss": 0.4822, |
|
"step": 313 |
|
}, |
|
{ |
|
"epoch": 3.0485436893203883, |
|
"grad_norm": 0.28121213579534965, |
|
"learning_rate": 3.178090099919653e-05, |
|
"loss": 0.4848, |
|
"step": 314 |
|
}, |
|
{ |
|
"epoch": 3.058252427184466, |
|
"grad_norm": 0.2553829402200111, |
|
"learning_rate": 3.1515471778050246e-05, |
|
"loss": 0.4853, |
|
"step": 315 |
|
}, |
|
{ |
|
"epoch": 3.0679611650485437, |
|
"grad_norm": 0.29584988541778207, |
|
"learning_rate": 3.12504331852868e-05, |
|
"loss": 0.4835, |
|
"step": 316 |
|
}, |
|
{ |
|
"epoch": 3.0776699029126213, |
|
"grad_norm": 0.21912251520340542, |
|
"learning_rate": 3.098579742330499e-05, |
|
"loss": 0.4866, |
|
"step": 317 |
|
}, |
|
{ |
|
"epoch": 3.087378640776699, |
|
"grad_norm": 0.2816186665047795, |
|
"learning_rate": 3.0721576675957224e-05, |
|
"loss": 0.4856, |
|
"step": 318 |
|
}, |
|
{ |
|
"epoch": 3.0970873786407767, |
|
"grad_norm": 0.24175134230414194, |
|
"learning_rate": 3.0457783107988642e-05, |
|
"loss": 0.4886, |
|
"step": 319 |
|
}, |
|
{ |
|
"epoch": 3.1067961165048543, |
|
"grad_norm": 0.22350452741221052, |
|
"learning_rate": 3.0194428864476947e-05, |
|
"loss": 0.4836, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 3.116504854368932, |
|
"grad_norm": 0.25073180546811125, |
|
"learning_rate": 2.9931526070273374e-05, |
|
"loss": 0.4884, |
|
"step": 321 |
|
}, |
|
{ |
|
"epoch": 3.1262135922330097, |
|
"grad_norm": 0.19079613465758094, |
|
"learning_rate": 2.9669086829444364e-05, |
|
"loss": 0.485, |
|
"step": 322 |
|
}, |
|
{ |
|
"epoch": 3.1359223300970873, |
|
"grad_norm": 0.23960288437553956, |
|
"learning_rate": 2.9407123224714312e-05, |
|
"loss": 0.4856, |
|
"step": 323 |
|
}, |
|
{ |
|
"epoch": 3.145631067961165, |
|
"grad_norm": 0.22627618578886288, |
|
"learning_rate": 2.9145647316909306e-05, |
|
"loss": 0.4862, |
|
"step": 324 |
|
}, |
|
{ |
|
"epoch": 3.1553398058252426, |
|
"grad_norm": 0.1960335183955326, |
|
"learning_rate": 2.8884671144401833e-05, |
|
"loss": 0.4869, |
|
"step": 325 |
|
}, |
|
{ |
|
"epoch": 3.1650485436893203, |
|
"grad_norm": 0.2079951634137142, |
|
"learning_rate": 2.8624206722556508e-05, |
|
"loss": 0.491, |
|
"step": 326 |
|
}, |
|
{ |
|
"epoch": 3.174757281553398, |
|
"grad_norm": 0.18132268629371445, |
|
"learning_rate": 2.8364266043176897e-05, |
|
"loss": 0.4876, |
|
"step": 327 |
|
}, |
|
{ |
|
"epoch": 3.1844660194174756, |
|
"grad_norm": 0.18068511996455502, |
|
"learning_rate": 2.810486107395347e-05, |
|
"loss": 0.4858, |
|
"step": 328 |
|
}, |
|
{ |
|
"epoch": 3.1941747572815533, |
|
"grad_norm": 0.19264695463261375, |
|
"learning_rate": 2.7846003757912473e-05, |
|
"loss": 0.4891, |
|
"step": 329 |
|
}, |
|
{ |
|
"epoch": 3.203883495145631, |
|
"grad_norm": 0.16897146599425877, |
|
"learning_rate": 2.7587706012866227e-05, |
|
"loss": 0.4868, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 3.2135922330097086, |
|
"grad_norm": 0.2264234031229311, |
|
"learning_rate": 2.7329979730864313e-05, |
|
"loss": 0.4844, |
|
"step": 331 |
|
}, |
|
{ |
|
"epoch": 3.2233009708737863, |
|
"grad_norm": 0.2433703396011944, |
|
"learning_rate": 2.707283677764613e-05, |
|
"loss": 0.4855, |
|
"step": 332 |
|
}, |
|
{ |
|
"epoch": 3.233009708737864, |
|
"grad_norm": 0.1780724623798919, |
|
"learning_rate": 2.6816288992094573e-05, |
|
"loss": 0.4853, |
|
"step": 333 |
|
}, |
|
{ |
|
"epoch": 3.2427184466019416, |
|
"grad_norm": 0.2550763857085153, |
|
"learning_rate": 2.6560348185690956e-05, |
|
"loss": 0.4865, |
|
"step": 334 |
|
}, |
|
{ |
|
"epoch": 3.2524271844660193, |
|
"grad_norm": 0.18943111426329542, |
|
"learning_rate": 2.6305026141971227e-05, |
|
"loss": 0.4869, |
|
"step": 335 |
|
}, |
|
{ |
|
"epoch": 3.262135922330097, |
|
"grad_norm": 0.21284295474198855, |
|
"learning_rate": 2.6050334615983467e-05, |
|
"loss": 0.4872, |
|
"step": 336 |
|
}, |
|
{ |
|
"epoch": 3.2718446601941746, |
|
"grad_norm": 0.22131826031463572, |
|
"learning_rate": 2.5796285333746615e-05, |
|
"loss": 0.4816, |
|
"step": 337 |
|
}, |
|
{ |
|
"epoch": 3.2815533980582523, |
|
"grad_norm": 0.182135484368646, |
|
"learning_rate": 2.554288999171072e-05, |
|
"loss": 0.4915, |
|
"step": 338 |
|
}, |
|
{ |
|
"epoch": 3.29126213592233, |
|
"grad_norm": 0.23761469952772257, |
|
"learning_rate": 2.5290160256218313e-05, |
|
"loss": 0.4853, |
|
"step": 339 |
|
}, |
|
{ |
|
"epoch": 3.3009708737864076, |
|
"grad_norm": 0.1840898568254873, |
|
"learning_rate": 2.5038107762967393e-05, |
|
"loss": 0.4883, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 3.3106796116504853, |
|
"grad_norm": 0.21359795567551282, |
|
"learning_rate": 2.4786744116475638e-05, |
|
"loss": 0.4871, |
|
"step": 341 |
|
}, |
|
{ |
|
"epoch": 3.320388349514563, |
|
"grad_norm": 0.24890926664546134, |
|
"learning_rate": 2.4536080889546177e-05, |
|
"loss": 0.489, |
|
"step": 342 |
|
}, |
|
{ |
|
"epoch": 3.3300970873786406, |
|
"grad_norm": 0.20782018056580667, |
|
"learning_rate": 2.4286129622734764e-05, |
|
"loss": 0.4844, |
|
"step": 343 |
|
}, |
|
{ |
|
"epoch": 3.3398058252427183, |
|
"grad_norm": 0.21924373313456091, |
|
"learning_rate": 2.4036901823818454e-05, |
|
"loss": 0.4863, |
|
"step": 344 |
|
}, |
|
{ |
|
"epoch": 3.349514563106796, |
|
"grad_norm": 0.19867052689304365, |
|
"learning_rate": 2.378840896726577e-05, |
|
"loss": 0.4908, |
|
"step": 345 |
|
}, |
|
{ |
|
"epoch": 3.3592233009708736, |
|
"grad_norm": 0.2327076797489768, |
|
"learning_rate": 2.3540662493708423e-05, |
|
"loss": 0.4869, |
|
"step": 346 |
|
}, |
|
{ |
|
"epoch": 3.3689320388349513, |
|
"grad_norm": 0.17819481518798666, |
|
"learning_rate": 2.3293673809414598e-05, |
|
"loss": 0.4816, |
|
"step": 347 |
|
}, |
|
{ |
|
"epoch": 3.378640776699029, |
|
"grad_norm": 0.17940622231016476, |
|
"learning_rate": 2.3047454285763793e-05, |
|
"loss": 0.4874, |
|
"step": 348 |
|
}, |
|
{ |
|
"epoch": 3.3883495145631066, |
|
"grad_norm": 0.16039118497959268, |
|
"learning_rate": 2.2802015258723324e-05, |
|
"loss": 0.4869, |
|
"step": 349 |
|
}, |
|
{ |
|
"epoch": 3.3980582524271843, |
|
"grad_norm": 0.17049865419413107, |
|
"learning_rate": 2.2557368028326324e-05, |
|
"loss": 0.4845, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 3.407766990291262, |
|
"grad_norm": 0.18902552117069968, |
|
"learning_rate": 2.2313523858151554e-05, |
|
"loss": 0.4872, |
|
"step": 351 |
|
}, |
|
{ |
|
"epoch": 3.4174757281553396, |
|
"grad_norm": 0.15937546411881842, |
|
"learning_rate": 2.207049397480485e-05, |
|
"loss": 0.4845, |
|
"step": 352 |
|
}, |
|
{ |
|
"epoch": 3.4271844660194173, |
|
"grad_norm": 0.19899647930250075, |
|
"learning_rate": 2.1828289567402173e-05, |
|
"loss": 0.4874, |
|
"step": 353 |
|
}, |
|
{ |
|
"epoch": 3.436893203883495, |
|
"grad_norm": 0.17339320655418355, |
|
"learning_rate": 2.1586921787054564e-05, |
|
"loss": 0.4871, |
|
"step": 354 |
|
}, |
|
{ |
|
"epoch": 3.4466019417475726, |
|
"grad_norm": 0.17543747218166306, |
|
"learning_rate": 2.1346401746354576e-05, |
|
"loss": 0.4828, |
|
"step": 355 |
|
}, |
|
{ |
|
"epoch": 3.4563106796116507, |
|
"grad_norm": 0.17798441146417476, |
|
"learning_rate": 2.110674051886488e-05, |
|
"loss": 0.4838, |
|
"step": 356 |
|
}, |
|
{ |
|
"epoch": 3.466019417475728, |
|
"grad_norm": 0.16029275511201987, |
|
"learning_rate": 2.0867949138608242e-05, |
|
"loss": 0.4841, |
|
"step": 357 |
|
}, |
|
{ |
|
"epoch": 3.475728155339806, |
|
"grad_norm": 0.1663439154413797, |
|
"learning_rate": 2.06300385995596e-05, |
|
"loss": 0.4865, |
|
"step": 358 |
|
}, |
|
{ |
|
"epoch": 3.4854368932038833, |
|
"grad_norm": 0.16712634012536365, |
|
"learning_rate": 2.0393019855139915e-05, |
|
"loss": 0.4833, |
|
"step": 359 |
|
}, |
|
{ |
|
"epoch": 3.4951456310679614, |
|
"grad_norm": 0.17251097817233577, |
|
"learning_rate": 2.0156903817711812e-05, |
|
"loss": 0.485, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 3.5048543689320386, |
|
"grad_norm": 0.17262950471482985, |
|
"learning_rate": 1.9921701358077265e-05, |
|
"loss": 0.4846, |
|
"step": 361 |
|
}, |
|
{ |
|
"epoch": 3.5145631067961167, |
|
"grad_norm": 0.17329043714528009, |
|
"learning_rate": 1.9687423304976994e-05, |
|
"loss": 0.4835, |
|
"step": 362 |
|
}, |
|
{ |
|
"epoch": 3.524271844660194, |
|
"grad_norm": 0.16219535331159693, |
|
"learning_rate": 1.9454080444591998e-05, |
|
"loss": 0.4849, |
|
"step": 363 |
|
}, |
|
{ |
|
"epoch": 3.533980582524272, |
|
"grad_norm": 0.16515501782595504, |
|
"learning_rate": 1.9221683520046892e-05, |
|
"loss": 0.4857, |
|
"step": 364 |
|
}, |
|
{ |
|
"epoch": 3.5436893203883493, |
|
"grad_norm": 0.19734035971441835, |
|
"learning_rate": 1.899024323091539e-05, |
|
"loss": 0.4836, |
|
"step": 365 |
|
}, |
|
{ |
|
"epoch": 3.5533980582524274, |
|
"grad_norm": 0.16333328213007223, |
|
"learning_rate": 1.875977023272757e-05, |
|
"loss": 0.485, |
|
"step": 366 |
|
}, |
|
{ |
|
"epoch": 3.5631067961165046, |
|
"grad_norm": 0.18228849361538985, |
|
"learning_rate": 1.853027513647937e-05, |
|
"loss": 0.4869, |
|
"step": 367 |
|
}, |
|
{ |
|
"epoch": 3.5728155339805827, |
|
"grad_norm": 0.14382298808613353, |
|
"learning_rate": 1.8301768508144078e-05, |
|
"loss": 0.4837, |
|
"step": 368 |
|
}, |
|
{ |
|
"epoch": 3.58252427184466, |
|
"grad_norm": 0.1436393142572651, |
|
"learning_rate": 1.8074260868185784e-05, |
|
"loss": 0.4828, |
|
"step": 369 |
|
}, |
|
{ |
|
"epoch": 3.592233009708738, |
|
"grad_norm": 0.16780032690913915, |
|
"learning_rate": 1.7847762691075115e-05, |
|
"loss": 0.487, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 3.6019417475728153, |
|
"grad_norm": 0.14947935740806928, |
|
"learning_rate": 1.762228440480692e-05, |
|
"loss": 0.4845, |
|
"step": 371 |
|
}, |
|
{ |
|
"epoch": 3.6116504854368934, |
|
"grad_norm": 0.14559731703552367, |
|
"learning_rate": 1.7397836390420192e-05, |
|
"loss": 0.4836, |
|
"step": 372 |
|
}, |
|
{ |
|
"epoch": 3.6213592233009706, |
|
"grad_norm": 0.1506762657862038, |
|
"learning_rate": 1.717442898152012e-05, |
|
"loss": 0.4855, |
|
"step": 373 |
|
}, |
|
{ |
|
"epoch": 3.6310679611650487, |
|
"grad_norm": 0.13248764059764206, |
|
"learning_rate": 1.6952072463802326e-05, |
|
"loss": 0.4848, |
|
"step": 374 |
|
}, |
|
{ |
|
"epoch": 3.6407766990291264, |
|
"grad_norm": 0.14524262090031537, |
|
"learning_rate": 1.6730777074579346e-05, |
|
"loss": 0.4841, |
|
"step": 375 |
|
}, |
|
{ |
|
"epoch": 3.650485436893204, |
|
"grad_norm": 0.13279946211767724, |
|
"learning_rate": 1.651055300230922e-05, |
|
"loss": 0.4847, |
|
"step": 376 |
|
}, |
|
{ |
|
"epoch": 3.6601941747572817, |
|
"grad_norm": 0.14766932876767538, |
|
"learning_rate": 1.6291410386126524e-05, |
|
"loss": 0.4854, |
|
"step": 377 |
|
}, |
|
{ |
|
"epoch": 3.6699029126213594, |
|
"grad_norm": 0.14421279285412036, |
|
"learning_rate": 1.607335931537547e-05, |
|
"loss": 0.4834, |
|
"step": 378 |
|
}, |
|
{ |
|
"epoch": 3.679611650485437, |
|
"grad_norm": 0.1332433537811279, |
|
"learning_rate": 1.585640982914541e-05, |
|
"loss": 0.4881, |
|
"step": 379 |
|
}, |
|
{ |
|
"epoch": 3.6893203883495147, |
|
"grad_norm": 0.13964291558360487, |
|
"learning_rate": 1.564057191580873e-05, |
|
"loss": 0.4812, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 3.6990291262135924, |
|
"grad_norm": 0.13379924755179787, |
|
"learning_rate": 1.54258555125608e-05, |
|
"loss": 0.4865, |
|
"step": 381 |
|
}, |
|
{ |
|
"epoch": 3.70873786407767, |
|
"grad_norm": 0.13942795631282168, |
|
"learning_rate": 1.521227050496266e-05, |
|
"loss": 0.4862, |
|
"step": 382 |
|
}, |
|
{ |
|
"epoch": 3.7184466019417477, |
|
"grad_norm": 0.14829905133246946, |
|
"learning_rate": 1.4999826726485754e-05, |
|
"loss": 0.4841, |
|
"step": 383 |
|
}, |
|
{ |
|
"epoch": 3.7281553398058254, |
|
"grad_norm": 0.13774937243212151, |
|
"learning_rate": 1.4788533958059281e-05, |
|
"loss": 0.4873, |
|
"step": 384 |
|
}, |
|
{ |
|
"epoch": 3.737864077669903, |
|
"grad_norm": 0.14632219044552267, |
|
"learning_rate": 1.457840192761979e-05, |
|
"loss": 0.4854, |
|
"step": 385 |
|
}, |
|
{ |
|
"epoch": 3.7475728155339807, |
|
"grad_norm": 0.15302510684664683, |
|
"learning_rate": 1.4369440309663412e-05, |
|
"loss": 0.4833, |
|
"step": 386 |
|
}, |
|
{ |
|
"epoch": 3.7572815533980584, |
|
"grad_norm": 0.14352691900463502, |
|
"learning_rate": 1.4161658724800357e-05, |
|
"loss": 0.4846, |
|
"step": 387 |
|
}, |
|
{ |
|
"epoch": 3.766990291262136, |
|
"grad_norm": 0.1517023916720968, |
|
"learning_rate": 1.3955066739312e-05, |
|
"loss": 0.4867, |
|
"step": 388 |
|
}, |
|
{ |
|
"epoch": 3.7766990291262137, |
|
"grad_norm": 0.15194309330245784, |
|
"learning_rate": 1.3749673864710524e-05, |
|
"loss": 0.4865, |
|
"step": 389 |
|
}, |
|
{ |
|
"epoch": 3.7864077669902914, |
|
"grad_norm": 0.1468613357237534, |
|
"learning_rate": 1.3545489557300853e-05, |
|
"loss": 0.4846, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 3.796116504854369, |
|
"grad_norm": 0.16395511168934657, |
|
"learning_rate": 1.3342523217745473e-05, |
|
"loss": 0.4869, |
|
"step": 391 |
|
}, |
|
{ |
|
"epoch": 3.8058252427184467, |
|
"grad_norm": 0.15078193434327586, |
|
"learning_rate": 1.3140784190631459e-05, |
|
"loss": 0.4825, |
|
"step": 392 |
|
}, |
|
{ |
|
"epoch": 3.8155339805825244, |
|
"grad_norm": 0.14510817920359698, |
|
"learning_rate": 1.2940281764040368e-05, |
|
"loss": 0.4825, |
|
"step": 393 |
|
}, |
|
{ |
|
"epoch": 3.825242718446602, |
|
"grad_norm": 0.17265976668387825, |
|
"learning_rate": 1.2741025169120539e-05, |
|
"loss": 0.4872, |
|
"step": 394 |
|
}, |
|
{ |
|
"epoch": 3.8349514563106797, |
|
"grad_norm": 0.14620030187493488, |
|
"learning_rate": 1.2543023579662106e-05, |
|
"loss": 0.4845, |
|
"step": 395 |
|
}, |
|
{ |
|
"epoch": 3.8446601941747574, |
|
"grad_norm": 0.1418045201790065, |
|
"learning_rate": 1.234628611167469e-05, |
|
"loss": 0.4845, |
|
"step": 396 |
|
}, |
|
{ |
|
"epoch": 3.854368932038835, |
|
"grad_norm": 0.14833568549734966, |
|
"learning_rate": 1.2150821822967611e-05, |
|
"loss": 0.4882, |
|
"step": 397 |
|
}, |
|
{ |
|
"epoch": 3.8640776699029127, |
|
"grad_norm": 0.147355117137231, |
|
"learning_rate": 1.1956639712732958e-05, |
|
"loss": 0.4845, |
|
"step": 398 |
|
}, |
|
{ |
|
"epoch": 3.8737864077669903, |
|
"grad_norm": 0.13630141790437844, |
|
"learning_rate": 1.1763748721131142e-05, |
|
"loss": 0.4819, |
|
"step": 399 |
|
}, |
|
{ |
|
"epoch": 3.883495145631068, |
|
"grad_norm": 0.13485086713221084, |
|
"learning_rate": 1.1572157728879444e-05, |
|
"loss": 0.485, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 3.8932038834951457, |
|
"grad_norm": 0.14115351641144186, |
|
"learning_rate": 1.1381875556843007e-05, |
|
"loss": 0.4874, |
|
"step": 401 |
|
}, |
|
{ |
|
"epoch": 3.9029126213592233, |
|
"grad_norm": 0.13251154708545737, |
|
"learning_rate": 1.119291096562884e-05, |
|
"loss": 0.4879, |
|
"step": 402 |
|
}, |
|
{ |
|
"epoch": 3.912621359223301, |
|
"grad_norm": 0.14538526251672798, |
|
"learning_rate": 1.1005272655182378e-05, |
|
"loss": 0.487, |
|
"step": 403 |
|
}, |
|
{ |
|
"epoch": 3.9223300970873787, |
|
"grad_norm": 0.14640871835491487, |
|
"learning_rate": 1.0818969264386973e-05, |
|
"loss": 0.4843, |
|
"step": 404 |
|
}, |
|
{ |
|
"epoch": 3.9320388349514563, |
|
"grad_norm": 0.13392456360817495, |
|
"learning_rate": 1.0634009370666214e-05, |
|
"loss": 0.4826, |
|
"step": 405 |
|
}, |
|
{ |
|
"epoch": 3.941747572815534, |
|
"grad_norm": 0.15291675444361674, |
|
"learning_rate": 1.045040148958893e-05, |
|
"loss": 0.488, |
|
"step": 406 |
|
}, |
|
{ |
|
"epoch": 3.9514563106796117, |
|
"grad_norm": 0.13901859144613157, |
|
"learning_rate": 1.0268154074477188e-05, |
|
"loss": 0.4853, |
|
"step": 407 |
|
}, |
|
{ |
|
"epoch": 3.9611650485436893, |
|
"grad_norm": 0.13797907262373485, |
|
"learning_rate": 1.0087275516017083e-05, |
|
"loss": 0.4834, |
|
"step": 408 |
|
}, |
|
{ |
|
"epoch": 3.970873786407767, |
|
"grad_norm": 0.13762327771479307, |
|
"learning_rate": 9.907774141872468e-06, |
|
"loss": 0.488, |
|
"step": 409 |
|
}, |
|
{ |
|
"epoch": 3.9805825242718447, |
|
"grad_norm": 0.14986633803246888, |
|
"learning_rate": 9.729658216301479e-06, |
|
"loss": 0.4828, |
|
"step": 410 |
|
}, |
|
{ |
|
"epoch": 3.9902912621359223, |
|
"grad_norm": 0.13435313597563736, |
|
"learning_rate": 9.552935939776083e-06, |
|
"loss": 0.4871, |
|
"step": 411 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"grad_norm": 0.14759467236298449, |
|
"learning_rate": 9.377615448604574e-06, |
|
"loss": 0.4831, |
|
"step": 412 |
|
}, |
|
{ |
|
"epoch": 4.009708737864078, |
|
"grad_norm": 0.17306287750975244, |
|
"learning_rate": 9.203704814556871e-06, |
|
"loss": 0.4669, |
|
"step": 413 |
|
}, |
|
{ |
|
"epoch": 4.019417475728155, |
|
"grad_norm": 0.14023047375285927, |
|
"learning_rate": 9.031212044493016e-06, |
|
"loss": 0.4659, |
|
"step": 414 |
|
}, |
|
{ |
|
"epoch": 4.029126213592233, |
|
"grad_norm": 0.14153603129051684, |
|
"learning_rate": 8.860145079994433e-06, |
|
"loss": 0.4656, |
|
"step": 415 |
|
}, |
|
{ |
|
"epoch": 4.038834951456311, |
|
"grad_norm": 0.14948734290428067, |
|
"learning_rate": 8.690511796998344e-06, |
|
"loss": 0.4653, |
|
"step": 416 |
|
}, |
|
{ |
|
"epoch": 4.048543689320389, |
|
"grad_norm": 0.16197544725648444, |
|
"learning_rate": 8.522320005435162e-06, |
|
"loss": 0.4686, |
|
"step": 417 |
|
}, |
|
{ |
|
"epoch": 4.058252427184466, |
|
"grad_norm": 0.16013381839658847, |
|
"learning_rate": 8.355577448868933e-06, |
|
"loss": 0.4709, |
|
"step": 418 |
|
}, |
|
{ |
|
"epoch": 4.067961165048544, |
|
"grad_norm": 0.13999136843500865, |
|
"learning_rate": 8.190291804140775e-06, |
|
"loss": 0.4648, |
|
"step": 419 |
|
}, |
|
{ |
|
"epoch": 4.077669902912621, |
|
"grad_norm": 0.1470240140496139, |
|
"learning_rate": 8.02647068101547e-06, |
|
"loss": 0.4681, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 4.087378640776699, |
|
"grad_norm": 0.1355700665658678, |
|
"learning_rate": 7.864121621831126e-06, |
|
"loss": 0.467, |
|
"step": 421 |
|
}, |
|
{ |
|
"epoch": 4.097087378640777, |
|
"grad_norm": 0.14144514644659487, |
|
"learning_rate": 7.703252101151873e-06, |
|
"loss": 0.4613, |
|
"step": 422 |
|
}, |
|
{ |
|
"epoch": 4.106796116504855, |
|
"grad_norm": 0.129930341121314, |
|
"learning_rate": 7.5438695254238e-06, |
|
"loss": 0.4638, |
|
"step": 423 |
|
}, |
|
{ |
|
"epoch": 4.116504854368932, |
|
"grad_norm": 0.14218321035952994, |
|
"learning_rate": 7.385981232633894e-06, |
|
"loss": 0.465, |
|
"step": 424 |
|
}, |
|
{ |
|
"epoch": 4.12621359223301, |
|
"grad_norm": 0.1420896044955223, |
|
"learning_rate": 7.229594491972256e-06, |
|
"loss": 0.4659, |
|
"step": 425 |
|
}, |
|
{ |
|
"epoch": 4.135922330097087, |
|
"grad_norm": 0.13076263084557904, |
|
"learning_rate": 7.07471650349739e-06, |
|
"loss": 0.4648, |
|
"step": 426 |
|
}, |
|
{ |
|
"epoch": 4.145631067961165, |
|
"grad_norm": 0.13079701173247169, |
|
"learning_rate": 6.921354397804712e-06, |
|
"loss": 0.4651, |
|
"step": 427 |
|
}, |
|
{ |
|
"epoch": 4.155339805825243, |
|
"grad_norm": 0.1299754901865967, |
|
"learning_rate": 6.7695152356983054e-06, |
|
"loss": 0.4647, |
|
"step": 428 |
|
}, |
|
{ |
|
"epoch": 4.165048543689321, |
|
"grad_norm": 0.12764332296015807, |
|
"learning_rate": 6.619206007865768e-06, |
|
"loss": 0.4607, |
|
"step": 429 |
|
}, |
|
{ |
|
"epoch": 4.174757281553398, |
|
"grad_norm": 0.12376699505437842, |
|
"learning_rate": 6.47043363455643e-06, |
|
"loss": 0.4674, |
|
"step": 430 |
|
}, |
|
{ |
|
"epoch": 4.184466019417476, |
|
"grad_norm": 0.11650682736889516, |
|
"learning_rate": 6.323204965262686e-06, |
|
"loss": 0.463, |
|
"step": 431 |
|
}, |
|
{ |
|
"epoch": 4.194174757281553, |
|
"grad_norm": 0.1217502825230698, |
|
"learning_rate": 6.177526778404663e-06, |
|
"loss": 0.4679, |
|
"step": 432 |
|
}, |
|
{ |
|
"epoch": 4.203883495145631, |
|
"grad_norm": 0.11887732797821776, |
|
"learning_rate": 6.033405781018195e-06, |
|
"loss": 0.4655, |
|
"step": 433 |
|
}, |
|
{ |
|
"epoch": 4.213592233009709, |
|
"grad_norm": 0.12496115598562435, |
|
"learning_rate": 5.8908486084459134e-06, |
|
"loss": 0.4635, |
|
"step": 434 |
|
}, |
|
{ |
|
"epoch": 4.223300970873787, |
|
"grad_norm": 0.12470991178018197, |
|
"learning_rate": 5.74986182403189e-06, |
|
"loss": 0.4646, |
|
"step": 435 |
|
}, |
|
{ |
|
"epoch": 4.233009708737864, |
|
"grad_norm": 0.1292310303978258, |
|
"learning_rate": 5.610451918819357e-06, |
|
"loss": 0.4651, |
|
"step": 436 |
|
}, |
|
{ |
|
"epoch": 4.242718446601942, |
|
"grad_norm": 0.13694613972829348, |
|
"learning_rate": 5.472625311251918e-06, |
|
"loss": 0.4667, |
|
"step": 437 |
|
}, |
|
{ |
|
"epoch": 4.252427184466019, |
|
"grad_norm": 0.12260186181424092, |
|
"learning_rate": 5.336388346878006e-06, |
|
"loss": 0.4654, |
|
"step": 438 |
|
}, |
|
{ |
|
"epoch": 4.262135922330097, |
|
"grad_norm": 0.12523666490053748, |
|
"learning_rate": 5.201747298058765e-06, |
|
"loss": 0.4604, |
|
"step": 439 |
|
}, |
|
{ |
|
"epoch": 4.271844660194175, |
|
"grad_norm": 0.12451383039630047, |
|
"learning_rate": 5.068708363679249e-06, |
|
"loss": 0.4645, |
|
"step": 440 |
|
}, |
|
{ |
|
"epoch": 4.281553398058253, |
|
"grad_norm": 0.10990810773913537, |
|
"learning_rate": 4.937277668863014e-06, |
|
"loss": 0.4647, |
|
"step": 441 |
|
}, |
|
{ |
|
"epoch": 4.29126213592233, |
|
"grad_norm": 0.10982607372347657, |
|
"learning_rate": 4.807461264690157e-06, |
|
"loss": 0.465, |
|
"step": 442 |
|
}, |
|
{ |
|
"epoch": 4.300970873786408, |
|
"grad_norm": 0.12022614154318102, |
|
"learning_rate": 4.67926512791868e-06, |
|
"loss": 0.4654, |
|
"step": 443 |
|
}, |
|
{ |
|
"epoch": 4.310679611650485, |
|
"grad_norm": 0.10923349467562726, |
|
"learning_rate": 4.552695160709362e-06, |
|
"loss": 0.466, |
|
"step": 444 |
|
}, |
|
{ |
|
"epoch": 4.320388349514563, |
|
"grad_norm": 0.10653612984033727, |
|
"learning_rate": 4.427757190353976e-06, |
|
"loss": 0.4684, |
|
"step": 445 |
|
}, |
|
{ |
|
"epoch": 4.330097087378641, |
|
"grad_norm": 0.10730397458255132, |
|
"learning_rate": 4.304456969007049e-06, |
|
"loss": 0.4657, |
|
"step": 446 |
|
}, |
|
{ |
|
"epoch": 4.339805825242719, |
|
"grad_norm": 0.1003612678568525, |
|
"learning_rate": 4.182800173420991e-06, |
|
"loss": 0.4649, |
|
"step": 447 |
|
}, |
|
{ |
|
"epoch": 4.349514563106796, |
|
"grad_norm": 0.11090894330673261, |
|
"learning_rate": 4.06279240468475e-06, |
|
"loss": 0.4631, |
|
"step": 448 |
|
}, |
|
{ |
|
"epoch": 4.359223300970874, |
|
"grad_norm": 0.1167058523870307, |
|
"learning_rate": 3.9444391879659604e-06, |
|
"loss": 0.4665, |
|
"step": 449 |
|
}, |
|
{ |
|
"epoch": 4.368932038834951, |
|
"grad_norm": 0.10433976260435962, |
|
"learning_rate": 3.827745972256529e-06, |
|
"loss": 0.4659, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 4.378640776699029, |
|
"grad_norm": 0.10011905909384386, |
|
"learning_rate": 3.7127181301217817e-06, |
|
"loss": 0.4641, |
|
"step": 451 |
|
}, |
|
{ |
|
"epoch": 4.388349514563107, |
|
"grad_norm": 0.10655509962726566, |
|
"learning_rate": 3.599360957453102e-06, |
|
"loss": 0.467, |
|
"step": 452 |
|
}, |
|
{ |
|
"epoch": 4.398058252427185, |
|
"grad_norm": 0.10938664062060237, |
|
"learning_rate": 3.487679673224129e-06, |
|
"loss": 0.4607, |
|
"step": 453 |
|
}, |
|
{ |
|
"epoch": 4.407766990291262, |
|
"grad_norm": 0.10191086413678936, |
|
"learning_rate": 3.3776794192504412e-06, |
|
"loss": 0.4623, |
|
"step": 454 |
|
}, |
|
{ |
|
"epoch": 4.41747572815534, |
|
"grad_norm": 0.09548255692426758, |
|
"learning_rate": 3.269365259952859e-06, |
|
"loss": 0.4626, |
|
"step": 455 |
|
}, |
|
{ |
|
"epoch": 4.427184466019417, |
|
"grad_norm": 0.10109872749133958, |
|
"learning_rate": 3.1627421821242586e-06, |
|
"loss": 0.4627, |
|
"step": 456 |
|
}, |
|
{ |
|
"epoch": 4.436893203883495, |
|
"grad_norm": 0.1003445240191667, |
|
"learning_rate": 3.0578150946999695e-06, |
|
"loss": 0.4637, |
|
"step": 457 |
|
}, |
|
{ |
|
"epoch": 4.446601941747573, |
|
"grad_norm": 0.10094079472060287, |
|
"learning_rate": 2.954588828531817e-06, |
|
"loss": 0.4671, |
|
"step": 458 |
|
}, |
|
{ |
|
"epoch": 4.456310679611651, |
|
"grad_norm": 0.09877743367390623, |
|
"learning_rate": 2.8530681361656422e-06, |
|
"loss": 0.4649, |
|
"step": 459 |
|
}, |
|
{ |
|
"epoch": 4.466019417475728, |
|
"grad_norm": 0.09360681988866616, |
|
"learning_rate": 2.7532576916225395e-06, |
|
"loss": 0.4678, |
|
"step": 460 |
|
}, |
|
{ |
|
"epoch": 4.475728155339806, |
|
"grad_norm": 0.09976388673239875, |
|
"learning_rate": 2.6551620901836515e-06, |
|
"loss": 0.4645, |
|
"step": 461 |
|
}, |
|
{ |
|
"epoch": 4.485436893203883, |
|
"grad_norm": 0.0965607126962889, |
|
"learning_rate": 2.5587858481786086e-06, |
|
"loss": 0.4664, |
|
"step": 462 |
|
}, |
|
{ |
|
"epoch": 4.495145631067961, |
|
"grad_norm": 0.09897406466321508, |
|
"learning_rate": 2.4641334027775755e-06, |
|
"loss": 0.467, |
|
"step": 463 |
|
}, |
|
{ |
|
"epoch": 4.504854368932039, |
|
"grad_norm": 0.10396092442681994, |
|
"learning_rate": 2.371209111786987e-06, |
|
"loss": 0.468, |
|
"step": 464 |
|
}, |
|
{ |
|
"epoch": 4.514563106796117, |
|
"grad_norm": 0.0948373657066504, |
|
"learning_rate": 2.280017253448916e-06, |
|
"loss": 0.4642, |
|
"step": 465 |
|
}, |
|
{ |
|
"epoch": 4.524271844660194, |
|
"grad_norm": 0.09507655846542853, |
|
"learning_rate": 2.190562026244072e-06, |
|
"loss": 0.4669, |
|
"step": 466 |
|
}, |
|
{ |
|
"epoch": 4.533980582524272, |
|
"grad_norm": 0.09659880423736872, |
|
"learning_rate": 2.102847548698539e-06, |
|
"loss": 0.4642, |
|
"step": 467 |
|
}, |
|
{ |
|
"epoch": 4.543689320388349, |
|
"grad_norm": 0.09162921938645756, |
|
"learning_rate": 2.0168778591941242e-06, |
|
"loss": 0.4694, |
|
"step": 468 |
|
}, |
|
{ |
|
"epoch": 4.553398058252427, |
|
"grad_norm": 0.08943623447949108, |
|
"learning_rate": 1.9326569157824736e-06, |
|
"loss": 0.4654, |
|
"step": 469 |
|
}, |
|
{ |
|
"epoch": 4.563106796116505, |
|
"grad_norm": 0.0958809139961805, |
|
"learning_rate": 1.850188596002802e-06, |
|
"loss": 0.4638, |
|
"step": 470 |
|
}, |
|
{ |
|
"epoch": 4.572815533980583, |
|
"grad_norm": 0.0928325035695772, |
|
"learning_rate": 1.7694766967033805e-06, |
|
"loss": 0.4699, |
|
"step": 471 |
|
}, |
|
{ |
|
"epoch": 4.58252427184466, |
|
"grad_norm": 0.09326809079774372, |
|
"learning_rate": 1.6905249338667617e-06, |
|
"loss": 0.4654, |
|
"step": 472 |
|
}, |
|
{ |
|
"epoch": 4.592233009708738, |
|
"grad_norm": 0.08894714059682778, |
|
"learning_rate": 1.613336942438637e-06, |
|
"loss": 0.4678, |
|
"step": 473 |
|
}, |
|
{ |
|
"epoch": 4.601941747572815, |
|
"grad_norm": 0.09702666150761258, |
|
"learning_rate": 1.5379162761605427e-06, |
|
"loss": 0.4642, |
|
"step": 474 |
|
}, |
|
{ |
|
"epoch": 4.611650485436893, |
|
"grad_norm": 0.0925613181306804, |
|
"learning_rate": 1.4642664074061962e-06, |
|
"loss": 0.463, |
|
"step": 475 |
|
}, |
|
{ |
|
"epoch": 4.621359223300971, |
|
"grad_norm": 0.0934935785749229, |
|
"learning_rate": 1.3923907270216819e-06, |
|
"loss": 0.4636, |
|
"step": 476 |
|
}, |
|
{ |
|
"epoch": 4.631067961165049, |
|
"grad_norm": 0.08692636915664595, |
|
"learning_rate": 1.3222925441692635e-06, |
|
"loss": 0.4656, |
|
"step": 477 |
|
}, |
|
{ |
|
"epoch": 4.640776699029126, |
|
"grad_norm": 0.08592627332074977, |
|
"learning_rate": 1.2539750861751031e-06, |
|
"loss": 0.464, |
|
"step": 478 |
|
}, |
|
{ |
|
"epoch": 4.650485436893204, |
|
"grad_norm": 0.08826705670035949, |
|
"learning_rate": 1.1874414983806283e-06, |
|
"loss": 0.4669, |
|
"step": 479 |
|
}, |
|
{ |
|
"epoch": 4.660194174757281, |
|
"grad_norm": 0.08820160855473339, |
|
"learning_rate": 1.1226948439977314e-06, |
|
"loss": 0.4631, |
|
"step": 480 |
|
}, |
|
{ |
|
"epoch": 4.669902912621359, |
|
"grad_norm": 0.09622872398963238, |
|
"learning_rate": 1.0597381039677646e-06, |
|
"loss": 0.4686, |
|
"step": 481 |
|
}, |
|
{ |
|
"epoch": 4.679611650485437, |
|
"grad_norm": 0.08913032971687475, |
|
"learning_rate": 9.985741768242429e-07, |
|
"loss": 0.4647, |
|
"step": 482 |
|
}, |
|
{ |
|
"epoch": 4.689320388349515, |
|
"grad_norm": 0.09028397610583812, |
|
"learning_rate": 9.392058785594504e-07, |
|
"loss": 0.4623, |
|
"step": 483 |
|
}, |
|
{ |
|
"epoch": 4.699029126213592, |
|
"grad_norm": 0.09375615377365946, |
|
"learning_rate": 8.816359424947652e-07, |
|
"loss": 0.4629, |
|
"step": 484 |
|
}, |
|
{ |
|
"epoch": 4.70873786407767, |
|
"grad_norm": 0.09058552987790736, |
|
"learning_rate": 8.258670191548135e-07, |
|
"loss": 0.4633, |
|
"step": 485 |
|
}, |
|
{ |
|
"epoch": 4.718446601941747, |
|
"grad_norm": 0.08822401104082032, |
|
"learning_rate": 7.719016761454479e-07, |
|
"loss": 0.4668, |
|
"step": 486 |
|
}, |
|
{ |
|
"epoch": 4.728155339805825, |
|
"grad_norm": 0.0922329791045878, |
|
"learning_rate": 7.197423980355344e-07, |
|
"loss": 0.4665, |
|
"step": 487 |
|
}, |
|
{ |
|
"epoch": 4.737864077669903, |
|
"grad_norm": 0.08744767491607641, |
|
"learning_rate": 6.693915862425692e-07, |
|
"loss": 0.4657, |
|
"step": 488 |
|
}, |
|
{ |
|
"epoch": 4.747572815533981, |
|
"grad_norm": 0.09683409497038403, |
|
"learning_rate": 6.20851558922091e-07, |
|
"loss": 0.4615, |
|
"step": 489 |
|
}, |
|
{ |
|
"epoch": 4.757281553398058, |
|
"grad_norm": 0.08456750174183916, |
|
"learning_rate": 5.741245508609972e-07, |
|
"loss": 0.4649, |
|
"step": 490 |
|
}, |
|
{ |
|
"epoch": 4.766990291262136, |
|
"grad_norm": 0.08773860548864587, |
|
"learning_rate": 5.292127133746005e-07, |
|
"loss": 0.4653, |
|
"step": 491 |
|
}, |
|
{ |
|
"epoch": 4.776699029126213, |
|
"grad_norm": 0.0917555007099176, |
|
"learning_rate": 4.861181142076276e-07, |
|
"loss": 0.4656, |
|
"step": 492 |
|
}, |
|
{ |
|
"epoch": 4.786407766990291, |
|
"grad_norm": 0.08845762170540458, |
|
"learning_rate": 4.448427374389974e-07, |
|
"loss": 0.4692, |
|
"step": 493 |
|
}, |
|
{ |
|
"epoch": 4.796116504854369, |
|
"grad_norm": 0.08827155953592115, |
|
"learning_rate": 4.053884833904809e-07, |
|
"loss": 0.4662, |
|
"step": 494 |
|
}, |
|
{ |
|
"epoch": 4.805825242718447, |
|
"grad_norm": 0.08804630863083254, |
|
"learning_rate": 3.677571685392023e-07, |
|
"loss": 0.4653, |
|
"step": 495 |
|
}, |
|
{ |
|
"epoch": 4.815533980582524, |
|
"grad_norm": 0.09273872362298237, |
|
"learning_rate": 3.319505254340172e-07, |
|
"loss": 0.4674, |
|
"step": 496 |
|
}, |
|
{ |
|
"epoch": 4.825242718446602, |
|
"grad_norm": 0.08937217299785047, |
|
"learning_rate": 2.9797020261574494e-07, |
|
"loss": 0.4675, |
|
"step": 497 |
|
}, |
|
{ |
|
"epoch": 4.834951456310679, |
|
"grad_norm": 0.08269997531774376, |
|
"learning_rate": 2.6581776454126075e-07, |
|
"loss": 0.4683, |
|
"step": 498 |
|
}, |
|
{ |
|
"epoch": 4.844660194174757, |
|
"grad_norm": 0.08810498069517812, |
|
"learning_rate": 2.3549469151149085e-07, |
|
"loss": 0.4658, |
|
"step": 499 |
|
}, |
|
{ |
|
"epoch": 4.854368932038835, |
|
"grad_norm": 0.08729625064317513, |
|
"learning_rate": 2.0700237960322279e-07, |
|
"loss": 0.4696, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 4.864077669902913, |
|
"grad_norm": 0.08416226889517374, |
|
"learning_rate": 1.803421406048589e-07, |
|
"loss": 0.4636, |
|
"step": 501 |
|
}, |
|
{ |
|
"epoch": 4.87378640776699, |
|
"grad_norm": 0.08684248344197969, |
|
"learning_rate": 1.5551520195601577e-07, |
|
"loss": 0.4647, |
|
"step": 502 |
|
}, |
|
{ |
|
"epoch": 4.883495145631068, |
|
"grad_norm": 0.08935580567045806, |
|
"learning_rate": 1.3252270669100953e-07, |
|
"loss": 0.4642, |
|
"step": 503 |
|
}, |
|
{ |
|
"epoch": 4.893203883495145, |
|
"grad_norm": 0.0846095709740149, |
|
"learning_rate": 1.113657133862267e-07, |
|
"loss": 0.4626, |
|
"step": 504 |
|
}, |
|
{ |
|
"epoch": 4.902912621359223, |
|
"grad_norm": 0.08317972288259207, |
|
"learning_rate": 9.204519611138995e-08, |
|
"loss": 0.4628, |
|
"step": 505 |
|
}, |
|
{ |
|
"epoch": 4.9126213592233015, |
|
"grad_norm": 0.0845289066820202, |
|
"learning_rate": 7.45620443847228e-08, |
|
"loss": 0.4657, |
|
"step": 506 |
|
}, |
|
{ |
|
"epoch": 4.922330097087379, |
|
"grad_norm": 0.08611674149409979, |
|
"learning_rate": 5.891706313197354e-08, |
|
"loss": 0.4643, |
|
"step": 507 |
|
}, |
|
{ |
|
"epoch": 4.932038834951456, |
|
"grad_norm": 0.08707702652246953, |
|
"learning_rate": 4.511097264938258e-08, |
|
"loss": 0.4653, |
|
"step": 508 |
|
}, |
|
{ |
|
"epoch": 4.941747572815534, |
|
"grad_norm": 0.08653800248757759, |
|
"learning_rate": 3.314440857049572e-08, |
|
"loss": 0.4679, |
|
"step": 509 |
|
}, |
|
{ |
|
"epoch": 4.951456310679612, |
|
"grad_norm": 0.08635221643773852, |
|
"learning_rate": 2.3017921836916425e-08, |
|
"loss": 0.4627, |
|
"step": 510 |
|
}, |
|
{ |
|
"epoch": 4.961165048543689, |
|
"grad_norm": 0.08744848791920268, |
|
"learning_rate": 1.4731978672939407e-08, |
|
"loss": 0.4664, |
|
"step": 511 |
|
}, |
|
{ |
|
"epoch": 4.970873786407767, |
|
"grad_norm": 0.08802432305414154, |
|
"learning_rate": 8.286960564065639e-09, |
|
"loss": 0.4628, |
|
"step": 512 |
|
}, |
|
{ |
|
"epoch": 4.980582524271845, |
|
"grad_norm": 0.08116167073862661, |
|
"learning_rate": 3.683164239469683e-09, |
|
"loss": 0.4637, |
|
"step": 513 |
|
}, |
|
{ |
|
"epoch": 4.990291262135923, |
|
"grad_norm": 0.08686945147014129, |
|
"learning_rate": 9.208016583128754e-10, |
|
"loss": 0.4606, |
|
"step": 514 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"grad_norm": 0.08703184321831914, |
|
"learning_rate": 0.0, |
|
"loss": 0.4639, |
|
"step": 515 |
|
}, |
|
    {
      "epoch": 5.0,
      "step": 515,
      "total_flos": 8639713262960640.0,
      "train_loss": 0.0,
      "train_runtime": 13.1123,
      "train_samples_per_second": 20070.162,
      "train_steps_per_second": 39.276
    }
  ],
"logging_steps": 1, |
|
"max_steps": 515, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 5, |
|
"save_steps": 500, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 8639713262960640.0, |
|
"train_batch_size": 1, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |